next_api/
analyze.rs

1use std::{borrow::Cow, io::Write};
2
3use anyhow::Result;
4use byteorder::{BE, WriteBytesExt};
5use rustc_hash::FxHashMap;
6use serde::Serialize;
7use turbo_rcstr::RcStr;
8use turbo_tasks::{FxIndexSet, ResolvedVc, TryFlatJoinIterExt, TryJoinIterExt, ValueToString, Vc};
9use turbo_tasks_fs::{
10    File, FileContent, FileSystemPath,
11    rope::{Rope, RopeBuilder},
12};
13use turbopack_analyze::split_chunk::split_output_asset_into_parts;
14use turbopack_core::{
15    SOURCE_URL_PROTOCOL,
16    asset::{Asset, AssetContent},
17    chunk::ChunkingType,
18    module::Module,
19    output::{OutputAsset, OutputAssets, OutputAssetsReference},
20    reference::all_assets_from_entries,
21};
22
23use crate::route::ModuleGraphs;
24
25pub struct EdgesData {
26    pub offsets: Vec<u32>,
27    pub data: Vec<u32>,
28}
29
30impl EdgesData {
31    fn from_iterator<'a>(iterable: impl IntoIterator<Item = &'a Vec<u32>> + Clone) -> Self {
32        let mut current_offset = 0;
33        let sum: usize = iterable.clone().into_iter().map(|v| v.len()).sum();
34        let mut data = Vec::with_capacity(sum);
35        let offsets = iterable
36            .into_iter()
37            .map(|edges| {
38                current_offset += edges.len() as u32;
39                data.extend(edges);
40                current_offset
41            })
42            .collect();
43        Self { offsets, data }
44    }
45
46    fn write(&self, writer: &mut impl Write) -> Result<()> {
47        writer.write_u32::<BE>(self.offsets.len() as u32)?;
48        for &offset in &self.offsets {
49            writer.write_u32::<BE>(offset)?;
50        }
51        for &data in &self.data {
52            writer.write_u32::<BE>(data)?;
53        }
54        Ok(())
55    }
56}
57
58#[derive(Serialize)]
59pub struct AnalyzeSource {
60    pub parent_source_index: Option<u32>,
61    /// Path. When there is a parent, this is concatenated to the parent's path.
62    /// Folders end with a slash. Might have multiple path segments when folders contain only a
63    /// single child.
64    pub path: RcStr,
65}
66
67#[derive(Serialize)]
68pub struct AnalyzeModule {
69    pub ident: RcStr,
70    pub path: RcStr,
71}
72
73#[derive(Serialize)]
74pub struct AnalyzeChunkPart {
75    pub source_index: u32,
76    pub output_file_index: u32,
77    pub size: u32,
78}
79
80#[derive(Serialize)]
81pub struct AnalyzeOutputFile {
82    pub filename: RcStr,
83}
84
85#[derive(Serialize)]
86struct EdgesDataReference {
87    pub offset: u32,
88    pub length: u32,
89}
90
91#[derive(Serialize)]
92struct AnalyzeDataHeader {
93    pub sources: Vec<AnalyzeSource>,
94    pub chunk_parts: Vec<AnalyzeChunkPart>,
95    pub output_files: Vec<AnalyzeOutputFile>,
96    /// Edges from chunks to chunk parts
97    pub output_file_chunk_parts: EdgesDataReference,
98    /// Edges from sources to chunk parts
99    pub source_chunk_parts: EdgesDataReference,
100    /// Edges from sources to their children sources
101    pub source_children: EdgesDataReference,
102    /// Root level sources, walking their children will reach all sources
103    pub source_roots: Vec<u32>,
104}
105
106#[derive(Serialize)]
107struct ModulesDataHeader {
108    pub modules: Vec<AnalyzeModule>,
109    /// Edges from modules to modules
110    pub module_dependents: EdgesDataReference,
111    /// Edges from modules to modules
112    pub async_module_dependents: EdgesDataReference,
113    /// Edges from modules to modules
114    pub module_dependencies: EdgesDataReference,
115    /// Edges from modules to modules
116    pub async_module_dependencies: EdgesDataReference,
117}
118
119struct AnalyzeOutputFileBuilder {
120    output_file: AnalyzeOutputFile,
121    chunk_part_indices: Vec<u32>,
122}
123
124struct AnalyzeSourceBuilder {
125    source: AnalyzeSource,
126    child_source_indices: Vec<u32>,
127    chunk_part_indices: Vec<u32>,
128}
129
130struct AnalyzeModuleBuilder {
131    module: AnalyzeModule,
132    dependencies: FxIndexSet<u32>,
133    async_dependencies: FxIndexSet<u32>,
134    dependents: FxIndexSet<u32>,
135    async_dependents: FxIndexSet<u32>,
136}
137
138struct AnalyzeDataBuilder {
139    sources: Vec<AnalyzeSourceBuilder>,
140    source_index_map: FxHashMap<RcStr, u32>,
141    chunk_parts: Vec<AnalyzeChunkPart>,
142    output_files: Vec<AnalyzeOutputFileBuilder>,
143}
144
145struct ModulesDataBuilder {
146    modules: Vec<AnalyzeModuleBuilder>,
147    module_index_map: FxHashMap<RcStr, u32>,
148}
149
150struct EdgesDataSectionBuilder {
151    data: Vec<u8>,
152}
153
154impl EdgesDataSectionBuilder {
155    fn new() -> Self {
156        Self { data: vec![] }
157    }
158
159    fn add_edges(&mut self, edges: &EdgesData) -> EdgesDataReference {
160        let offset = self.data.len().try_into().unwrap();
161        edges.write(&mut self.data).unwrap();
162        let length = (self.data.len() - offset as usize).try_into().unwrap();
163        EdgesDataReference { offset, length }
164    }
165}
166
167impl AnalyzeDataBuilder {
168    fn new() -> Self {
169        Self {
170            sources: vec![],
171            source_index_map: FxHashMap::default(),
172            chunk_parts: vec![],
173            output_files: vec![],
174        }
175    }
176
177    fn ensure_source(&mut self, path: &str) -> (&mut AnalyzeSourceBuilder, u32) {
178        if let Some(&index) = self.source_index_map.get(path) {
179            return (&mut self.sources[index as usize], index);
180        }
181        let index = self.sources.len() as u32;
182        let path = RcStr::from(path);
183        self.source_index_map.insert(path.clone(), index);
184        self.sources.push(AnalyzeSourceBuilder {
185            source: AnalyzeSource {
186                parent_source_index: None,
187                path,
188            },
189            child_source_indices: vec![],
190            chunk_part_indices: vec![],
191        });
192        (&mut self.sources[index as usize], index)
193    }
194
195    fn add_chunk_part(&mut self, chunk_part: AnalyzeChunkPart) -> u32 {
196        let i = self.chunk_parts.len() as u32;
197        self.chunk_parts.push(chunk_part);
198        i
199    }
200
201    fn add_output_file(&mut self, output_file: AnalyzeOutputFile) -> u32 {
202        let i = self.output_files.len() as u32;
203        self.output_files.push(AnalyzeOutputFileBuilder {
204            output_file,
205            chunk_part_indices: vec![],
206        });
207        i
208    }
209
210    fn add_chunk_part_to_output_file(&mut self, output_file_index: u32, chunk_part_index: u32) {
211        self.output_files[output_file_index as usize]
212            .chunk_part_indices
213            .push(chunk_part_index);
214    }
215
216    fn add_chunk_part_to_source(&mut self, source_index: u32, chunk_part_index: u32) {
217        self.sources[source_index as usize]
218            .chunk_part_indices
219            .push(chunk_part_index);
220    }
221
222    fn build(self) -> Rope {
223        let source_roots = self
224            .sources
225            .iter()
226            .enumerate()
227            .filter_map(|(i, s)| {
228                if s.source.parent_source_index.is_none() {
229                    Some(i as u32)
230                } else {
231                    None
232                }
233            })
234            .collect();
235
236        let source_children =
237            EdgesData::from_iterator(self.sources.iter().map(|s| &s.child_source_indices));
238
239        let source_chunk_parts =
240            EdgesData::from_iterator(self.sources.iter().map(|s| &s.chunk_part_indices));
241
242        let output_file_chunk_parts =
243            EdgesData::from_iterator(self.output_files.iter().map(|of| &of.chunk_part_indices));
244
245        let mut binary_section = EdgesDataSectionBuilder::new();
246
247        let header = AnalyzeDataHeader {
248            sources: self.sources.into_iter().map(|s| s.source).collect(),
249            chunk_parts: self.chunk_parts,
250            output_files: self
251                .output_files
252                .into_iter()
253                .map(|of| of.output_file)
254                .collect(),
255            output_file_chunk_parts: binary_section.add_edges(&output_file_chunk_parts),
256            source_chunk_parts: binary_section.add_edges(&source_chunk_parts),
257            source_children: binary_section.add_edges(&source_children),
258            source_roots,
259        };
260
261        let header_json = serde_json::to_vec(&header).unwrap();
262
263        let mut rope = RopeBuilder::default();
264        rope.push_bytes(&(header_json.len() as u32).to_be_bytes());
265        rope.reserve_bytes(header_json.len() + binary_section.data.len());
266        rope.push_bytes(&header_json);
267        rope.push_bytes(&binary_section.data);
268        rope.build()
269    }
270}
271
272impl ModulesDataBuilder {
273    fn new() -> Self {
274        Self {
275            modules: vec![],
276            module_index_map: FxHashMap::default(),
277        }
278    }
279
280    fn get_module(&mut self, ident: &str) -> (&mut AnalyzeModuleBuilder, u32) {
281        if let Some(&index) = self.module_index_map.get(ident) {
282            return (&mut self.modules[index as usize], index);
283        }
284        panic!("Module with ident `{}` not found", ident);
285    }
286
287    fn ensure_module(&mut self, ident: &str, path: &str) -> (&mut AnalyzeModuleBuilder, u32) {
288        if let Some(&index) = self.module_index_map.get(ident) {
289            return (&mut self.modules[index as usize], index);
290        }
291        let index = self.modules.len() as u32;
292        let ident = RcStr::from(ident);
293        let path = RcStr::from(path);
294        self.module_index_map.insert(ident.clone(), index);
295        self.modules.push(AnalyzeModuleBuilder {
296            module: AnalyzeModule { ident, path },
297            dependencies: FxIndexSet::default(),
298            async_dependencies: FxIndexSet::default(),
299            dependents: FxIndexSet::default(),
300            async_dependents: FxIndexSet::default(),
301        });
302        (&mut self.modules[index as usize], index)
303    }
304
305    fn build(self) -> Rope {
306        let module_dependencies_vecs: Vec<Vec<u32>> = self
307            .modules
308            .iter()
309            .map(|s| s.dependencies.iter().copied().collect())
310            .collect();
311        let async_module_dependencies_vecs: Vec<Vec<u32>> = self
312            .modules
313            .iter()
314            .map(|s| s.async_dependencies.iter().copied().collect())
315            .collect();
316        let module_dependents_vecs: Vec<Vec<u32>> = self
317            .modules
318            .iter()
319            .map(|s| s.dependents.iter().copied().collect())
320            .collect();
321        let async_module_dependents_vecs: Vec<Vec<u32>> = self
322            .modules
323            .iter()
324            .map(|s| s.async_dependents.iter().copied().collect())
325            .collect();
326
327        let module_dependencies = EdgesData::from_iterator(&module_dependencies_vecs);
328        let async_module_dependencies = EdgesData::from_iterator(&async_module_dependencies_vecs);
329        let module_dependents = EdgesData::from_iterator(&module_dependents_vecs);
330        let async_module_dependents = EdgesData::from_iterator(&async_module_dependents_vecs);
331
332        let mut binary_section = EdgesDataSectionBuilder::new();
333
334        let header = ModulesDataHeader {
335            modules: self.modules.into_iter().map(|s| s.module).collect(),
336            module_dependents: binary_section.add_edges(&module_dependents),
337            async_module_dependents: binary_section.add_edges(&async_module_dependents),
338            module_dependencies: binary_section.add_edges(&module_dependencies),
339            async_module_dependencies: binary_section.add_edges(&async_module_dependencies),
340        };
341
342        let header_json = serde_json::to_vec(&header).unwrap();
343
344        let mut rope = RopeBuilder::default();
345        rope.push_bytes(&(header_json.len() as u32).to_be_bytes());
346        rope.reserve_bytes(header_json.len() + binary_section.data.len());
347        rope.push_bytes(&header_json);
348        rope.push_bytes(&binary_section.data);
349        rope.build()
350    }
351}
352
353#[turbo_tasks::function]
354pub async fn analyze_output_assets(output_assets: Vc<OutputAssets>) -> Result<Vc<FileContent>> {
355    let output_assets = all_assets_from_entries(output_assets);
356
357    let mut builder = AnalyzeDataBuilder::new();
358
359    let prefix = format!("{SOURCE_URL_PROTOCOL}///");
360
361    // Process the output assets and extract chunk parts.
362    // Also creates sources for the chunk parts.
363    for &asset in output_assets.await? {
364        let filename = asset.path().to_string().owned().await?;
365        if filename.ends_with(".map") || filename.ends_with(".nft.json") {
366            // Skip source maps.
367            continue;
368        }
369
370        let output_file_index = builder.add_output_file(AnalyzeOutputFile { filename });
371        let chunk_parts = split_output_asset_into_parts(*asset).await?;
372        for chunk_part in chunk_parts {
373            let decoded_source = urlencoding::decode(&chunk_part.source)?;
374            let source = if let Some(stripped) = decoded_source.strip_prefix(&prefix) {
375                Cow::Borrowed(stripped)
376            } else {
377                Cow::Owned(format!(
378                    "[project]/{}",
379                    decoded_source.trim_start_matches("../")
380                ))
381            };
382            let source_index = builder.ensure_source(&source).1;
383            let chunk_part_index = builder.add_chunk_part(AnalyzeChunkPart {
384                source_index,
385                output_file_index,
386                size: chunk_part.real_size + chunk_part.unaccounted_size,
387            });
388            builder.add_chunk_part_to_output_file(output_file_index, chunk_part_index);
389            builder.add_chunk_part_to_source(source_index, chunk_part_index);
390        }
391    }
392
393    // Build a directory structure for the sources.
394    let mut i: u32 = 0;
395    while i < builder.sources.len().try_into().unwrap() {
396        let source = &builder.sources[i as usize];
397        let path = source.source.path.as_str();
398        if !path.is_empty() {
399            let (parent_path, path) = if let Some(pos) = path.trim_end_matches('/').rfind('/') {
400                (&path[..pos + 1], &path[pos + 1..])
401            } else {
402                ("", path)
403            };
404            let parent_path = parent_path.to_string();
405            let path = path.into();
406            let (parent_source, parent_index) = builder.ensure_source(&parent_path);
407            parent_source.child_source_indices.push(i);
408            builder.sources[i as usize].source.parent_source_index = Some(parent_index);
409            builder.sources[i as usize].source.path = path;
410        }
411        i += 1;
412    }
413
414    let rope = builder.build();
415    Ok(FileContent::Content(File::from(rope)).cell())
416}
417
418#[turbo_tasks::function]
419pub async fn analyze_module_graphs(module_graphs: Vc<ModuleGraphs>) -> Result<Vc<FileContent>> {
420    let mut builder = ModulesDataBuilder::new();
421
422    let mut all_modules = FxIndexSet::default();
423    let mut all_edges = FxIndexSet::default();
424    let mut all_async_edges = FxIndexSet::default();
425    for &module_graph in module_graphs.await? {
426        let module_graph = module_graph.read_graphs().await?;
427        module_graph.traverse_all_edges_unordered(|parent, node| {
428            if let Some((parent_node, reference)) = parent {
429                all_modules.insert(parent_node);
430                all_modules.insert(node);
431                match reference.chunking_type {
432                    ChunkingType::Async => {
433                        all_async_edges.insert((parent_node, node));
434                    }
435                    _ => {
436                        all_edges.insert((parent_node, node));
437                    }
438                }
439            }
440            Ok(())
441        })?;
442    }
443
444    type ModulePair = (ResolvedVc<Box<dyn Module>>, ResolvedVc<Box<dyn Module>>);
445    async fn mapper((from, to): ModulePair) -> Result<Option<(RcStr, RcStr)>> {
446        if from == to {
447            return Ok(None);
448        }
449        let from_ident = from.ident().to_string().owned().await?;
450        let to_ident = to.ident().to_string().owned().await?;
451        Ok(Some((from_ident, to_ident)))
452    }
453
454    let all_modules = all_modules
455        .iter()
456        .copied()
457        .map(async |module| {
458            let ident = module.ident().to_string().owned().await?;
459            let path = module.ident().path().to_string().owned().await?;
460            Ok((ident, path))
461        })
462        .try_join()
463        .await?;
464
465    for (ident, path) in all_modules {
466        builder.ensure_module(&ident, &path);
467    }
468
469    let all_edges = all_edges
470        .iter()
471        .copied()
472        .map(mapper)
473        .try_flat_join()
474        .await?;
475    let all_async_edges = all_async_edges
476        .iter()
477        .copied()
478        .map(mapper)
479        .try_flat_join()
480        .await?;
481    for (from_ident, to_ident) in all_edges {
482        let from_index = builder.get_module(&from_ident).1;
483        let to_index = builder.get_module(&to_ident).1;
484        if from_index == to_index {
485            continue;
486        }
487        builder.modules[from_index as usize]
488            .dependencies
489            .insert(to_index);
490        builder.modules[to_index as usize]
491            .dependents
492            .insert(from_index);
493    }
494    for (from_ident, to_ident) in all_async_edges {
495        let from_index = builder.get_module(&from_ident).1;
496        let to_index = builder.get_module(&to_ident).1;
497        if from_index == to_index {
498            continue;
499        }
500        builder.modules[from_index as usize]
501            .async_dependencies
502            .insert(to_index);
503        builder.modules[to_index as usize]
504            .async_dependents
505            .insert(from_index);
506    }
507
508    let rope = builder.build();
509    Ok(FileContent::Content(File::from(rope)).cell())
510}
511
512#[turbo_tasks::value]
513pub struct AnalyzeDataOutputAsset {
514    pub path: FileSystemPath,
515    pub output_assets: ResolvedVc<OutputAssets>,
516}
517
518#[turbo_tasks::value_impl]
519impl AnalyzeDataOutputAsset {
520    #[turbo_tasks::function]
521    pub async fn new(
522        path: FileSystemPath,
523        output_assets: ResolvedVc<OutputAssets>,
524    ) -> Result<Vc<Self>> {
525        Ok(Self {
526            path,
527            output_assets,
528        }
529        .cell())
530    }
531}
532
533#[turbo_tasks::value_impl]
534impl Asset for AnalyzeDataOutputAsset {
535    #[turbo_tasks::function]
536    fn content(&self) -> Vc<AssetContent> {
537        let file_content = analyze_output_assets(*self.output_assets);
538        AssetContent::file(file_content)
539    }
540}
541
542#[turbo_tasks::value_impl]
543impl OutputAssetsReference for AnalyzeDataOutputAsset {}
544
545#[turbo_tasks::value_impl]
546impl OutputAsset for AnalyzeDataOutputAsset {
547    #[turbo_tasks::function]
548    fn path(&self) -> Vc<FileSystemPath> {
549        self.path.clone().cell()
550    }
551}
552
553#[turbo_tasks::value]
554pub struct ModulesDataOutputAsset {
555    pub path: FileSystemPath,
556    pub module_graphs: ResolvedVc<ModuleGraphs>,
557}
558
559#[turbo_tasks::value_impl]
560impl ModulesDataOutputAsset {
561    #[turbo_tasks::function]
562    pub async fn new(path: FileSystemPath, module_graphs: Vc<ModuleGraphs>) -> Result<Vc<Self>> {
563        Ok(Self {
564            path,
565            module_graphs: module_graphs.to_resolved().await?,
566        }
567        .cell())
568    }
569}
570
571#[turbo_tasks::value_impl]
572impl Asset for ModulesDataOutputAsset {
573    #[turbo_tasks::function]
574    fn content(&self) -> Vc<AssetContent> {
575        let file_content = analyze_module_graphs(*self.module_graphs);
576        AssetContent::file(file_content)
577    }
578}
579
580#[turbo_tasks::value_impl]
581impl OutputAssetsReference for ModulesDataOutputAsset {}
582
583#[turbo_tasks::value_impl]
584impl OutputAsset for ModulesDataOutputAsset {
585    #[turbo_tasks::function]
586    fn path(&self) -> Vc<FileSystemPath> {
587        self.path.clone().cell()
588    }
589}