Skip to main content

next_api/
analyze.rs

1use std::{borrow::Cow, io::Write};
2
3use anyhow::Result;
4use byteorder::{BE, WriteBytesExt};
5use rustc_hash::FxHashMap;
6use serde::Serialize;
7use turbo_rcstr::RcStr;
8use turbo_tasks::{
9    FxIndexSet, ResolvedVc, TryFlatJoinIterExt, TryJoinIterExt, ValueToString, ValueToStringRef, Vc,
10};
11use turbo_tasks_fs::{
12    File, FileContent, FileSystemPath,
13    rope::{Rope, RopeBuilder},
14};
15use turbopack_analyze::split_chunk::split_output_asset_into_parts;
16use turbopack_core::{
17    SOURCE_URL_PROTOCOL,
18    asset::{Asset, AssetContent},
19    chunk::ChunkingType,
20    module::Module,
21    output::{OutputAsset, OutputAssets, OutputAssetsReference},
22    reference::all_assets_from_entries,
23};
24
25use crate::route::ModuleGraphs;
26
/// A list of edge lists, packed into two flat arrays.
///
/// `data` stores all edge lists back to back. `offsets` has one entry per edge list; as filled
/// in by `EdgesData::from_iterator`, entry `i` is the exclusive end position of list `i` in
/// `data` (so list `i` starts where list `i - 1` ended, and list `0` starts at `0`).
pub struct EdgesData {
    /// One cumulative end offset into `data` per edge list.
    pub offsets: Vec<u32>,
    /// The concatenated edge targets of all lists.
    pub data: Vec<u32>,
}
31
32impl EdgesData {
33    fn from_iterator<'a>(iterable: impl IntoIterator<Item = &'a Vec<u32>> + Clone) -> Self {
34        let mut current_offset = 0;
35        let sum: usize = iterable.clone().into_iter().map(|v| v.len()).sum();
36        let mut data = Vec::with_capacity(sum);
37        let offsets = iterable
38            .into_iter()
39            .map(|edges| {
40                current_offset += edges.len() as u32;
41                data.extend(edges);
42                current_offset
43            })
44            .collect();
45        Self { offsets, data }
46    }
47
48    fn write(&self, writer: &mut impl Write) -> Result<()> {
49        writer.write_u32::<BE>(self.offsets.len() as u32)?;
50        for &offset in &self.offsets {
51            writer.write_u32::<BE>(offset)?;
52        }
53        for &data in &self.data {
54            writer.write_u32::<BE>(data)?;
55        }
56        Ok(())
57    }
58}
59
60#[derive(Serialize)]
61pub struct AnalyzeSource {
62    pub parent_source_index: Option<u32>,
63    /// Path. When there is a parent, this is concatenated to the parent's path.
64    /// Folders end with a slash. Might have multiple path segments when folders contain only a
65    /// single child.
66    pub path: RcStr,
67}
68
69#[derive(Serialize)]
70pub struct AnalyzeModule {
71    pub ident: RcStr,
72    pub path: RcStr,
73}
74
75#[derive(Serialize)]
76pub struct AnalyzeChunkPart {
77    pub source_index: u32,
78    pub output_file_index: u32,
79    pub size: u32,
80    pub compressed_size: u32,
81}
82
83#[derive(Serialize)]
84pub struct AnalyzeOutputFile {
85    pub filename: RcStr,
86}
87
88#[derive(Serialize)]
89struct EdgesDataReference {
90    pub offset: u32,
91    pub length: u32,
92}
93
94#[derive(Serialize)]
95struct AnalyzeDataHeader {
96    pub sources: Vec<AnalyzeSource>,
97    pub chunk_parts: Vec<AnalyzeChunkPart>,
98    pub output_files: Vec<AnalyzeOutputFile>,
99    /// Edges from chunks to chunk parts
100    pub output_file_chunk_parts: EdgesDataReference,
101    /// Edges from sources to chunk parts
102    pub source_chunk_parts: EdgesDataReference,
103    /// Edges from sources to their children sources
104    pub source_children: EdgesDataReference,
105    /// Root level sources, walking their children will reach all sources
106    pub source_roots: Vec<u32>,
107}
108
109#[derive(Serialize)]
110struct ModulesDataHeader {
111    pub modules: Vec<AnalyzeModule>,
112    /// Edges from modules to modules
113    pub module_dependents: EdgesDataReference,
114    /// Edges from modules to modules
115    pub async_module_dependents: EdgesDataReference,
116    /// Edges from modules to modules
117    pub module_dependencies: EdgesDataReference,
118    /// Edges from modules to modules
119    pub async_module_dependencies: EdgesDataReference,
120}
121
122struct AnalyzeOutputFileBuilder {
123    output_file: AnalyzeOutputFile,
124    chunk_part_indices: Vec<u32>,
125}
126
127struct AnalyzeSourceBuilder {
128    source: AnalyzeSource,
129    child_source_indices: Vec<u32>,
130    chunk_part_indices: Vec<u32>,
131}
132
133struct AnalyzeModuleBuilder {
134    module: AnalyzeModule,
135    dependencies: FxIndexSet<u32>,
136    async_dependencies: FxIndexSet<u32>,
137    dependents: FxIndexSet<u32>,
138    async_dependents: FxIndexSet<u32>,
139}
140
141struct AnalyzeDataBuilder {
142    sources: Vec<AnalyzeSourceBuilder>,
143    source_index_map: FxHashMap<RcStr, u32>,
144    chunk_parts: Vec<AnalyzeChunkPart>,
145    output_files: Vec<AnalyzeOutputFileBuilder>,
146}
147
148struct ModulesDataBuilder {
149    modules: Vec<AnalyzeModuleBuilder>,
150    module_index_map: FxHashMap<RcStr, u32>,
151}
152
/// Collects serialized edge tables into one contiguous binary section.
struct EdgesDataSectionBuilder {
    /// The bytes of all tables added so far, back to back.
    data: Vec<u8>,
}
156
157impl EdgesDataSectionBuilder {
158    fn new() -> Self {
159        Self { data: vec![] }
160    }
161
162    fn add_edges(&mut self, edges: &EdgesData) -> EdgesDataReference {
163        let offset = self.data.len().try_into().unwrap();
164        edges.write(&mut self.data).unwrap();
165        let length = (self.data.len() - offset as usize).try_into().unwrap();
166        EdgesDataReference { offset, length }
167    }
168}
169
170impl AnalyzeDataBuilder {
171    fn new() -> Self {
172        Self {
173            sources: vec![],
174            source_index_map: FxHashMap::default(),
175            chunk_parts: vec![],
176            output_files: vec![],
177        }
178    }
179
180    fn ensure_source(&mut self, path: &str) -> (&mut AnalyzeSourceBuilder, u32) {
181        if let Some(&index) = self.source_index_map.get(path) {
182            return (&mut self.sources[index as usize], index);
183        }
184        let index = self.sources.len() as u32;
185        let path = RcStr::from(path);
186        self.source_index_map.insert(path.clone(), index);
187        self.sources.push(AnalyzeSourceBuilder {
188            source: AnalyzeSource {
189                parent_source_index: None,
190                path,
191            },
192            child_source_indices: vec![],
193            chunk_part_indices: vec![],
194        });
195        (&mut self.sources[index as usize], index)
196    }
197
198    fn add_chunk_part(&mut self, chunk_part: AnalyzeChunkPart) -> u32 {
199        let i = self.chunk_parts.len() as u32;
200        self.chunk_parts.push(chunk_part);
201        i
202    }
203
204    fn add_output_file(&mut self, output_file: AnalyzeOutputFile) -> u32 {
205        let i = self.output_files.len() as u32;
206        self.output_files.push(AnalyzeOutputFileBuilder {
207            output_file,
208            chunk_part_indices: vec![],
209        });
210        i
211    }
212
213    fn add_chunk_part_to_output_file(&mut self, output_file_index: u32, chunk_part_index: u32) {
214        self.output_files[output_file_index as usize]
215            .chunk_part_indices
216            .push(chunk_part_index);
217    }
218
219    fn add_chunk_part_to_source(&mut self, source_index: u32, chunk_part_index: u32) {
220        self.sources[source_index as usize]
221            .chunk_part_indices
222            .push(chunk_part_index);
223    }
224
225    fn build(self) -> Rope {
226        let source_roots = self
227            .sources
228            .iter()
229            .enumerate()
230            .filter_map(|(i, s)| {
231                if s.source.parent_source_index.is_none() {
232                    Some(i as u32)
233                } else {
234                    None
235                }
236            })
237            .collect();
238
239        let source_children =
240            EdgesData::from_iterator(self.sources.iter().map(|s| &s.child_source_indices));
241
242        let source_chunk_parts =
243            EdgesData::from_iterator(self.sources.iter().map(|s| &s.chunk_part_indices));
244
245        let output_file_chunk_parts =
246            EdgesData::from_iterator(self.output_files.iter().map(|of| &of.chunk_part_indices));
247
248        let mut binary_section = EdgesDataSectionBuilder::new();
249
250        let header = AnalyzeDataHeader {
251            sources: self.sources.into_iter().map(|s| s.source).collect(),
252            chunk_parts: self.chunk_parts,
253            output_files: self
254                .output_files
255                .into_iter()
256                .map(|of| of.output_file)
257                .collect(),
258            output_file_chunk_parts: binary_section.add_edges(&output_file_chunk_parts),
259            source_chunk_parts: binary_section.add_edges(&source_chunk_parts),
260            source_children: binary_section.add_edges(&source_children),
261            source_roots,
262        };
263
264        let header_json = serde_json::to_vec(&header).unwrap();
265
266        let mut rope = RopeBuilder::default();
267        rope.push_bytes(&(header_json.len() as u32).to_be_bytes());
268        rope.reserve_bytes(header_json.len() + binary_section.data.len());
269        rope.push_bytes(&header_json);
270        rope.push_bytes(&binary_section.data);
271        rope.build()
272    }
273}
274
275impl ModulesDataBuilder {
276    fn new() -> Self {
277        Self {
278            modules: vec![],
279            module_index_map: FxHashMap::default(),
280        }
281    }
282
283    fn get_module(&mut self, ident: &str) -> (&mut AnalyzeModuleBuilder, u32) {
284        if let Some(&index) = self.module_index_map.get(ident) {
285            return (&mut self.modules[index as usize], index);
286        }
287        panic!("Module with ident `{}` not found", ident);
288    }
289
290    fn ensure_module(&mut self, ident: &str, path: &str) -> (&mut AnalyzeModuleBuilder, u32) {
291        if let Some(&index) = self.module_index_map.get(ident) {
292            return (&mut self.modules[index as usize], index);
293        }
294        let index = self.modules.len() as u32;
295        let ident = RcStr::from(ident);
296        let path = RcStr::from(path);
297        self.module_index_map.insert(ident.clone(), index);
298        self.modules.push(AnalyzeModuleBuilder {
299            module: AnalyzeModule { ident, path },
300            dependencies: FxIndexSet::default(),
301            async_dependencies: FxIndexSet::default(),
302            dependents: FxIndexSet::default(),
303            async_dependents: FxIndexSet::default(),
304        });
305        (&mut self.modules[index as usize], index)
306    }
307
308    fn build(self) -> Rope {
309        let module_dependencies_vecs: Vec<Vec<u32>> = self
310            .modules
311            .iter()
312            .map(|s| s.dependencies.iter().copied().collect())
313            .collect();
314        let async_module_dependencies_vecs: Vec<Vec<u32>> = self
315            .modules
316            .iter()
317            .map(|s| s.async_dependencies.iter().copied().collect())
318            .collect();
319        let module_dependents_vecs: Vec<Vec<u32>> = self
320            .modules
321            .iter()
322            .map(|s| s.dependents.iter().copied().collect())
323            .collect();
324        let async_module_dependents_vecs: Vec<Vec<u32>> = self
325            .modules
326            .iter()
327            .map(|s| s.async_dependents.iter().copied().collect())
328            .collect();
329
330        let module_dependencies = EdgesData::from_iterator(&module_dependencies_vecs);
331        let async_module_dependencies = EdgesData::from_iterator(&async_module_dependencies_vecs);
332        let module_dependents = EdgesData::from_iterator(&module_dependents_vecs);
333        let async_module_dependents = EdgesData::from_iterator(&async_module_dependents_vecs);
334
335        let mut binary_section = EdgesDataSectionBuilder::new();
336
337        let header = ModulesDataHeader {
338            modules: self.modules.into_iter().map(|s| s.module).collect(),
339            module_dependents: binary_section.add_edges(&module_dependents),
340            async_module_dependents: binary_section.add_edges(&async_module_dependents),
341            module_dependencies: binary_section.add_edges(&module_dependencies),
342            async_module_dependencies: binary_section.add_edges(&async_module_dependencies),
343        };
344
345        let header_json = serde_json::to_vec(&header).unwrap();
346
347        let mut rope = RopeBuilder::default();
348        rope.push_bytes(&(header_json.len() as u32).to_be_bytes());
349        rope.reserve_bytes(header_json.len() + binary_section.data.len());
350        rope.push_bytes(&header_json);
351        rope.push_bytes(&binary_section.data);
352        rope.build()
353    }
354}
355
356/// Merges two sets of output assets into one. Used to combine per-route output
357/// assets with shared assets (e.g. `_app`, `_document`) at report generation time.
358#[turbo_tasks::function]
359pub async fn combine_output_assets(
360    primary: Vc<OutputAssets>,
361    extra: Vc<OutputAssets>,
362) -> Result<Vc<OutputAssets>> {
363    let mut combined: Vec<ResolvedVc<Box<dyn OutputAsset>>> =
364        primary.await?.iter().copied().collect();
365    combined.extend(extra.await?.iter().copied());
366    Ok(Vc::cell(combined))
367}
368
369#[turbo_tasks::function]
370pub async fn analyze_output_assets(output_assets: Vc<OutputAssets>) -> Result<Vc<FileContent>> {
371    let output_assets = all_assets_from_entries(output_assets);
372
373    let mut builder = AnalyzeDataBuilder::new();
374
375    let prefix = format!("{SOURCE_URL_PROTOCOL}///");
376
377    // Process the output assets and extract chunk parts.
378    // Also creates sources for the chunk parts.
379    for asset in output_assets.await? {
380        let filename = asset.path().to_string().owned().await?;
381        if filename.ends_with(".map") || filename.ends_with(".nft.json") {
382            // Skip source maps.
383            continue;
384        }
385
386        let output_file_index = builder.add_output_file(AnalyzeOutputFile { filename });
387        let chunk_parts = split_output_asset_into_parts(*asset).await?;
388        for chunk_part in &chunk_parts {
389            let decoded_source = urlencoding::decode(&chunk_part.source)?;
390            let source = if let Some(stripped) = decoded_source.strip_prefix(&prefix) {
391                Cow::Borrowed(stripped)
392            } else {
393                Cow::Owned(format!(
394                    "[project]/{}",
395                    decoded_source.trim_start_matches("../")
396                ))
397            };
398            let source_index = builder.ensure_source(&source).1;
399            let chunk_part_index = builder.add_chunk_part(AnalyzeChunkPart {
400                source_index,
401                output_file_index,
402                size: chunk_part.real_size + chunk_part.unaccounted_size,
403                compressed_size: chunk_part.get_compressed_size().await?,
404            });
405            builder.add_chunk_part_to_output_file(output_file_index, chunk_part_index);
406            builder.add_chunk_part_to_source(source_index, chunk_part_index);
407        }
408    }
409
410    // Build a directory structure for the sources.
411    let mut i: u32 = 0;
412    while i < builder.sources.len().try_into().unwrap() {
413        let source = &builder.sources[i as usize];
414        let path = source.source.path.as_str();
415        if !path.is_empty() {
416            let (parent_path, path) = if let Some(pos) = path.trim_end_matches('/').rfind('/') {
417                (&path[..pos + 1], &path[pos + 1..])
418            } else {
419                ("", path)
420            };
421            let parent_path = parent_path.to_string();
422            let path = path.into();
423            let (parent_source, parent_index) = builder.ensure_source(&parent_path);
424            parent_source.child_source_indices.push(i);
425            builder.sources[i as usize].source.parent_source_index = Some(parent_index);
426            builder.sources[i as usize].source.path = path;
427        }
428        i += 1;
429    }
430
431    let rope = builder.build();
432    Ok(FileContent::Content(File::from(rope)).cell())
433}
434
435#[turbo_tasks::function]
436pub async fn analyze_module_graphs(module_graphs: Vc<ModuleGraphs>) -> Result<Vc<FileContent>> {
437    let mut builder = ModulesDataBuilder::new();
438
439    let mut all_modules = FxIndexSet::default();
440    let mut all_edges = FxIndexSet::default();
441    let mut all_async_edges = FxIndexSet::default();
442    for module_graph in module_graphs.await? {
443        let module_graph = module_graph.await?;
444        module_graph.traverse_edges_unordered(|parent, node| {
445            if let Some((parent_node, reference)) = parent {
446                all_modules.insert(parent_node);
447                all_modules.insert(node);
448                match reference.chunking_type {
449                    ChunkingType::Async => {
450                        all_async_edges.insert((parent_node, node));
451                    }
452                    _ => {
453                        all_edges.insert((parent_node, node));
454                    }
455                }
456            }
457            Ok(())
458        })?;
459    }
460
461    type ModulePair = (ResolvedVc<Box<dyn Module>>, ResolvedVc<Box<dyn Module>>);
462    async fn mapper((from, to): ModulePair) -> Result<Option<(RcStr, RcStr)>> {
463        if from == to {
464            return Ok(None);
465        }
466        let from_ident = from.ident().to_string().owned().await?;
467        let to_ident = to.ident().to_string().owned().await?;
468        Ok(Some((from_ident, to_ident)))
469    }
470
471    let all_modules = all_modules
472        .iter()
473        .copied()
474        .map(async |module| {
475            let ident = module.ident().to_string().owned().await?;
476            let path = module.ident().await?.path.to_string_ref().await?;
477            Ok((ident, path))
478        })
479        .try_join()
480        .await?;
481
482    for (ident, path) in all_modules {
483        builder.ensure_module(&ident, &path);
484    }
485
486    let all_edges = all_edges
487        .iter()
488        .copied()
489        .map(mapper)
490        .try_flat_join()
491        .await?;
492    let all_async_edges = all_async_edges
493        .iter()
494        .copied()
495        .map(mapper)
496        .try_flat_join()
497        .await?;
498    for (from_ident, to_ident) in all_edges {
499        let from_index = builder.get_module(&from_ident).1;
500        let to_index = builder.get_module(&to_ident).1;
501        if from_index == to_index {
502            continue;
503        }
504        builder.modules[from_index as usize]
505            .dependencies
506            .insert(to_index);
507        builder.modules[to_index as usize]
508            .dependents
509            .insert(from_index);
510    }
511    for (from_ident, to_ident) in all_async_edges {
512        let from_index = builder.get_module(&from_ident).1;
513        let to_index = builder.get_module(&to_ident).1;
514        if from_index == to_index {
515            continue;
516        }
517        builder.modules[from_index as usize]
518            .async_dependencies
519            .insert(to_index);
520        builder.modules[to_index as usize]
521            .async_dependents
522            .insert(from_index);
523    }
524
525    let rope = builder.build();
526    Ok(FileContent::Content(File::from(rope)).cell())
527}
528
529#[turbo_tasks::value]
530pub struct AnalyzeDataOutputAsset {
531    pub path: FileSystemPath,
532    pub output_assets: ResolvedVc<OutputAssets>,
533}
534
535#[turbo_tasks::value_impl]
536impl AnalyzeDataOutputAsset {
537    #[turbo_tasks::function]
538    pub async fn new(
539        path: FileSystemPath,
540        output_assets: ResolvedVc<OutputAssets>,
541    ) -> Result<Vc<Self>> {
542        Ok(Self {
543            path,
544            output_assets,
545        }
546        .cell())
547    }
548}
549
550#[turbo_tasks::value_impl]
551impl Asset for AnalyzeDataOutputAsset {
552    #[turbo_tasks::function]
553    fn content(&self) -> Vc<AssetContent> {
554        let file_content = analyze_output_assets(*self.output_assets);
555        AssetContent::file(file_content)
556    }
557}
558
559#[turbo_tasks::value_impl]
560impl OutputAssetsReference for AnalyzeDataOutputAsset {}
561
562#[turbo_tasks::value_impl]
563impl OutputAsset for AnalyzeDataOutputAsset {
564    #[turbo_tasks::function]
565    fn path(&self) -> Vc<FileSystemPath> {
566        self.path.clone().cell()
567    }
568}
569
570#[turbo_tasks::value]
571pub struct ModulesDataOutputAsset {
572    pub path: FileSystemPath,
573    pub module_graphs: ResolvedVc<ModuleGraphs>,
574}
575
576#[turbo_tasks::value_impl]
577impl ModulesDataOutputAsset {
578    #[turbo_tasks::function]
579    pub async fn new(path: FileSystemPath, module_graphs: Vc<ModuleGraphs>) -> Result<Vc<Self>> {
580        Ok(Self {
581            path,
582            module_graphs: module_graphs.to_resolved().await?,
583        }
584        .cell())
585    }
586}
587
588#[turbo_tasks::value_impl]
589impl Asset for ModulesDataOutputAsset {
590    #[turbo_tasks::function]
591    fn content(&self) -> Vc<AssetContent> {
592        let file_content = analyze_module_graphs(*self.module_graphs);
593        AssetContent::file(file_content)
594    }
595}
596
597#[turbo_tasks::value_impl]
598impl OutputAssetsReference for ModulesDataOutputAsset {}
599
600#[turbo_tasks::value_impl]
601impl OutputAsset for ModulesDataOutputAsset {
602    #[turbo_tasks::function]
603    fn path(&self) -> Vc<FileSystemPath> {
604        self.path.clone().cell()
605    }
606}