Skip to main content

next_api/
analyze.rs

1use std::{borrow::Cow, io::Write};
2
3use anyhow::Result;
4use byteorder::{BE, WriteBytesExt};
5use rustc_hash::FxHashMap;
6use serde::Serialize;
7use turbo_rcstr::RcStr;
8use turbo_tasks::{FxIndexSet, ResolvedVc, TryFlatJoinIterExt, TryJoinIterExt, ValueToString, Vc};
9use turbo_tasks_fs::{
10    File, FileContent, FileSystemPath,
11    rope::{Rope, RopeBuilder},
12};
13use turbopack_analyze::split_chunk::split_output_asset_into_parts;
14use turbopack_core::{
15    SOURCE_URL_PROTOCOL,
16    asset::{Asset, AssetContent},
17    chunk::ChunkingType,
18    module::Module,
19    output::{OutputAsset, OutputAssets, OutputAssetsReference},
20    reference::all_assets_from_entries,
21};
22
23use crate::route::ModuleGraphs;
24
25pub struct EdgesData {
26    pub offsets: Vec<u32>,
27    pub data: Vec<u32>,
28}
29
30impl EdgesData {
31    fn from_iterator<'a>(iterable: impl IntoIterator<Item = &'a Vec<u32>> + Clone) -> Self {
32        let mut current_offset = 0;
33        let sum: usize = iterable.clone().into_iter().map(|v| v.len()).sum();
34        let mut data = Vec::with_capacity(sum);
35        let offsets = iterable
36            .into_iter()
37            .map(|edges| {
38                current_offset += edges.len() as u32;
39                data.extend(edges);
40                current_offset
41            })
42            .collect();
43        Self { offsets, data }
44    }
45
46    fn write(&self, writer: &mut impl Write) -> Result<()> {
47        writer.write_u32::<BE>(self.offsets.len() as u32)?;
48        for &offset in &self.offsets {
49            writer.write_u32::<BE>(offset)?;
50        }
51        for &data in &self.data {
52            writer.write_u32::<BE>(data)?;
53        }
54        Ok(())
55    }
56}
57
58#[derive(Serialize)]
59pub struct AnalyzeSource {
60    pub parent_source_index: Option<u32>,
61    /// Path. When there is a parent, this is concatenated to the parent's path.
62    /// Folders end with a slash. Might have multiple path segments when folders contain only a
63    /// single child.
64    pub path: RcStr,
65}
66
67#[derive(Serialize)]
68pub struct AnalyzeModule {
69    pub ident: RcStr,
70    pub path: RcStr,
71}
72
73#[derive(Serialize)]
74pub struct AnalyzeChunkPart {
75    pub source_index: u32,
76    pub output_file_index: u32,
77    pub size: u32,
78    pub compressed_size: u32,
79}
80
81#[derive(Serialize)]
82pub struct AnalyzeOutputFile {
83    pub filename: RcStr,
84}
85
86#[derive(Serialize)]
87struct EdgesDataReference {
88    pub offset: u32,
89    pub length: u32,
90}
91
92#[derive(Serialize)]
93struct AnalyzeDataHeader {
94    pub sources: Vec<AnalyzeSource>,
95    pub chunk_parts: Vec<AnalyzeChunkPart>,
96    pub output_files: Vec<AnalyzeOutputFile>,
97    /// Edges from chunks to chunk parts
98    pub output_file_chunk_parts: EdgesDataReference,
99    /// Edges from sources to chunk parts
100    pub source_chunk_parts: EdgesDataReference,
101    /// Edges from sources to their children sources
102    pub source_children: EdgesDataReference,
103    /// Root level sources, walking their children will reach all sources
104    pub source_roots: Vec<u32>,
105}
106
107#[derive(Serialize)]
108struct ModulesDataHeader {
109    pub modules: Vec<AnalyzeModule>,
110    /// Edges from modules to modules
111    pub module_dependents: EdgesDataReference,
112    /// Edges from modules to modules
113    pub async_module_dependents: EdgesDataReference,
114    /// Edges from modules to modules
115    pub module_dependencies: EdgesDataReference,
116    /// Edges from modules to modules
117    pub async_module_dependencies: EdgesDataReference,
118}
119
120struct AnalyzeOutputFileBuilder {
121    output_file: AnalyzeOutputFile,
122    chunk_part_indices: Vec<u32>,
123}
124
125struct AnalyzeSourceBuilder {
126    source: AnalyzeSource,
127    child_source_indices: Vec<u32>,
128    chunk_part_indices: Vec<u32>,
129}
130
131struct AnalyzeModuleBuilder {
132    module: AnalyzeModule,
133    dependencies: FxIndexSet<u32>,
134    async_dependencies: FxIndexSet<u32>,
135    dependents: FxIndexSet<u32>,
136    async_dependents: FxIndexSet<u32>,
137}
138
139struct AnalyzeDataBuilder {
140    sources: Vec<AnalyzeSourceBuilder>,
141    source_index_map: FxHashMap<RcStr, u32>,
142    chunk_parts: Vec<AnalyzeChunkPart>,
143    output_files: Vec<AnalyzeOutputFileBuilder>,
144}
145
146struct ModulesDataBuilder {
147    modules: Vec<AnalyzeModuleBuilder>,
148    module_index_map: FxHashMap<RcStr, u32>,
149}
150
151struct EdgesDataSectionBuilder {
152    data: Vec<u8>,
153}
154
155impl EdgesDataSectionBuilder {
156    fn new() -> Self {
157        Self { data: vec![] }
158    }
159
160    fn add_edges(&mut self, edges: &EdgesData) -> EdgesDataReference {
161        let offset = self.data.len().try_into().unwrap();
162        edges.write(&mut self.data).unwrap();
163        let length = (self.data.len() - offset as usize).try_into().unwrap();
164        EdgesDataReference { offset, length }
165    }
166}
167
168impl AnalyzeDataBuilder {
169    fn new() -> Self {
170        Self {
171            sources: vec![],
172            source_index_map: FxHashMap::default(),
173            chunk_parts: vec![],
174            output_files: vec![],
175        }
176    }
177
178    fn ensure_source(&mut self, path: &str) -> (&mut AnalyzeSourceBuilder, u32) {
179        if let Some(&index) = self.source_index_map.get(path) {
180            return (&mut self.sources[index as usize], index);
181        }
182        let index = self.sources.len() as u32;
183        let path = RcStr::from(path);
184        self.source_index_map.insert(path.clone(), index);
185        self.sources.push(AnalyzeSourceBuilder {
186            source: AnalyzeSource {
187                parent_source_index: None,
188                path,
189            },
190            child_source_indices: vec![],
191            chunk_part_indices: vec![],
192        });
193        (&mut self.sources[index as usize], index)
194    }
195
196    fn add_chunk_part(&mut self, chunk_part: AnalyzeChunkPart) -> u32 {
197        let i = self.chunk_parts.len() as u32;
198        self.chunk_parts.push(chunk_part);
199        i
200    }
201
202    fn add_output_file(&mut self, output_file: AnalyzeOutputFile) -> u32 {
203        let i = self.output_files.len() as u32;
204        self.output_files.push(AnalyzeOutputFileBuilder {
205            output_file,
206            chunk_part_indices: vec![],
207        });
208        i
209    }
210
211    fn add_chunk_part_to_output_file(&mut self, output_file_index: u32, chunk_part_index: u32) {
212        self.output_files[output_file_index as usize]
213            .chunk_part_indices
214            .push(chunk_part_index);
215    }
216
217    fn add_chunk_part_to_source(&mut self, source_index: u32, chunk_part_index: u32) {
218        self.sources[source_index as usize]
219            .chunk_part_indices
220            .push(chunk_part_index);
221    }
222
223    fn build(self) -> Rope {
224        let source_roots = self
225            .sources
226            .iter()
227            .enumerate()
228            .filter_map(|(i, s)| {
229                if s.source.parent_source_index.is_none() {
230                    Some(i as u32)
231                } else {
232                    None
233                }
234            })
235            .collect();
236
237        let source_children =
238            EdgesData::from_iterator(self.sources.iter().map(|s| &s.child_source_indices));
239
240        let source_chunk_parts =
241            EdgesData::from_iterator(self.sources.iter().map(|s| &s.chunk_part_indices));
242
243        let output_file_chunk_parts =
244            EdgesData::from_iterator(self.output_files.iter().map(|of| &of.chunk_part_indices));
245
246        let mut binary_section = EdgesDataSectionBuilder::new();
247
248        let header = AnalyzeDataHeader {
249            sources: self.sources.into_iter().map(|s| s.source).collect(),
250            chunk_parts: self.chunk_parts,
251            output_files: self
252                .output_files
253                .into_iter()
254                .map(|of| of.output_file)
255                .collect(),
256            output_file_chunk_parts: binary_section.add_edges(&output_file_chunk_parts),
257            source_chunk_parts: binary_section.add_edges(&source_chunk_parts),
258            source_children: binary_section.add_edges(&source_children),
259            source_roots,
260        };
261
262        let header_json = serde_json::to_vec(&header).unwrap();
263
264        let mut rope = RopeBuilder::default();
265        rope.push_bytes(&(header_json.len() as u32).to_be_bytes());
266        rope.reserve_bytes(header_json.len() + binary_section.data.len());
267        rope.push_bytes(&header_json);
268        rope.push_bytes(&binary_section.data);
269        rope.build()
270    }
271}
272
273impl ModulesDataBuilder {
274    fn new() -> Self {
275        Self {
276            modules: vec![],
277            module_index_map: FxHashMap::default(),
278        }
279    }
280
281    fn get_module(&mut self, ident: &str) -> (&mut AnalyzeModuleBuilder, u32) {
282        if let Some(&index) = self.module_index_map.get(ident) {
283            return (&mut self.modules[index as usize], index);
284        }
285        panic!("Module with ident `{}` not found", ident);
286    }
287
288    fn ensure_module(&mut self, ident: &str, path: &str) -> (&mut AnalyzeModuleBuilder, u32) {
289        if let Some(&index) = self.module_index_map.get(ident) {
290            return (&mut self.modules[index as usize], index);
291        }
292        let index = self.modules.len() as u32;
293        let ident = RcStr::from(ident);
294        let path = RcStr::from(path);
295        self.module_index_map.insert(ident.clone(), index);
296        self.modules.push(AnalyzeModuleBuilder {
297            module: AnalyzeModule { ident, path },
298            dependencies: FxIndexSet::default(),
299            async_dependencies: FxIndexSet::default(),
300            dependents: FxIndexSet::default(),
301            async_dependents: FxIndexSet::default(),
302        });
303        (&mut self.modules[index as usize], index)
304    }
305
306    fn build(self) -> Rope {
307        let module_dependencies_vecs: Vec<Vec<u32>> = self
308            .modules
309            .iter()
310            .map(|s| s.dependencies.iter().copied().collect())
311            .collect();
312        let async_module_dependencies_vecs: Vec<Vec<u32>> = self
313            .modules
314            .iter()
315            .map(|s| s.async_dependencies.iter().copied().collect())
316            .collect();
317        let module_dependents_vecs: Vec<Vec<u32>> = self
318            .modules
319            .iter()
320            .map(|s| s.dependents.iter().copied().collect())
321            .collect();
322        let async_module_dependents_vecs: Vec<Vec<u32>> = self
323            .modules
324            .iter()
325            .map(|s| s.async_dependents.iter().copied().collect())
326            .collect();
327
328        let module_dependencies = EdgesData::from_iterator(&module_dependencies_vecs);
329        let async_module_dependencies = EdgesData::from_iterator(&async_module_dependencies_vecs);
330        let module_dependents = EdgesData::from_iterator(&module_dependents_vecs);
331        let async_module_dependents = EdgesData::from_iterator(&async_module_dependents_vecs);
332
333        let mut binary_section = EdgesDataSectionBuilder::new();
334
335        let header = ModulesDataHeader {
336            modules: self.modules.into_iter().map(|s| s.module).collect(),
337            module_dependents: binary_section.add_edges(&module_dependents),
338            async_module_dependents: binary_section.add_edges(&async_module_dependents),
339            module_dependencies: binary_section.add_edges(&module_dependencies),
340            async_module_dependencies: binary_section.add_edges(&async_module_dependencies),
341        };
342
343        let header_json = serde_json::to_vec(&header).unwrap();
344
345        let mut rope = RopeBuilder::default();
346        rope.push_bytes(&(header_json.len() as u32).to_be_bytes());
347        rope.reserve_bytes(header_json.len() + binary_section.data.len());
348        rope.push_bytes(&header_json);
349        rope.push_bytes(&binary_section.data);
350        rope.build()
351    }
352}
353
354/// Merges two sets of output assets into one. Used to combine per-route output
355/// assets with shared assets (e.g. `_app`, `_document`) at report generation time.
356#[turbo_tasks::function]
357pub async fn combine_output_assets(
358    primary: Vc<OutputAssets>,
359    extra: Vc<OutputAssets>,
360) -> Result<Vc<OutputAssets>> {
361    let mut combined: Vec<ResolvedVc<Box<dyn OutputAsset>>> =
362        primary.await?.iter().copied().collect();
363    combined.extend(extra.await?.iter().copied());
364    Ok(Vc::cell(combined))
365}
366
367#[turbo_tasks::function]
368pub async fn analyze_output_assets(output_assets: Vc<OutputAssets>) -> Result<Vc<FileContent>> {
369    let output_assets = all_assets_from_entries(output_assets);
370
371    let mut builder = AnalyzeDataBuilder::new();
372
373    let prefix = format!("{SOURCE_URL_PROTOCOL}///");
374
375    // Process the output assets and extract chunk parts.
376    // Also creates sources for the chunk parts.
377    for &asset in output_assets.await? {
378        let filename = asset.path().to_string().owned().await?;
379        if filename.ends_with(".map") || filename.ends_with(".nft.json") {
380            // Skip source maps.
381            continue;
382        }
383
384        let output_file_index = builder.add_output_file(AnalyzeOutputFile { filename });
385        let chunk_parts = split_output_asset_into_parts(*asset).await?;
386        for chunk_part in chunk_parts {
387            let decoded_source = urlencoding::decode(&chunk_part.source)?;
388            let source = if let Some(stripped) = decoded_source.strip_prefix(&prefix) {
389                Cow::Borrowed(stripped)
390            } else {
391                Cow::Owned(format!(
392                    "[project]/{}",
393                    decoded_source.trim_start_matches("../")
394                ))
395            };
396            let source_index = builder.ensure_source(&source).1;
397            let chunk_part_index = builder.add_chunk_part(AnalyzeChunkPart {
398                source_index,
399                output_file_index,
400                size: chunk_part.real_size + chunk_part.unaccounted_size,
401                compressed_size: chunk_part.get_compressed_size().await?,
402            });
403            builder.add_chunk_part_to_output_file(output_file_index, chunk_part_index);
404            builder.add_chunk_part_to_source(source_index, chunk_part_index);
405        }
406    }
407
408    // Build a directory structure for the sources.
409    let mut i: u32 = 0;
410    while i < builder.sources.len().try_into().unwrap() {
411        let source = &builder.sources[i as usize];
412        let path = source.source.path.as_str();
413        if !path.is_empty() {
414            let (parent_path, path) = if let Some(pos) = path.trim_end_matches('/').rfind('/') {
415                (&path[..pos + 1], &path[pos + 1..])
416            } else {
417                ("", path)
418            };
419            let parent_path = parent_path.to_string();
420            let path = path.into();
421            let (parent_source, parent_index) = builder.ensure_source(&parent_path);
422            parent_source.child_source_indices.push(i);
423            builder.sources[i as usize].source.parent_source_index = Some(parent_index);
424            builder.sources[i as usize].source.path = path;
425        }
426        i += 1;
427    }
428
429    let rope = builder.build();
430    Ok(FileContent::Content(File::from(rope)).cell())
431}
432
433#[turbo_tasks::function]
434pub async fn analyze_module_graphs(module_graphs: Vc<ModuleGraphs>) -> Result<Vc<FileContent>> {
435    let mut builder = ModulesDataBuilder::new();
436
437    let mut all_modules = FxIndexSet::default();
438    let mut all_edges = FxIndexSet::default();
439    let mut all_async_edges = FxIndexSet::default();
440    for &module_graph in module_graphs.await? {
441        let module_graph = module_graph.await?;
442        module_graph.traverse_edges_unordered(|parent, node| {
443            if let Some((parent_node, reference)) = parent {
444                all_modules.insert(parent_node);
445                all_modules.insert(node);
446                match reference.chunking_type {
447                    ChunkingType::Async => {
448                        all_async_edges.insert((parent_node, node));
449                    }
450                    _ => {
451                        all_edges.insert((parent_node, node));
452                    }
453                }
454            }
455            Ok(())
456        })?;
457    }
458
459    type ModulePair = (ResolvedVc<Box<dyn Module>>, ResolvedVc<Box<dyn Module>>);
460    async fn mapper((from, to): ModulePair) -> Result<Option<(RcStr, RcStr)>> {
461        if from == to {
462            return Ok(None);
463        }
464        let from_ident = from.ident().to_string().owned().await?;
465        let to_ident = to.ident().to_string().owned().await?;
466        Ok(Some((from_ident, to_ident)))
467    }
468
469    let all_modules = all_modules
470        .iter()
471        .copied()
472        .map(async |module| {
473            let ident = module.ident().to_string().owned().await?;
474            let path = module.ident().path().to_string().owned().await?;
475            Ok((ident, path))
476        })
477        .try_join()
478        .await?;
479
480    for (ident, path) in all_modules {
481        builder.ensure_module(&ident, &path);
482    }
483
484    let all_edges = all_edges
485        .iter()
486        .copied()
487        .map(mapper)
488        .try_flat_join()
489        .await?;
490    let all_async_edges = all_async_edges
491        .iter()
492        .copied()
493        .map(mapper)
494        .try_flat_join()
495        .await?;
496    for (from_ident, to_ident) in all_edges {
497        let from_index = builder.get_module(&from_ident).1;
498        let to_index = builder.get_module(&to_ident).1;
499        if from_index == to_index {
500            continue;
501        }
502        builder.modules[from_index as usize]
503            .dependencies
504            .insert(to_index);
505        builder.modules[to_index as usize]
506            .dependents
507            .insert(from_index);
508    }
509    for (from_ident, to_ident) in all_async_edges {
510        let from_index = builder.get_module(&from_ident).1;
511        let to_index = builder.get_module(&to_ident).1;
512        if from_index == to_index {
513            continue;
514        }
515        builder.modules[from_index as usize]
516            .async_dependencies
517            .insert(to_index);
518        builder.modules[to_index as usize]
519            .async_dependents
520            .insert(from_index);
521    }
522
523    let rope = builder.build();
524    Ok(FileContent::Content(File::from(rope)).cell())
525}
526
527#[turbo_tasks::value]
528pub struct AnalyzeDataOutputAsset {
529    pub path: FileSystemPath,
530    pub output_assets: ResolvedVc<OutputAssets>,
531}
532
533#[turbo_tasks::value_impl]
534impl AnalyzeDataOutputAsset {
535    #[turbo_tasks::function]
536    pub async fn new(
537        path: FileSystemPath,
538        output_assets: ResolvedVc<OutputAssets>,
539    ) -> Result<Vc<Self>> {
540        Ok(Self {
541            path,
542            output_assets,
543        }
544        .cell())
545    }
546}
547
548#[turbo_tasks::value_impl]
549impl Asset for AnalyzeDataOutputAsset {
550    #[turbo_tasks::function]
551    fn content(&self) -> Vc<AssetContent> {
552        let file_content = analyze_output_assets(*self.output_assets);
553        AssetContent::file(file_content)
554    }
555}
556
557#[turbo_tasks::value_impl]
558impl OutputAssetsReference for AnalyzeDataOutputAsset {}
559
560#[turbo_tasks::value_impl]
561impl OutputAsset for AnalyzeDataOutputAsset {
562    #[turbo_tasks::function]
563    fn path(&self) -> Vc<FileSystemPath> {
564        self.path.clone().cell()
565    }
566}
567
568#[turbo_tasks::value]
569pub struct ModulesDataOutputAsset {
570    pub path: FileSystemPath,
571    pub module_graphs: ResolvedVc<ModuleGraphs>,
572}
573
574#[turbo_tasks::value_impl]
575impl ModulesDataOutputAsset {
576    #[turbo_tasks::function]
577    pub async fn new(path: FileSystemPath, module_graphs: Vc<ModuleGraphs>) -> Result<Vc<Self>> {
578        Ok(Self {
579            path,
580            module_graphs: module_graphs.to_resolved().await?,
581        }
582        .cell())
583    }
584}
585
586#[turbo_tasks::value_impl]
587impl Asset for ModulesDataOutputAsset {
588    #[turbo_tasks::function]
589    fn content(&self) -> Vc<AssetContent> {
590        let file_content = analyze_module_graphs(*self.module_graphs);
591        AssetContent::file(file_content)
592    }
593}
594
595#[turbo_tasks::value_impl]
596impl OutputAssetsReference for ModulesDataOutputAsset {}
597
598#[turbo_tasks::value_impl]
599impl OutputAsset for ModulesDataOutputAsset {
600    #[turbo_tasks::function]
601    fn path(&self) -> Vc<FileSystemPath> {
602        self.path.clone().cell()
603    }
604}