turbopack_ecmascript/tree_shake/
mod.rs

1use std::fmt::Write;
2
3use anyhow::{Result, bail};
4use rustc_hash::FxHashMap;
5use swc_core::{
6    common::{DUMMY_SP, GLOBALS, SyntaxContext, comments::Comments, util::take::Take},
7    ecma::{
8        ast::{
9            ExportAll, ExportNamedSpecifier, Expr, ExprStmt, Id, Ident, ImportDecl, Lit, Module,
10            ModuleDecl, ModuleExportName, ModuleItem, NamedExport, Program, Stmt,
11        },
12        codegen::to_code,
13    },
14};
15use turbo_rcstr::RcStr;
16use turbo_tasks::{FxIndexSet, ResolvedVc, ValueToString, Vc};
17use turbopack_core::{ident::AssetIdent, resolve::ModulePart, source::Source};
18
19use self::graph::{DepGraph, ItemData, ItemId, ItemIdGroupKind, Mode, SplitModuleResult};
20pub(crate) use self::graph::{
21    PartId, create_turbopack_part_id_assert, find_turbopack_part_id_in_asserts,
22};
23use crate::{EcmascriptModuleAsset, analyzer::graph::EvalContext, parse::ParseResult};
24
25pub mod asset;
26pub mod chunk_item;
27mod graph;
28pub mod merge;
29mod optimizations;
30pub mod side_effect_module;
31#[cfg(test)]
32mod tests;
33mod util;
34
35pub(crate) const TURBOPACK_PART_IMPORT_SOURCE: &str = "__TURBOPACK_PART__";
36
37pub struct Analyzer<'a> {
38    g: &'a mut DepGraph,
39    item_ids: &'a Vec<ItemId>,
40    items: &'a mut FxHashMap<ItemId, ItemData>,
41
42    last_side_effects: Vec<ItemId>,
43
44    vars: FxHashMap<Id, VarState>,
45}
46
47#[derive(Debug, Default, Clone)]
48struct VarState {
49    declarator: Option<ItemId>,
50
51    /// The module items that might trigger side effects on that variable.
52    /// We also store if this is a `const` write, so no further change will
53    /// happen to this var.
54    last_writes: Vec<ItemId>,
55    /// The module items that might read that variable.
56    last_reads: Vec<ItemId>,
57
58    last_op: Option<VarOp>,
59}
60
61#[derive(Debug, Clone, Copy, PartialEq, Eq)]
62enum VarOp {
63    Read,
64    Write,
65}
66
67impl Analyzer<'_> {
68    pub(super) fn analyze(
69        module: &Module,
70        comments: &dyn Comments,
71        unresolved_ctxt: SyntaxContext,
72        top_level_ctxt: SyntaxContext,
73    ) -> (DepGraph, FxHashMap<ItemId, ItemData>) {
74        let mut g = DepGraph::default();
75        let (item_ids, mut items) = g.init(module, comments, unresolved_ctxt, top_level_ctxt);
76
77        let mut analyzer = Analyzer {
78            g: &mut g,
79            item_ids: &item_ids,
80            items: &mut items,
81            last_side_effects: Default::default(),
82            vars: Default::default(),
83        };
84
85        let eventual_ids = analyzer.hoist_vars_and_bindings();
86
87        analyzer.evaluate_immediate(module, &eventual_ids);
88
89        analyzer.evaluate_eventual(module);
90
91        analyzer.handle_exports(module);
92
93        analyzer.handle_explicit_deps();
94
95        (g, items)
96    }
97
98    fn handle_explicit_deps(&mut self) {
99        for item_id in self.item_ids.iter() {
100            if let Some(item) = self.items.get(item_id)
101                && !item.explicit_deps.is_empty()
102            {
103                self.g.add_strong_deps(item_id, item.explicit_deps.iter());
104            }
105        }
106    }
107
108    /// Phase 1: Hoisted Variables and Bindings
109    ///
110    ///
111    /// Returns all (EVENTUAL_READ/WRITE_VARS) in the module.
112    fn hoist_vars_and_bindings(&mut self) -> FxIndexSet<Id> {
113        let mut eventual_ids = FxIndexSet::default();
114
115        for item_id in self.item_ids.iter() {
116            if let Some(item) = self.items.get(item_id) {
117                eventual_ids.extend(item.eventual_read_vars.iter().cloned());
118                eventual_ids.extend(item.eventual_write_vars.iter().cloned());
119
120                if item.is_hoisted && item.side_effects {
121                    self.g
122                        .add_strong_deps(item_id, self.last_side_effects.last());
123
124                    self.last_side_effects.push(item_id.clone());
125                }
126
127                for id in item.var_decls.iter() {
128                    let state = self.vars.entry(id.clone()).or_default();
129
130                    if state.declarator.is_none() {
131                        state.declarator = Some(item_id.clone());
132                    }
133
134                    if item.is_hoisted {
135                        state.last_writes.push(item_id.clone());
136                    } else {
137                        // TODO(WEB-705): Create a fake module item
138                        // state.last_writes.push(item_id.clone());
139                    }
140                }
141            }
142        }
143
144        eventual_ids
145    }
146
147    /// Phase 2: Immediate evaluation
148    fn evaluate_immediate(&mut self, _module: &Module, eventual_ids: &FxIndexSet<Id>) {
149        for item_id in self.item_ids.iter() {
150            if let Some(item) = self.items.get(item_id) {
151                // Ignore HOISTED module items, they have been processed in phase 1 already.
152                if item.is_hoisted {
153                    continue;
154                }
155
156                for id in item.var_decls.iter() {
157                    let state = self.vars.entry(id.clone()).or_default();
158                    if state.declarator.is_none() {
159                        state.declarator = Some(item_id.clone());
160                    }
161                }
162
163                // For each var in READ_VARS:
164                for id in item.read_vars.iter() {
165                    // read (last: read) -> ref last_writes, push last_reads
166                    // read (last: (read +) write) -> ref last_writes, clear last_reads, push
167                    // last_reads
168
169                    // (the writes need to be executed before this read)
170                    let state = self.vars.entry(id.clone()).or_default();
171                    self.g.add_strong_deps(item_id, state.last_writes.iter());
172
173                    if let Some(declarator) = &state.declarator
174                        && declarator != item_id
175                    {
176                        // A read also depends on the declaration.
177                        self.g
178                            .add_strong_deps(item_id, [declarator].iter().copied());
179                    }
180
181                    if state.last_op == Some(VarOp::Write) && !item.write_vars.contains(id) {
182                        state.last_reads.clear();
183                    }
184                }
185
186                // For each var in WRITE_VARS:
187                for id in item.write_vars.iter() {
188                    // Create a weak dependency to all module items listed in
189                    // LAST_READS for that var.
190
191                    // (the reads need to be executed before this write, when
192                    // it’s needed)
193
194                    let state = self.vars.entry(id.clone()).or_default();
195                    self.g.add_weak_deps(item_id, state.last_reads.iter());
196
197                    if let Some(declarator) = &state.declarator
198                        && declarator != item_id
199                    {
200                        // A write also depends on the declaration.
201                        self.g.add_strong_deps(item_id, [declarator]);
202                    }
203
204                    if !item.read_vars.contains(id) {
205                        // write (last: read) -> weak_ref last_reads, clear last_writes, push
206                        // last_writes
207
208                        if state.last_op == Some(VarOp::Read) {
209                            state.last_writes.clear();
210                        } else if state.last_op == Some(VarOp::Write) {
211                            // write (last: (read +) write) -> weak_ref last_reads, push last_writes
212                        }
213                    } else {
214                        // read+write (last: read) -> weak_ref last_reads, ref last_writes, clear
215                        // last_reads, clear last_writes, push last_reads, push last_writes
216
217                        // read+write (last: (read +) write) -> ref last_writes, clear
218                        // last_reads, clear last_writes, push
219                        // last_reads, push last_writes
220                        if state.last_op.is_some() {
221                            state.last_reads.clear();
222                            state.last_writes.clear();
223                        }
224                    }
225                }
226
227                if item.side_effects {
228                    // Create a strong dependency to LAST_SIDE_EFFECT.
229
230                    self.g
231                        .add_strong_deps(item_id, self.last_side_effects.last());
232
233                    // Create weak dependencies to all LAST_WRITES and strong
234                    // dependencies to LAST_READS.
235                    //
236                    // We need to create strong dependencies to LAST_READS because
237                    // prototype-based methods definitions should be executed before
238                    // any usage of those methods, and the usage of those methods are
239                    // flagged as a side effect.
240                    for id in eventual_ids.iter() {
241                        let state = self.vars.entry(id.clone()).or_default();
242
243                        self.g.add_weak_deps(item_id, state.last_writes.iter());
244                        self.g.add_strong_deps(item_id, state.last_reads.iter());
245                    }
246                }
247
248                // For each var in WRITE_VARS:
249                for id in item.write_vars.iter() {
250                    // Add this module item to LAST_WRITES
251
252                    let state = self.vars.entry(id.clone()).or_default();
253                    state.last_writes.push(item_id.clone());
254
255                    // Optimization: Remove each module item to which we
256                    // just created a strong dependency from LAST_WRITES
257
258                    state
259                        .last_writes
260                        .retain(|last_write| !self.g.has_dep(item_id, last_write, true));
261
262                    // Drop all writes which are not reachable from this item.
263                    //
264                    // For
265                    //
266                    // var x = 0
267                    // x = 1
268                    // x = 2
269                    // x += 3
270                    //
271                    // this will drop `x = 1` as not reachable from `x += 3`.
272
273                    state
274                        .last_writes
275                        .retain(|last_write| self.g.has_path_connecting(item_id, last_write));
276                }
277
278                // For each var in READ_VARS:
279                for id in item.read_vars.iter() {
280                    // Add this module item to LAST_READS
281
282                    let state = self.vars.entry(id.clone()).or_default();
283                    state.last_reads.push(item_id.clone());
284
285                    // Optimization: Remove each module item to which we
286                    // have a strong dependency
287
288                    state
289                        .last_reads
290                        .retain(|last_read| !self.g.has_dep(item_id, last_read, true));
291
292                    state.last_op = Some(VarOp::Read);
293                }
294
295                for id in item.write_vars.iter() {
296                    let state = self.vars.entry(id.clone()).or_default();
297                    state.last_op = Some(VarOp::Write);
298                }
299
300                if item.side_effects {
301                    self.last_side_effects.push(item_id.clone());
302                }
303            }
304        }
305    }
306
307    /// Phase 3: Eventual evaluation
308    fn evaluate_eventual(&mut self, _module: &Module) {
309        for item_id in self.item_ids.iter() {
310            if let Some(item) = self.items.get(item_id) {
311                // For each var in EVENTUAL_READ_VARS:
312
313                for id in item.eventual_read_vars.iter() {
314                    // Create a strong dependency to all module items listed in
315                    // LAST_WRITES for that var.
316
317                    let state = self.vars.entry(id.clone()).or_default();
318                    self.g.add_strong_deps(item_id, state.last_writes.iter());
319
320                    if let Some(declarator) = &state.declarator
321                        && declarator != item_id
322                    {
323                        // A read also depends on the declaration.
324                        self.g.add_strong_deps(item_id, [declarator]);
325                    }
326                }
327
328                // For each var in EVENTUAL_WRITE_VARS:
329                for id in item.eventual_write_vars.iter() {
330                    // Create a weak dependency to all module items listed in
331                    // LAST_READS for that var.
332
333                    let state = self.vars.entry(id.clone()).or_default();
334
335                    self.g.add_weak_deps(item_id, state.last_reads.iter());
336
337                    if let Some(declarator) = &state.declarator
338                        && declarator != item_id
339                    {
340                        // A write also depends on the declaration.
341                        self.g.add_strong_deps(item_id, [declarator]);
342                    }
343                }
344
345                // (no state update happens, since this is only triggered by
346                // side effects, which we already handled)
347            }
348        }
349    }
350
351    /// Phase 4: Exports
352    fn handle_exports(&mut self, _module: &Module) {
353        // We use the last side effect as a module evaluation
354        if let Some(last) = self.last_side_effects.last()
355            && let Some(item) = self.items.get_mut(last)
356        {
357            item.is_module_evaluation = true;
358        }
359
360        for item_id in self.item_ids.iter() {
361            if let ItemId::Group(ItemIdGroupKind::Export(local, _)) = item_id {
362                // Create a strong dependency to LAST_WRITES for this var
363
364                let state = self.vars.entry(local.clone()).or_default();
365
366                self.g.add_strong_deps(item_id, state.last_writes.iter());
367            }
368        }
369    }
370}
371
372#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
373pub(crate) enum Key {
374    ModuleEvaluation,
375    Export(RcStr),
376    Exports,
377    StarExports,
378}
379
380/// Converts [ModulePart] to the index.
381async fn get_part_id(result: &SplitResult, part: &ModulePart) -> Result<u32> {
382    // TODO implement ModulePart::Facade
383    let key = match part {
384        ModulePart::Evaluation => Key::ModuleEvaluation,
385        ModulePart::Export(export) => Key::Export(export.clone()),
386        ModulePart::Exports => Key::Exports,
387        ModulePart::Internal(part_id) => return Ok(*part_id),
388        ModulePart::Locals
389        | ModulePart::Facade
390        | ModulePart::RenamedExport { .. }
391        | ModulePart::RenamedNamespace { .. } => {
392            bail!("invalid module part")
393        }
394    };
395
396    let SplitResult::Ok {
397        entrypoints,
398        modules,
399        ..
400    } = &result
401    else {
402        bail!("split failed")
403    };
404
405    if let Some(id) = entrypoints.get(&key) {
406        return Ok(*id);
407    }
408
409    // This is required to handle `export * from 'foo'`
410    if let ModulePart::Export(..) = part
411        && let Some(&v) = entrypoints
412            .get(&Key::StarExports)
413            .or_else(|| entrypoints.get(&Key::Exports))
414    {
415        return Ok(v);
416    }
417
418    let mut dump = String::new();
419
420    for (idx, m) in modules.iter().enumerate() {
421        let ParseResult::Ok { program, .. } = &*m.await? else {
422            bail!("failed to get module")
423        };
424
425        {
426            let code = to_code(&program);
427
428            writeln!(dump, "# Module #{idx}:\n{code}\n\n\n")?;
429        }
430    }
431
432    bail!(
433        "could not find part id for module part {:?} in {:?}\n\nModule dump:\n{dump}",
434        key,
435        entrypoints
436    )
437}
438
439#[turbo_tasks::value(shared, serialization = "none", eq = "manual")]
440pub(crate) enum SplitResult {
441    Ok {
442        asset_ident: ResolvedVc<AssetIdent>,
443
444        /// `u32` is a index to `modules`.
445        #[turbo_tasks(trace_ignore)]
446        entrypoints: FxHashMap<Key, u32>,
447
448        #[turbo_tasks(debug_ignore, trace_ignore)]
449        modules: Vec<ResolvedVc<ParseResult>>,
450
451        #[turbo_tasks(trace_ignore)]
452        deps: FxHashMap<u32, Vec<PartId>>,
453
454        #[turbo_tasks(debug_ignore, trace_ignore)]
455        star_reexports: Vec<ExportAll>,
456    },
457    Failed {
458        parse_result: ResolvedVc<ParseResult>,
459    },
460}
461
462impl PartialEq for SplitResult {
463    fn eq(&self, other: &Self) -> bool {
464        match (self, other) {
465            (Self::Ok { .. }, Self::Ok { .. }) => false,
466            _ => core::mem::discriminant(self) == core::mem::discriminant(other),
467        }
468    }
469}
470
471#[turbo_tasks::function]
472pub(super) fn split_module(asset: Vc<EcmascriptModuleAsset>) -> Result<Vc<SplitResult>> {
473    Ok(split(asset.source().ident(), asset.source(), asset.parse()))
474}
475
476#[turbo_tasks::function]
477pub(super) async fn split(
478    ident: ResolvedVc<AssetIdent>,
479    source: ResolvedVc<Box<dyn Source>>,
480    parsed: ResolvedVc<ParseResult>,
481) -> Result<Vc<SplitResult>> {
482    // Do not split already split module
483    if !ident.await?.parts.is_empty() {
484        return Ok(SplitResult::Failed {
485            parse_result: parsed,
486        }
487        .cell());
488    }
489
490    // Turbopack has a bug related to parsing of CJS files where the package.json has
491    // a `"type": "module"` and the file is a CJS file.
492    let name = ident.to_string().await?;
493    if name.ends_with(".cjs") {
494        return Ok(SplitResult::Failed {
495            parse_result: parsed,
496        }
497        .cell());
498    }
499
500    let parse_result = parsed.await?;
501
502    match &*parse_result {
503        ParseResult::Ok {
504            program,
505            comments,
506            eval_context,
507            source_map,
508            globals,
509            ..
510        } => {
511            // If the script file is a common js file, we cannot split the module
512            if util::should_skip_tree_shaking(program) {
513                return Ok(SplitResult::Failed {
514                    parse_result: parsed,
515                }
516                .cell());
517            }
518
519            let module = match program {
520                Program::Module(module) => module,
521                Program::Script(..) => unreachable!("CJS is already handled"),
522            };
523
524            // We copy directives like `use client` or `use server` to each module
525            let directives = module
526                .body
527                .iter()
528                .take_while(|item| {
529                    matches!(
530                        item,
531                        ModuleItem::Stmt(Stmt::Expr(ExprStmt {
532                            expr: box Expr::Lit(Lit::Str(..)),
533                            ..
534                        }))
535                    )
536                })
537                .cloned()
538                .collect::<Vec<_>>();
539
540            let (mut dep_graph, items) = GLOBALS.set(globals, || {
541                Analyzer::analyze(
542                    module,
543                    comments,
544                    SyntaxContext::empty().apply_mark(eval_context.unresolved_mark),
545                    SyntaxContext::empty().apply_mark(eval_context.top_level_mark),
546                )
547            });
548
549            dep_graph.handle_weak(Mode::Production);
550
551            let SplitModuleResult {
552                entrypoints,
553                part_deps,
554                modules,
555                star_reexports,
556            } = dep_graph.split_module(&directives, &items);
557
558            assert_ne!(modules.len(), 0, "modules.len() == 0;\nModule: {module:?}",);
559
560            for &v in entrypoints.values() {
561                debug_assert!(
562                    v < modules.len() as u32,
563                    "Invalid entrypoint '{}' while there are only '{}' modules",
564                    v,
565                    modules.len()
566                );
567            }
568            let modules = modules
569                .into_iter()
570                .map(|module| {
571                    let program = Program::Module(module);
572                    let eval_context = EvalContext::new(
573                        &program,
574                        eval_context.unresolved_mark,
575                        eval_context.top_level_mark,
576                        eval_context.force_free_values.clone(),
577                        None,
578                        Some(source),
579                    );
580
581                    ParseResult::resolved_cell(ParseResult::Ok {
582                        program,
583                        globals: globals.clone(),
584                        comments: comments.clone(),
585                        source_map: source_map.clone(),
586                        eval_context,
587                    })
588                })
589                .collect();
590
591            Ok(SplitResult::Ok {
592                asset_ident: ident,
593                entrypoints,
594                deps: part_deps,
595                modules,
596                star_reexports,
597            }
598            .cell())
599        }
600
601        _ => Ok(SplitResult::Failed {
602            parse_result: parsed,
603        }
604        .cell()),
605    }
606}
607
608#[turbo_tasks::function]
609pub(crate) async fn part_of_module(
610    split_data: Vc<SplitResult>,
611    part: ModulePart,
612) -> Result<Vc<ParseResult>> {
613    let split_data = split_data.await?;
614
615    match &*split_data {
616        SplitResult::Ok {
617            asset_ident,
618            modules,
619            entrypoints,
620            deps,
621            star_reexports,
622            ..
623        } => {
624            debug_assert_ne!(modules.len(), 0, "modules.len() == 0");
625
626            if part == ModulePart::Facade {
627                if let ParseResult::Ok {
628                    comments,
629                    eval_context,
630                    globals,
631                    source_map,
632                    ..
633                } = &*modules[0].await?
634                {
635                    let mut module = Module::dummy();
636
637                    let mut export_names = entrypoints
638                        .keys()
639                        .filter_map(|key| {
640                            if let Key::Export(v) = key {
641                                Some(v.clone())
642                            } else {
643                                None
644                            }
645                        })
646                        .collect::<Vec<_>>();
647                    export_names.sort();
648
649                    module
650                        .body
651                        .push(ModuleItem::ModuleDecl(ModuleDecl::Import(ImportDecl {
652                            span: DUMMY_SP,
653                            specifiers: vec![],
654                            src: Box::new(TURBOPACK_PART_IMPORT_SOURCE.into()),
655                            type_only: false,
656                            with: Some(Box::new(create_turbopack_part_id_assert(
657                                PartId::ModuleEvaluation,
658                            ))),
659                            phase: Default::default(),
660                        })));
661
662                    let specifiers = export_names
663                        .into_iter()
664                        .map(|export_name| {
665                            swc_core::ecma::ast::ExportSpecifier::Named(ExportNamedSpecifier {
666                                span: DUMMY_SP,
667                                orig: ModuleExportName::Ident(Ident::new(
668                                    export_name.as_str().into(),
669                                    DUMMY_SP,
670                                    Default::default(),
671                                )),
672                                exported: None,
673                                is_type_only: false,
674                            })
675                        })
676                        .collect::<Vec<_>>();
677
678                    module
679                        .body
680                        .push(ModuleItem::ModuleDecl(ModuleDecl::ExportNamed(
681                            NamedExport {
682                                span: DUMMY_SP,
683                                specifiers,
684                                src: Some(Box::new(TURBOPACK_PART_IMPORT_SOURCE.into())),
685                                type_only: false,
686                                with: Some(Box::new(create_turbopack_part_id_assert(
687                                    PartId::Exports,
688                                ))),
689                            },
690                        )));
691
692                    module.body.extend(star_reexports.iter().map(|export_all| {
693                        ModuleItem::ModuleDecl(ModuleDecl::ExportAll(export_all.clone()))
694                    }));
695
696                    let program = Program::Module(module);
697                    let eval_context = EvalContext::new(
698                        &program,
699                        eval_context.unresolved_mark,
700                        eval_context.top_level_mark,
701                        eval_context.force_free_values.clone(),
702                        None,
703                        None,
704                    );
705
706                    return Ok(ParseResult::Ok {
707                        program,
708                        comments: comments.clone(),
709                        eval_context,
710                        globals: globals.clone(),
711                        source_map: source_map.clone(),
712                    }
713                    .cell());
714                } else {
715                    unreachable!()
716                }
717            }
718
719            let part_id = get_part_id(&split_data, &part).await?;
720
721            if part_id as usize >= modules.len() {
722                bail!(
723                    "part_id is out of range: {part_id} >= {}; asset = {}; entrypoints = \
724                     {entrypoints:?}: part_deps = {deps:?}",
725                    asset_ident.to_string().await?,
726                    modules.len(),
727                );
728            }
729
730            Ok(*modules[part_id as usize])
731        }
732        SplitResult::Failed { parse_result } => Ok(**parse_result),
733    }
734}