turbopack_ecmascript/tree_shake/
mod.rs

1use std::fmt::Write;
2
3use anyhow::{Result, bail};
4use rustc_hash::FxHashMap;
5use swc_core::{
6    common::{DUMMY_SP, GLOBALS, SyntaxContext, comments::Comments, util::take::Take},
7    ecma::{
8        ast::{
9            ExportAll, ExportNamedSpecifier, Expr, ExprStmt, Id, Ident, ImportDecl, Lit, Module,
10            ModuleDecl, ModuleExportName, ModuleItem, NamedExport, Program, Stmt,
11        },
12        codegen::to_code,
13    },
14};
15use turbo_rcstr::RcStr;
16use turbo_tasks::{FxIndexSet, ResolvedVc, ValueToString, Vc};
17use turbopack_core::{ident::AssetIdent, resolve::ModulePart, source::Source};
18
19use self::graph::{DepGraph, ItemData, ItemId, ItemIdGroupKind, Mode, SplitModuleResult};
20pub(crate) use self::graph::{
21    PartId, create_turbopack_part_id_assert, find_turbopack_part_id_in_asserts,
22};
23use crate::{
24    EcmascriptModuleAsset, EcmascriptParsable, analyzer::graph::EvalContext, parse::ParseResult,
25};
26
27pub mod asset;
28pub mod chunk_item;
29mod graph;
30pub mod merge;
31mod optimizations;
32pub mod side_effect_module;
33#[cfg(test)]
34mod tests;
35mod util;
36
37pub(crate) const TURBOPACK_PART_IMPORT_SOURCE: &str = "__TURBOPACK_PART__";
38
39pub struct Analyzer<'a> {
40    g: &'a mut DepGraph,
41    item_ids: &'a Vec<ItemId>,
42    items: &'a mut FxHashMap<ItemId, ItemData>,
43
44    last_side_effects: Vec<ItemId>,
45
46    vars: FxHashMap<Id, VarState>,
47}
48
49#[derive(Debug, Default, Clone)]
50struct VarState {
51    declarator: Option<ItemId>,
52
53    /// The module items that might trigger side effects on that variable.
54    /// We also store if this is a `const` write, so no further change will
55    /// happen to this var.
56    last_writes: Vec<ItemId>,
57    /// The module items that might read that variable.
58    last_reads: Vec<ItemId>,
59
60    last_op: Option<VarOp>,
61}
62
63#[derive(Debug, Clone, Copy, PartialEq, Eq)]
64enum VarOp {
65    Read,
66    Write,
67}
68
69impl Analyzer<'_> {
70    pub(super) fn analyze(
71        module: &Module,
72        comments: &dyn Comments,
73        unresolved_ctxt: SyntaxContext,
74        top_level_ctxt: SyntaxContext,
75    ) -> (DepGraph, FxHashMap<ItemId, ItemData>) {
76        let mut g = DepGraph::default();
77        let (item_ids, mut items) = g.init(module, comments, unresolved_ctxt, top_level_ctxt);
78
79        let mut analyzer = Analyzer {
80            g: &mut g,
81            item_ids: &item_ids,
82            items: &mut items,
83            last_side_effects: Default::default(),
84            vars: Default::default(),
85        };
86
87        let eventual_ids = analyzer.hoist_vars_and_bindings();
88
89        analyzer.evaluate_immediate(module, &eventual_ids);
90
91        analyzer.evaluate_eventual(module);
92
93        analyzer.handle_exports(module);
94
95        analyzer.handle_explicit_deps();
96
97        (g, items)
98    }
99
100    fn handle_explicit_deps(&mut self) {
101        for item_id in self.item_ids.iter() {
102            if let Some(item) = self.items.get(item_id)
103                && !item.explicit_deps.is_empty()
104            {
105                self.g.add_strong_deps(item_id, item.explicit_deps.iter());
106            }
107        }
108    }
109
110    /// Phase 1: Hoisted Variables and Bindings
111    ///
112    ///
113    /// Returns all (EVENTUAL_READ/WRITE_VARS) in the module.
114    fn hoist_vars_and_bindings(&mut self) -> FxIndexSet<Id> {
115        let mut eventual_ids = FxIndexSet::default();
116
117        for item_id in self.item_ids.iter() {
118            if let Some(item) = self.items.get(item_id) {
119                eventual_ids.extend(item.eventual_read_vars.iter().cloned());
120                eventual_ids.extend(item.eventual_write_vars.iter().cloned());
121
122                if item.is_hoisted && item.side_effects {
123                    self.g
124                        .add_strong_deps(item_id, self.last_side_effects.last());
125
126                    self.last_side_effects.push(item_id.clone());
127                }
128
129                for id in item.var_decls.iter() {
130                    let state = self.vars.entry(id.clone()).or_default();
131
132                    if state.declarator.is_none() {
133                        state.declarator = Some(item_id.clone());
134                    }
135
136                    if item.is_hoisted {
137                        state.last_writes.push(item_id.clone());
138                    } else {
139                        // TODO(WEB-705): Create a fake module item
140                        // state.last_writes.push(item_id.clone());
141                    }
142                }
143            }
144        }
145
146        eventual_ids
147    }
148
149    /// Phase 2: Immediate evaluation
150    fn evaluate_immediate(&mut self, _module: &Module, eventual_ids: &FxIndexSet<Id>) {
151        for item_id in self.item_ids.iter() {
152            if let Some(item) = self.items.get(item_id) {
153                // Ignore HOISTED module items, they have been processed in phase 1 already.
154                if item.is_hoisted {
155                    continue;
156                }
157
158                for id in item.var_decls.iter() {
159                    let state = self.vars.entry(id.clone()).or_default();
160                    if state.declarator.is_none() {
161                        state.declarator = Some(item_id.clone());
162                    }
163                }
164
165                // For each var in READ_VARS:
166                for id in item.read_vars.iter() {
167                    // read (last: read) -> ref last_writes, push last_reads
168                    // read (last: (read +) write) -> ref last_writes, clear last_reads, push
169                    // last_reads
170
171                    // (the writes need to be executed before this read)
172                    let state = self.vars.entry(id.clone()).or_default();
173                    self.g.add_strong_deps(item_id, state.last_writes.iter());
174
175                    if let Some(declarator) = &state.declarator
176                        && declarator != item_id
177                    {
178                        // A read also depends on the declaration.
179                        self.g
180                            .add_strong_deps(item_id, [declarator].iter().copied());
181                    }
182
183                    if state.last_op == Some(VarOp::Write) && !item.write_vars.contains(id) {
184                        state.last_reads.clear();
185                    }
186                }
187
188                // For each var in WRITE_VARS:
189                for id in item.write_vars.iter() {
190                    // Create a weak dependency to all module items listed in
191                    // LAST_READS for that var.
192
193                    // (the reads need to be executed before this write, when
194                    // it’s needed)
195
196                    let state = self.vars.entry(id.clone()).or_default();
197                    self.g.add_weak_deps(item_id, state.last_reads.iter());
198
199                    if let Some(declarator) = &state.declarator
200                        && declarator != item_id
201                    {
202                        // A write also depends on the declaration.
203                        self.g.add_strong_deps(item_id, [declarator]);
204                    }
205
206                    if !item.read_vars.contains(id) {
207                        // write (last: read) -> weak_ref last_reads, clear last_writes, push
208                        // last_writes
209
210                        if state.last_op == Some(VarOp::Read) {
211                            state.last_writes.clear();
212                        } else if state.last_op == Some(VarOp::Write) {
213                            // write (last: (read +) write) -> weak_ref last_reads, push last_writes
214                        }
215                    } else {
216                        // read+write (last: read) -> weak_ref last_reads, ref last_writes, clear
217                        // last_reads, clear last_writes, push last_reads, push last_writes
218
219                        // read+write (last: (read +) write) -> ref last_writes, clear
220                        // last_reads, clear last_writes, push
221                        // last_reads, push last_writes
222                        if state.last_op.is_some() {
223                            state.last_reads.clear();
224                            state.last_writes.clear();
225                        }
226                    }
227                }
228
229                if item.side_effects {
230                    // Create a strong dependency to LAST_SIDE_EFFECT.
231
232                    self.g
233                        .add_strong_deps(item_id, self.last_side_effects.last());
234
235                    // Create weak dependencies to all LAST_WRITES and strong
236                    // dependencies to LAST_READS.
237                    //
238                    // We need to create strong dependencies to LAST_READS because
239                    // prototype-based methods definitions should be executed before
240                    // any usage of those methods, and the usage of those methods are
241                    // flagged as a side effect.
242                    for id in eventual_ids.iter() {
243                        let state = self.vars.entry(id.clone()).or_default();
244
245                        self.g.add_weak_deps(item_id, state.last_writes.iter());
246                        self.g.add_strong_deps(item_id, state.last_reads.iter());
247                    }
248                }
249
250                // For each var in WRITE_VARS:
251                for id in item.write_vars.iter() {
252                    // Add this module item to LAST_WRITES
253
254                    let state = self.vars.entry(id.clone()).or_default();
255                    state.last_writes.push(item_id.clone());
256
257                    // Optimization: Remove each module item to which we
258                    // just created a strong dependency from LAST_WRITES
259
260                    state
261                        .last_writes
262                        .retain(|last_write| !self.g.has_dep(item_id, last_write, true));
263
264                    // Drop all writes which are not reachable from this item.
265                    //
266                    // For
267                    //
268                    // var x = 0
269                    // x = 1
270                    // x = 2
271                    // x += 3
272                    //
273                    // this will drop `x = 1` as not reachable from `x += 3`.
274
275                    state
276                        .last_writes
277                        .retain(|last_write| self.g.has_path_connecting(item_id, last_write));
278                }
279
280                // For each var in READ_VARS:
281                for id in item.read_vars.iter() {
282                    // Add this module item to LAST_READS
283
284                    let state = self.vars.entry(id.clone()).or_default();
285                    state.last_reads.push(item_id.clone());
286
287                    // Optimization: Remove each module item to which we
288                    // have a strong dependency
289
290                    state
291                        .last_reads
292                        .retain(|last_read| !self.g.has_dep(item_id, last_read, true));
293
294                    state.last_op = Some(VarOp::Read);
295                }
296
297                for id in item.write_vars.iter() {
298                    let state = self.vars.entry(id.clone()).or_default();
299                    state.last_op = Some(VarOp::Write);
300                }
301
302                if item.side_effects {
303                    self.last_side_effects.push(item_id.clone());
304                }
305            }
306        }
307    }
308
309    /// Phase 3: Eventual evaluation
310    fn evaluate_eventual(&mut self, _module: &Module) {
311        for item_id in self.item_ids.iter() {
312            if let Some(item) = self.items.get(item_id) {
313                // For each var in EVENTUAL_READ_VARS:
314
315                for id in item.eventual_read_vars.iter() {
316                    // Create a strong dependency to all module items listed in
317                    // LAST_WRITES for that var.
318
319                    let state = self.vars.entry(id.clone()).or_default();
320                    self.g.add_strong_deps(item_id, state.last_writes.iter());
321
322                    if let Some(declarator) = &state.declarator
323                        && declarator != item_id
324                    {
325                        // A read also depends on the declaration.
326                        self.g.add_strong_deps(item_id, [declarator]);
327                    }
328                }
329
330                // For each var in EVENTUAL_WRITE_VARS:
331                for id in item.eventual_write_vars.iter() {
332                    // Create a weak dependency to all module items listed in
333                    // LAST_READS for that var.
334
335                    let state = self.vars.entry(id.clone()).or_default();
336
337                    self.g.add_weak_deps(item_id, state.last_reads.iter());
338
339                    if let Some(declarator) = &state.declarator
340                        && declarator != item_id
341                    {
342                        // A write also depends on the declaration.
343                        self.g.add_strong_deps(item_id, [declarator]);
344                    }
345                }
346
347                // (no state update happens, since this is only triggered by
348                // side effects, which we already handled)
349            }
350        }
351    }
352
353    /// Phase 4: Exports
354    fn handle_exports(&mut self, _module: &Module) {
355        // We use the last side effect as a module evaluation
356        if let Some(last) = self.last_side_effects.last()
357            && let Some(item) = self.items.get_mut(last)
358        {
359            item.is_module_evaluation = true;
360        }
361
362        for item_id in self.item_ids.iter() {
363            if let ItemId::Group(ItemIdGroupKind::Export(local, _)) = item_id {
364                // Create a strong dependency to LAST_WRITES for this var
365
366                let state = self.vars.entry(local.clone()).or_default();
367
368                self.g.add_strong_deps(item_id, state.last_writes.iter());
369            }
370        }
371    }
372}
373
374#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
375pub(crate) enum Key {
376    ModuleEvaluation,
377    Export(RcStr),
378    Exports,
379    StarExports,
380}
381
382/// Converts [ModulePart] to the index.
383async fn get_part_id(result: &SplitResult, part: &ModulePart) -> Result<u32> {
384    // TODO implement ModulePart::Facade
385    let key = match part {
386        ModulePart::Evaluation => Key::ModuleEvaluation,
387        ModulePart::Export(export) => Key::Export(export.clone()),
388        ModulePart::Exports => Key::Exports,
389        ModulePart::Internal(part_id) => return Ok(*part_id),
390        ModulePart::Locals
391        | ModulePart::Facade
392        | ModulePart::RenamedExport { .. }
393        | ModulePart::RenamedNamespace { .. } => {
394            bail!("invalid module part")
395        }
396    };
397
398    let SplitResult::Ok {
399        entrypoints,
400        modules,
401        ..
402    } = &result
403    else {
404        bail!("split failed")
405    };
406
407    if let Some(id) = entrypoints.get(&key) {
408        return Ok(*id);
409    }
410
411    // This is required to handle `export * from 'foo'`
412    if let ModulePart::Export(..) = part
413        && let Some(&v) = entrypoints
414            .get(&Key::StarExports)
415            .or_else(|| entrypoints.get(&Key::Exports))
416    {
417        return Ok(v);
418    }
419
420    let mut dump = String::new();
421
422    for (idx, m) in modules.iter().enumerate() {
423        let ParseResult::Ok { program, .. } = &*m.await? else {
424            bail!("failed to get module")
425        };
426
427        {
428            let code = to_code(&program);
429
430            writeln!(dump, "# Module #{idx}:\n{code}\n\n\n")?;
431        }
432    }
433
434    bail!(
435        "could not find part id for module part {:?} in {:?}\n\nModule dump:\n{dump}",
436        key,
437        entrypoints
438    )
439}
440
441#[turbo_tasks::value(shared, serialization = "none", eq = "manual")]
442pub(crate) enum SplitResult {
443    Ok {
444        asset_ident: ResolvedVc<AssetIdent>,
445
446        /// `u32` is a index to `modules`.
447        #[turbo_tasks(trace_ignore)]
448        entrypoints: FxHashMap<Key, u32>,
449
450        #[turbo_tasks(debug_ignore, trace_ignore)]
451        modules: Vec<ResolvedVc<ParseResult>>,
452
453        #[turbo_tasks(trace_ignore)]
454        deps: FxHashMap<u32, Vec<PartId>>,
455
456        #[turbo_tasks(debug_ignore, trace_ignore)]
457        star_reexports: Vec<ExportAll>,
458    },
459    Failed {
460        parse_result: ResolvedVc<ParseResult>,
461    },
462}
463
464impl PartialEq for SplitResult {
465    fn eq(&self, other: &Self) -> bool {
466        match (self, other) {
467            (Self::Ok { .. }, Self::Ok { .. }) => false,
468            _ => core::mem::discriminant(self) == core::mem::discriminant(other),
469        }
470    }
471}
472
473#[turbo_tasks::function]
474pub(super) async fn split_module(asset: Vc<EcmascriptModuleAsset>) -> Result<Vc<SplitResult>> {
475    let parsed: ResolvedVc<ParseResult> = asset.failsafe_parse().to_resolved().await?;
476    let ident = asset.source().ident().to_resolved().await?;
477    // Do not split already split module
478    if !ident.await?.parts.is_empty() {
479        return Ok(SplitResult::Failed {
480            parse_result: parsed,
481        }
482        .cell());
483    }
484
485    // Turbopack has a bug related to parsing of CJS files where the package.json has
486    // a `"type": "module"` and the file is a CJS file.
487    let name = ident.to_string().await?;
488    if name.ends_with(".cjs") {
489        return Ok(SplitResult::Failed {
490            parse_result: parsed,
491        }
492        .cell());
493    }
494
495    let parse_result = parsed.await?;
496    let source = asset.source().to_resolved().await?;
497
498    match &*parse_result {
499        ParseResult::Ok {
500            program,
501            comments,
502            eval_context,
503            source_map,
504            globals,
505            ..
506        } => {
507            // If the script file is a common js file, we cannot split the module
508            if util::should_skip_tree_shaking(program) {
509                return Ok(SplitResult::Failed {
510                    parse_result: parsed,
511                }
512                .cell());
513            }
514
515            let module = match program {
516                Program::Module(module) => module,
517                Program::Script(..) => unreachable!("CJS is already handled"),
518            };
519
520            // We copy directives like `use client` or `use server` to each module
521            let directives = module
522                .body
523                .iter()
524                .take_while(|item| {
525                    matches!(
526                        item,
527                        ModuleItem::Stmt(Stmt::Expr(ExprStmt {
528                            expr: box Expr::Lit(Lit::Str(..)),
529                            ..
530                        }))
531                    )
532                })
533                .cloned()
534                .collect::<Vec<_>>();
535
536            let (mut dep_graph, items) = GLOBALS.set(globals, || {
537                Analyzer::analyze(
538                    module,
539                    comments,
540                    SyntaxContext::empty().apply_mark(eval_context.unresolved_mark),
541                    SyntaxContext::empty().apply_mark(eval_context.top_level_mark),
542                )
543            });
544
545            dep_graph.handle_weak(Mode::Production);
546
547            let SplitModuleResult {
548                entrypoints,
549                part_deps,
550                modules,
551                star_reexports,
552            } = dep_graph.split_module(&directives, &items);
553
554            assert_ne!(modules.len(), 0, "modules.len() == 0;\nModule: {module:?}",);
555
556            for &v in entrypoints.values() {
557                debug_assert!(
558                    v < modules.len() as u32,
559                    "Invalid entrypoint '{}' while there are only '{}' modules",
560                    v,
561                    modules.len()
562                );
563            }
564            let modules = modules
565                .into_iter()
566                .map(|module| {
567                    let program = Program::Module(module);
568                    let eval_context = EvalContext::new(
569                        Some(&program),
570                        eval_context.unresolved_mark,
571                        eval_context.top_level_mark,
572                        eval_context.force_free_values.clone(),
573                        None,
574                        Some(source),
575                    );
576
577                    ParseResult::resolved_cell(ParseResult::Ok {
578                        program,
579                        globals: globals.clone(),
580                        comments: comments.clone(),
581                        source_map: source_map.clone(),
582                        eval_context,
583                    })
584                })
585                .collect();
586
587            Ok(SplitResult::Ok {
588                asset_ident: ident,
589                entrypoints,
590                deps: part_deps,
591                modules,
592                star_reexports,
593            }
594            .cell())
595        }
596
597        _ => Ok(SplitResult::Failed {
598            parse_result: parsed,
599        }
600        .cell()),
601    }
602}
603
604#[turbo_tasks::function]
605pub(crate) async fn part_of_module(
606    split_data: Vc<SplitResult>,
607    part: ModulePart,
608) -> Result<Vc<ParseResult>> {
609    let split_data = split_data.await?;
610
611    match &*split_data {
612        SplitResult::Ok {
613            asset_ident,
614            modules,
615            entrypoints,
616            deps,
617            star_reexports,
618            ..
619        } => {
620            debug_assert_ne!(modules.len(), 0, "modules.len() == 0");
621
622            if part == ModulePart::Facade {
623                if let ParseResult::Ok {
624                    comments,
625                    eval_context,
626                    globals,
627                    source_map,
628                    ..
629                } = &*modules[0].await?
630                {
631                    let mut module = Module::dummy();
632
633                    let mut export_names = entrypoints
634                        .keys()
635                        .filter_map(|key| {
636                            if let Key::Export(v) = key {
637                                Some(v.clone())
638                            } else {
639                                None
640                            }
641                        })
642                        .collect::<Vec<_>>();
643                    export_names.sort();
644
645                    module
646                        .body
647                        .push(ModuleItem::ModuleDecl(ModuleDecl::Import(ImportDecl {
648                            span: DUMMY_SP,
649                            specifiers: vec![],
650                            src: Box::new(TURBOPACK_PART_IMPORT_SOURCE.into()),
651                            type_only: false,
652                            with: Some(Box::new(create_turbopack_part_id_assert(
653                                PartId::ModuleEvaluation,
654                            ))),
655                            phase: Default::default(),
656                        })));
657
658                    let specifiers = export_names
659                        .into_iter()
660                        .map(|export_name| {
661                            swc_core::ecma::ast::ExportSpecifier::Named(ExportNamedSpecifier {
662                                span: DUMMY_SP,
663                                orig: ModuleExportName::Ident(Ident::new(
664                                    export_name.as_str().into(),
665                                    DUMMY_SP,
666                                    Default::default(),
667                                )),
668                                exported: None,
669                                is_type_only: false,
670                            })
671                        })
672                        .collect::<Vec<_>>();
673
674                    module
675                        .body
676                        .push(ModuleItem::ModuleDecl(ModuleDecl::ExportNamed(
677                            NamedExport {
678                                span: DUMMY_SP,
679                                specifiers,
680                                src: Some(Box::new(TURBOPACK_PART_IMPORT_SOURCE.into())),
681                                type_only: false,
682                                with: Some(Box::new(create_turbopack_part_id_assert(
683                                    PartId::Exports,
684                                ))),
685                            },
686                        )));
687
688                    module.body.extend(star_reexports.iter().map(|export_all| {
689                        ModuleItem::ModuleDecl(ModuleDecl::ExportAll(export_all.clone()))
690                    }));
691
692                    let program = Program::Module(module);
693                    let eval_context = EvalContext::new(
694                        Some(&program),
695                        eval_context.unresolved_mark,
696                        eval_context.top_level_mark,
697                        eval_context.force_free_values.clone(),
698                        None,
699                        None,
700                    );
701
702                    return Ok(ParseResult::Ok {
703                        program,
704                        comments: comments.clone(),
705                        eval_context,
706                        globals: globals.clone(),
707                        source_map: source_map.clone(),
708                    }
709                    .cell());
710                } else {
711                    unreachable!()
712                }
713            }
714
715            let part_id = get_part_id(&split_data, &part).await?;
716
717            if part_id as usize >= modules.len() {
718                bail!(
719                    "part_id is out of range: {part_id} >= {}; asset = {}; entrypoints = \
720                     {entrypoints:?}: part_deps = {deps:?}",
721                    asset_ident.to_string().await?,
722                    modules.len(),
723                );
724            }
725
726            Ok(*modules[part_id as usize])
727        }
728        SplitResult::Failed { parse_result } => Ok(**parse_result),
729    }
730}