Skip to main content

turbopack_ecmascript/tree_shake/
mod.rs

1use std::fmt::Write;
2
3use anyhow::{Result, bail};
4use rustc_hash::FxHashMap;
5use swc_core::{
6    common::{DUMMY_SP, GLOBALS, SyntaxContext, comments::Comments, util::take::Take},
7    ecma::{
8        ast::{
9            ExportAll, ExportNamedSpecifier, Expr, ExprStmt, Id, Ident, ImportDecl, Lit, Module,
10            ModuleDecl, ModuleExportName, ModuleItem, NamedExport, Program, Stmt,
11        },
12        codegen::to_code,
13    },
14};
15use turbo_rcstr::RcStr;
16use turbo_tasks::{FxIndexSet, ResolvedVc, ValueToString, Vc, turbobail};
17use turbopack_core::{ident::AssetIdent, resolve::ModulePart, source::Source};
18
19use self::graph::{DepGraph, ItemData, ItemId, ItemIdGroupKind, Mode, SplitModuleResult};
20pub(crate) use self::graph::{
21    PartId, create_turbopack_part_id_assert, find_turbopack_part_id_in_asserts,
22};
23use crate::{
24    EcmascriptModuleAsset, EcmascriptParsable, analyzer::graph::EvalContext, parse::ParseResult,
25};
26
27mod graph;
28pub mod merge;
29mod optimizations;
30pub mod part;
31pub mod side_effects;
32#[cfg(test)]
33mod tests;
34mod util;
35
36pub(crate) const TURBOPACK_PART_IMPORT_SOURCE: &str = "__TURBOPACK_PART__";
37
38pub struct Analyzer<'a> {
39    g: &'a mut DepGraph,
40    item_ids: &'a Vec<ItemId>,
41    items: &'a mut FxHashMap<ItemId, ItemData>,
42
43    last_side_effects: Vec<ItemId>,
44
45    vars: FxHashMap<Id, VarState>,
46}
47
48#[derive(Debug, Default, Clone)]
49struct VarState {
50    declarator: Option<ItemId>,
51
52    /// The module items that might trigger side effects on that variable.
53    /// We also store if this is a `const` write, so no further change will
54    /// happen to this var.
55    last_writes: Vec<ItemId>,
56    /// The module items that might read that variable.
57    last_reads: Vec<ItemId>,
58
59    last_op: Option<VarOp>,
60}
61
62#[derive(Debug, Clone, Copy, PartialEq, Eq)]
63enum VarOp {
64    Read,
65    Write,
66}
67
68impl Analyzer<'_> {
69    pub(super) fn analyze(
70        module: &Module,
71        comments: &dyn Comments,
72        unresolved_ctxt: SyntaxContext,
73        top_level_ctxt: SyntaxContext,
74    ) -> (DepGraph, FxHashMap<ItemId, ItemData>) {
75        let mut g = DepGraph::default();
76        let (item_ids, mut items) = g.init(module, comments, unresolved_ctxt, top_level_ctxt);
77
78        let mut analyzer = Analyzer {
79            g: &mut g,
80            item_ids: &item_ids,
81            items: &mut items,
82            last_side_effects: Default::default(),
83            vars: Default::default(),
84        };
85
86        let eventual_ids = analyzer.hoist_vars_and_bindings();
87
88        analyzer.evaluate_immediate(module, &eventual_ids);
89
90        analyzer.evaluate_eventual(module);
91
92        analyzer.handle_exports(module);
93
94        analyzer.handle_explicit_deps();
95
96        (g, items)
97    }
98
99    fn handle_explicit_deps(&mut self) {
100        for item_id in self.item_ids.iter() {
101            if let Some(item) = self.items.get(item_id)
102                && !item.explicit_deps.is_empty()
103            {
104                self.g.add_strong_deps(item_id, item.explicit_deps.iter());
105            }
106        }
107    }
108
109    /// Phase 1: Hoisted Variables and Bindings
110    ///
111    ///
112    /// Returns all (EVENTUAL_READ/WRITE_VARS) in the module.
113    fn hoist_vars_and_bindings(&mut self) -> FxIndexSet<Id> {
114        let mut eventual_ids = FxIndexSet::default();
115
116        for item_id in self.item_ids.iter() {
117            if let Some(item) = self.items.get(item_id) {
118                eventual_ids.extend(item.eventual_read_vars.iter().cloned());
119                eventual_ids.extend(item.eventual_write_vars.iter().cloned());
120
121                if item.is_hoisted && item.side_effects {
122                    self.g
123                        .add_strong_deps(item_id, self.last_side_effects.last());
124
125                    self.last_side_effects.push(item_id.clone());
126                }
127
128                for id in item.var_decls.iter() {
129                    let state = self.vars.entry(id.clone()).or_default();
130
131                    if state.declarator.is_none() {
132                        state.declarator = Some(item_id.clone());
133                    }
134
135                    if item.is_hoisted {
136                        state.last_writes.push(item_id.clone());
137                    } else {
138                        // TODO(WEB-705): Create a fake module item
139                        // state.last_writes.push(item_id.clone());
140                    }
141                }
142            }
143        }
144
145        eventual_ids
146    }
147
148    /// Phase 2: Immediate evaluation
149    fn evaluate_immediate(&mut self, _module: &Module, eventual_ids: &FxIndexSet<Id>) {
150        for item_id in self.item_ids.iter() {
151            if let Some(item) = self.items.get(item_id) {
152                // Ignore HOISTED module items, they have been processed in phase 1 already.
153                if item.is_hoisted {
154                    continue;
155                }
156
157                for id in item.var_decls.iter() {
158                    let state = self.vars.entry(id.clone()).or_default();
159                    if state.declarator.is_none() {
160                        state.declarator = Some(item_id.clone());
161                    }
162                }
163
164                // For each var in READ_VARS:
165                for id in item.read_vars.iter() {
166                    // read (last: read) -> ref last_writes, push last_reads
167                    // read (last: (read +) write) -> ref last_writes, clear last_reads, push
168                    // last_reads
169
170                    // (the writes need to be executed before this read)
171                    let state = self.vars.entry(id.clone()).or_default();
172                    self.g.add_strong_deps(item_id, state.last_writes.iter());
173
174                    if let Some(declarator) = &state.declarator
175                        && declarator != item_id
176                    {
177                        // A read also depends on the declaration.
178                        self.g
179                            .add_strong_deps(item_id, [declarator].iter().copied());
180                    }
181
182                    if state.last_op == Some(VarOp::Write) && !item.write_vars.contains(id) {
183                        state.last_reads.clear();
184                    }
185                }
186
187                // For each var in WRITE_VARS:
188                for id in item.write_vars.iter() {
189                    // Create a weak dependency to all module items listed in
190                    // LAST_READS for that var.
191
192                    // (the reads need to be executed before this write, when
193                    // it’s needed)
194
195                    let state = self.vars.entry(id.clone()).or_default();
196                    self.g.add_weak_deps(item_id, state.last_reads.iter());
197
198                    if let Some(declarator) = &state.declarator
199                        && declarator != item_id
200                    {
201                        // A write also depends on the declaration.
202                        self.g.add_strong_deps(item_id, [declarator]);
203                    }
204
205                    if !item.read_vars.contains(id) {
206                        // write (last: read) -> weak_ref last_reads, clear last_writes, push
207                        // last_writes
208
209                        if state.last_op == Some(VarOp::Read) {
210                            state.last_writes.clear();
211                        } else if state.last_op == Some(VarOp::Write) {
212                            // write (last: (read +) write) -> weak_ref last_reads, push last_writes
213                        }
214                    } else {
215                        // read+write (last: read) -> weak_ref last_reads, ref last_writes, clear
216                        // last_reads, clear last_writes, push last_reads, push last_writes
217
218                        // read+write (last: (read +) write) -> ref last_writes, clear
219                        // last_reads, clear last_writes, push
220                        // last_reads, push last_writes
221                        if state.last_op.is_some() {
222                            state.last_reads.clear();
223                            state.last_writes.clear();
224                        }
225                    }
226                }
227
228                if item.side_effects {
229                    // Create a strong dependency to LAST_SIDE_EFFECT.
230
231                    self.g
232                        .add_strong_deps(item_id, self.last_side_effects.last());
233
234                    // Create weak dependencies to all LAST_WRITES and strong
235                    // dependencies to LAST_READS.
236                    //
237                    // We need to create strong dependencies to LAST_READS because
238                    // prototype-based methods definitions should be executed before
239                    // any usage of those methods, and the usage of those methods are
240                    // flagged as a side effect.
241                    for id in eventual_ids.iter() {
242                        let state = self.vars.entry(id.clone()).or_default();
243
244                        self.g.add_weak_deps(item_id, state.last_writes.iter());
245                        self.g.add_strong_deps(item_id, state.last_reads.iter());
246                    }
247                }
248
249                // For each var in WRITE_VARS:
250                for id in item.write_vars.iter() {
251                    // Add this module item to LAST_WRITES
252
253                    let state = self.vars.entry(id.clone()).or_default();
254                    state.last_writes.push(item_id.clone());
255
256                    // Optimization: Remove each module item to which we
257                    // just created a strong dependency from LAST_WRITES
258
259                    state
260                        .last_writes
261                        .retain(|last_write| !self.g.has_dep(item_id, last_write, true));
262
263                    // Drop all writes which are not reachable from this item.
264                    //
265                    // For
266                    //
267                    // var x = 0
268                    // x = 1
269                    // x = 2
270                    // x += 3
271                    //
272                    // this will drop `x = 1` as not reachable from `x += 3`.
273
274                    state
275                        .last_writes
276                        .retain(|last_write| self.g.has_path_connecting(item_id, last_write));
277                }
278
279                // For each var in READ_VARS:
280                for id in item.read_vars.iter() {
281                    // Add this module item to LAST_READS
282
283                    let state = self.vars.entry(id.clone()).or_default();
284                    state.last_reads.push(item_id.clone());
285
286                    // Optimization: Remove each module item to which we
287                    // have a strong dependency
288
289                    state
290                        .last_reads
291                        .retain(|last_read| !self.g.has_dep(item_id, last_read, true));
292
293                    state.last_op = Some(VarOp::Read);
294                }
295
296                for id in item.write_vars.iter() {
297                    let state = self.vars.entry(id.clone()).or_default();
298                    state.last_op = Some(VarOp::Write);
299                }
300
301                if item.side_effects {
302                    self.last_side_effects.push(item_id.clone());
303                }
304            }
305        }
306    }
307
308    /// Phase 3: Eventual evaluation
309    fn evaluate_eventual(&mut self, _module: &Module) {
310        for item_id in self.item_ids.iter() {
311            if let Some(item) = self.items.get(item_id) {
312                // For each var in EVENTUAL_READ_VARS:
313
314                for id in item.eventual_read_vars.iter() {
315                    // Create a strong dependency to all module items listed in
316                    // LAST_WRITES for that var.
317
318                    let state = self.vars.entry(id.clone()).or_default();
319                    self.g.add_strong_deps(item_id, state.last_writes.iter());
320
321                    if let Some(declarator) = &state.declarator
322                        && declarator != item_id
323                    {
324                        // A read also depends on the declaration.
325                        self.g.add_strong_deps(item_id, [declarator]);
326                    }
327                }
328
329                // For each var in EVENTUAL_WRITE_VARS:
330                for id in item.eventual_write_vars.iter() {
331                    // Create a weak dependency to all module items listed in
332                    // LAST_READS for that var.
333
334                    let state = self.vars.entry(id.clone()).or_default();
335
336                    self.g.add_weak_deps(item_id, state.last_reads.iter());
337
338                    if let Some(declarator) = &state.declarator
339                        && declarator != item_id
340                    {
341                        // A write also depends on the declaration.
342                        self.g.add_strong_deps(item_id, [declarator]);
343                    }
344                }
345
346                // (no state update happens, since this is only triggered by
347                // side effects, which we already handled)
348            }
349        }
350    }
351
352    /// Phase 4: Exports
353    fn handle_exports(&mut self, _module: &Module) {
354        // We use the last side effect as a module evaluation
355        if let Some(last) = self.last_side_effects.last()
356            && let Some(item) = self.items.get_mut(last)
357        {
358            item.is_module_evaluation = true;
359        }
360
361        for item_id in self.item_ids.iter() {
362            if let ItemId::Group(ItemIdGroupKind::Export(local, _)) = item_id {
363                // Create a strong dependency to LAST_WRITES for this var
364
365                let state = self.vars.entry(local.clone()).or_default();
366
367                self.g.add_strong_deps(item_id, state.last_writes.iter());
368            }
369        }
370    }
371}
372
373#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
374pub(crate) enum Key {
375    ModuleEvaluation,
376    Export(RcStr),
377    Exports,
378    StarExports,
379}
380
381/// Converts [ModulePart] to the index.
382async fn get_part_id(result: &SplitResult, part: &ModulePart) -> Result<u32> {
383    // TODO implement ModulePart::Facade
384    let key = match part {
385        ModulePart::Evaluation => Key::ModuleEvaluation,
386        ModulePart::Export(export) => Key::Export(export.clone()),
387        ModulePart::Exports => Key::Exports,
388        ModulePart::Internal(part_id) => return Ok(*part_id),
389        ModulePart::Locals
390        | ModulePart::Facade
391        | ModulePart::RenamedExport { .. }
392        | ModulePart::RenamedNamespace { .. } => {
393            bail!("invalid module part")
394        }
395    };
396
397    let SplitResult::Ok {
398        entrypoints,
399        modules,
400        ..
401    } = &result
402    else {
403        bail!("split failed")
404    };
405
406    if let Some(id) = entrypoints.get(&key) {
407        return Ok(*id);
408    }
409
410    // This is required to handle `export * from 'foo'`
411    if let ModulePart::Export(..) = part
412        && let Some(&v) = entrypoints
413            .get(&Key::StarExports)
414            .or_else(|| entrypoints.get(&Key::Exports))
415    {
416        return Ok(v);
417    }
418
419    let mut dump = String::new();
420
421    for (idx, m) in modules.iter().enumerate() {
422        let ParseResult::Ok { program, .. } = &*m.await? else {
423            bail!("failed to get module")
424        };
425
426        {
427            let code = to_code(&program);
428
429            writeln!(dump, "# Module #{idx}:\n{code}\n\n\n")?;
430        }
431    }
432
433    bail!(
434        "could not find part id for module part {:?} in {:?}\n\nModule dump:\n{dump}",
435        key,
436        entrypoints
437    )
438}
439
440#[turbo_tasks::value(shared, serialization = "none", eq = "manual")]
441pub(crate) enum SplitResult {
442    Ok {
443        asset_ident: ResolvedVc<AssetIdent>,
444
445        /// `u32` is a index to `modules`.
446        #[turbo_tasks(trace_ignore)]
447        entrypoints: FxHashMap<Key, u32>,
448
449        #[turbo_tasks(debug_ignore, trace_ignore)]
450        modules: Vec<ResolvedVc<ParseResult>>,
451
452        #[turbo_tasks(trace_ignore)]
453        deps: FxHashMap<u32, Vec<PartId>>,
454
455        #[turbo_tasks(debug_ignore, trace_ignore)]
456        star_reexports: Vec<ExportAll>,
457    },
458    Failed {
459        parse_result: ResolvedVc<ParseResult>,
460    },
461}
462
463impl PartialEq for SplitResult {
464    fn eq(&self, other: &Self) -> bool {
465        match (self, other) {
466            (Self::Ok { .. }, Self::Ok { .. }) => false,
467            _ => core::mem::discriminant(self) == core::mem::discriminant(other),
468        }
469    }
470}
471
472#[turbo_tasks::function]
473pub(super) async fn split_module(asset: Vc<EcmascriptModuleAsset>) -> Result<Vc<SplitResult>> {
474    let parsed: ResolvedVc<ParseResult> = asset.failsafe_parse().to_resolved().await?;
475    let ident = asset.source().ident().to_resolved().await?;
476    // Do not split already split module
477    if !ident.await?.parts.is_empty() {
478        return Ok(SplitResult::Failed {
479            parse_result: parsed,
480        }
481        .cell());
482    }
483
484    // Turbopack has a bug related to parsing of CJS files where the package.json has
485    // a `"type": "module"` and the file is a CJS file.
486    let name = ident.to_string().await?;
487    if name.ends_with(".cjs") {
488        return Ok(SplitResult::Failed {
489            parse_result: parsed,
490        }
491        .cell());
492    }
493
494    let parse_result = parsed.await?;
495    let source = asset.source().to_resolved().await?;
496
497    match &*parse_result {
498        ParseResult::Ok {
499            program,
500            comments,
501            eval_context,
502            source_map,
503            globals,
504            ..
505        } => {
506            // If the script file is a common js file, we cannot split the module
507            if util::should_skip_tree_shaking(program) {
508                return Ok(SplitResult::Failed {
509                    parse_result: parsed,
510                }
511                .cell());
512            }
513
514            let module = match program {
515                Program::Module(module) => module,
516                Program::Script(..) => unreachable!("CJS is already handled"),
517            };
518
519            // We copy directives like `use client` or `use server` to each module
520            let directives = module
521                .body
522                .iter()
523                .take_while(|item| {
524                    matches!(
525                        item,
526                        ModuleItem::Stmt(Stmt::Expr(ExprStmt {
527                            expr: box Expr::Lit(Lit::Str(..)),
528                            ..
529                        }))
530                    )
531                })
532                .cloned()
533                .collect::<Vec<_>>();
534
535            let (mut dep_graph, items) = GLOBALS.set(globals, || {
536                Analyzer::analyze(
537                    module,
538                    comments,
539                    SyntaxContext::empty().apply_mark(eval_context.unresolved_mark),
540                    SyntaxContext::empty().apply_mark(eval_context.top_level_mark),
541                )
542            });
543
544            dep_graph.handle_weak(Mode::Production);
545
546            let SplitModuleResult {
547                entrypoints,
548                part_deps,
549                modules,
550                star_reexports,
551            } = dep_graph.split_module(&directives, &items);
552
553            assert_ne!(modules.len(), 0, "modules.len() == 0;\nModule: {module:?}",);
554
555            for &v in entrypoints.values() {
556                debug_assert!(
557                    v < modules.len() as u32,
558                    "Invalid entrypoint '{}' while there are only '{}' modules",
559                    v,
560                    modules.len()
561                );
562            }
563            let modules = modules
564                .into_iter()
565                .map(|module| {
566                    let program = Program::Module(module);
567                    let eval_context = EvalContext::new(
568                        Some(&program),
569                        eval_context.unresolved_mark,
570                        eval_context.top_level_mark,
571                        eval_context.force_free_values.clone(),
572                        None,
573                        Some(source),
574                    );
575
576                    ParseResult::resolved_cell(ParseResult::Ok {
577                        program,
578                        globals: globals.clone(),
579                        comments: comments.clone(),
580                        source_map: source_map.clone(),
581                        eval_context,
582                        source_mapping_url: None,
583                    })
584                })
585                .collect();
586
587            Ok(SplitResult::Ok {
588                asset_ident: ident,
589                entrypoints,
590                deps: part_deps,
591                modules,
592                star_reexports,
593            }
594            .cell())
595        }
596
597        _ => Ok(SplitResult::Failed {
598            parse_result: parsed,
599        }
600        .cell()),
601    }
602}
603
604#[turbo_tasks::function]
605pub(crate) async fn part_of_module(
606    split_data: Vc<SplitResult>,
607    part: ModulePart,
608) -> Result<Vc<ParseResult>> {
609    let split_data = split_data.await?;
610
611    match &*split_data {
612        SplitResult::Ok {
613            asset_ident,
614            modules,
615            entrypoints,
616            deps,
617            star_reexports,
618            ..
619        } => {
620            debug_assert_ne!(modules.len(), 0, "modules.len() == 0");
621
622            if part == ModulePart::Facade {
623                if let ParseResult::Ok {
624                    comments,
625                    eval_context,
626                    globals,
627                    source_map,
628                    ..
629                } = &*modules[0].await?
630                {
631                    let mut module = Module::dummy();
632
633                    let mut export_names = entrypoints
634                        .keys()
635                        .filter_map(|key| {
636                            if let Key::Export(v) = key {
637                                Some(v.clone())
638                            } else {
639                                None
640                            }
641                        })
642                        .collect::<Vec<_>>();
643                    export_names.sort();
644
645                    module
646                        .body
647                        .push(ModuleItem::ModuleDecl(ModuleDecl::Import(ImportDecl {
648                            span: DUMMY_SP,
649                            specifiers: vec![],
650                            src: Box::new(TURBOPACK_PART_IMPORT_SOURCE.into()),
651                            type_only: false,
652                            with: Some(Box::new(create_turbopack_part_id_assert(
653                                PartId::ModuleEvaluation,
654                            ))),
655                            phase: Default::default(),
656                        })));
657
658                    let specifiers = export_names
659                        .into_iter()
660                        .map(|export_name| {
661                            swc_core::ecma::ast::ExportSpecifier::Named(ExportNamedSpecifier {
662                                span: DUMMY_SP,
663                                orig: ModuleExportName::Ident(Ident::new(
664                                    export_name.as_str().into(),
665                                    DUMMY_SP,
666                                    Default::default(),
667                                )),
668                                exported: None,
669                                is_type_only: false,
670                            })
671                        })
672                        .collect::<Vec<_>>();
673
674                    module
675                        .body
676                        .push(ModuleItem::ModuleDecl(ModuleDecl::ExportNamed(
677                            NamedExport {
678                                span: DUMMY_SP,
679                                specifiers,
680                                src: Some(Box::new(TURBOPACK_PART_IMPORT_SOURCE.into())),
681                                type_only: false,
682                                with: Some(Box::new(create_turbopack_part_id_assert(
683                                    PartId::Exports,
684                                ))),
685                            },
686                        )));
687
688                    module.body.extend(star_reexports.iter().map(|export_all| {
689                        ModuleItem::ModuleDecl(ModuleDecl::ExportAll(export_all.clone()))
690                    }));
691
692                    let program = Program::Module(module);
693                    let eval_context = EvalContext::new(
694                        Some(&program),
695                        eval_context.unresolved_mark,
696                        eval_context.top_level_mark,
697                        eval_context.force_free_values.clone(),
698                        None,
699                        None,
700                    );
701
702                    return Ok(ParseResult::Ok {
703                        program,
704                        comments: comments.clone(),
705                        eval_context,
706                        globals: globals.clone(),
707                        source_map: source_map.clone(),
708                        source_mapping_url: None,
709                    }
710                    .cell());
711                } else {
712                    unreachable!()
713                }
714            }
715
716            let part_id = get_part_id(&split_data, &part).await?;
717
718            if part_id as usize >= modules.len() {
719                turbobail!(
720                    "part_id is out of range: {part_id} >= {}; asset = {}; entrypoints = \
721                     {entrypoints:?}: part_deps = {deps:?}",
722                    modules.len(),
723                    *asset_ident
724                );
725            }
726
727            Ok(*modules[part_id as usize])
728        }
729        SplitResult::Failed { parse_result } => Ok(**parse_result),
730    }
731}