Skip to main content

turbopack_core/chunk/
mod.rs

1pub mod availability_info;
2pub mod available_modules;
3pub mod chunk_group;
4pub mod chunk_id_strategy;
5pub(crate) mod chunk_item_batch;
6pub mod chunking;
7pub(crate) mod chunking_context;
8pub(crate) mod data;
9pub(crate) mod evaluate;
10
11use std::fmt::Display;
12
13use anyhow::Result;
14use auto_hash_map::AutoSet;
15use bincode::{Decode, Encode};
16use serde::{Deserialize, Serialize};
17use turbo_rcstr::RcStr;
18use turbo_tasks::{
19    FxIndexSet, NonLocalValue, ResolvedVc, TaskInput, Upcast, ValueToString, Vc,
20    debug::ValueDebugFormat, trace::TraceRawVcs,
21};
22use turbo_tasks_hash::DeterministicHash;
23
24pub use crate::chunk::{
25    chunk_item_batch::{
26        ChunkItemBatchGroup, ChunkItemBatchWithAsyncModuleInfo,
27        ChunkItemOrBatchWithAsyncModuleInfo, batch_info,
28    },
29    chunking_context::{
30        AssetSuffix, ChunkGroupResult, ChunkGroupType, ChunkingConfig, ChunkingConfigs,
31        ChunkingContext, ChunkingContextExt, EntryChunkGroupResult, MangleType, MinifyType,
32        SourceMapSourceType, SourceMapsType, UnusedReferences, UrlBehavior,
33    },
34    data::{ChunkData, ChunkDataOption, ChunksData},
35    evaluate::{EvaluatableAsset, EvaluatableAssetExt, EvaluatableAssets},
36};
37use crate::{
38    asset::Asset,
39    chunk::availability_info::AvailabilityInfo,
40    ident::AssetIdent,
41    module::Module,
42    module_graph::{
43        ModuleGraph,
44        module_batch::{ChunkableModuleOrBatch, ModuleBatchGroup},
45    },
46    output::{OutputAssets, OutputAssetsReference},
47};
48
/// Strategy for exposing the content hash of a chunk to the chunks that reference it.
///
/// [`ContentHashing::Direct`] is the only strategy defined at the moment.
#[derive(
    Debug,
    TaskInput,
    Clone,
    Copy,
    PartialEq,
    Eq,
    Hash,
    TraceRawVcs,
    DeterministicHash,
    NonLocalValue,
    Encode,
    Decode,
)]
pub enum ContentHashing {
    /// Direct content hashing: Embeds the chunk content hash directly into the referencing chunk.
    /// Benefit: No hash manifest needed.
    /// Downside: Causes cascading hash invalidation.
    Direct {
        /// The length of the content hash in base40 chars. Anything lower than 7 is not
        /// recommended due to the high risk of collisions.
        length: u8,
    },
}
73
/// A module id, which can be a number or string.
///
/// Because of `#[serde(untagged)]`, serialization emits the bare number or string without a
/// variant tag.
#[turbo_tasks::value(shared, operation)]
#[derive(Debug, Clone, Hash, Ord, PartialOrd, DeterministicHash, Serialize, ValueToString)]
#[serde(untagged)]
pub enum ModuleId {
    /// A numeric module id.
    Number(u64),
    /// A string module id.
    String(RcStr),
}
82
83impl Display for ModuleId {
84    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
85        match self {
86            ModuleId::Number(i) => write!(f, "{i}"),
87            ModuleId::String(s) => write!(f, "{s}"),
88        }
89    }
90}
91
92impl ModuleId {
93    pub fn parse(id: &str) -> Result<ModuleId> {
94        Ok(match id.parse::<u64>() {
95            Ok(i) => ModuleId::Number(i),
96            Err(_) => ModuleId::String(id.into()),
97        })
98    }
99}
100
/// A list of module ids, stored as a transparent wrapper around `Vec<ModuleId>`.
#[turbo_tasks::value(transparent, shared)]
pub struct ModuleIds(Vec<ModuleId>);
104
/// A [Module] that can be converted into a [ChunkItem].
#[turbo_tasks::value_trait]
pub trait ChunkableModule: Module {
    /// Creates the [ChunkItem] for this module, given the module graph and the chunking context
    /// it is being chunked in.
    #[turbo_tasks::function]
    fn as_chunk_item(
        self: Vc<Self>,
        module_graph: Vc<ModuleGraph>,
        chunking_context: Vc<Box<dyn ChunkingContext>>,
    ) -> Vc<Box<dyn ChunkItem>>;
}
115
116#[turbo_tasks::value(transparent)]
117pub struct ChunkableModules(Vec<ResolvedVc<Box<dyn ChunkableModule>>>);
118
119#[turbo_tasks::value_impl]
120impl ChunkableModules {
121    #[turbo_tasks::function]
122    pub fn interned(modules: Vec<ResolvedVc<Box<dyn ChunkableModule>>>) -> Vc<Self> {
123        Vc::cell(modules)
124    }
125}
126
/// A [Module] that can be merged with other [Module]s (to perform scope hoisting)
// TODO currently this is only used for ecmascript modules, and with the current API cannot be used
// with other module types (as a MergeableModule cannot prevent itself from being merged with other
// module types)
#[turbo_tasks::value_trait]
pub trait MergeableModule: Module {
    /// Even though MergeableModule is implemented, this allows a dynamic condition to determine
    /// mergeability.
    ///
    /// The default implementation always returns `true`.
    #[turbo_tasks::function]
    fn is_mergeable(self: Vc<Self>) -> Vc<bool> {
        Vc::cell(true)
    }

    /// Create a new module representing the merged content of the given `modules`.
    ///
    /// Group entry points are not referenced by any other module in the group. This list is needed
    /// because the merged module is created by recursively inlining modules when they are imported,
    /// but this process has to start somewhere (= with these entry points).
    ///
    /// Each entry of `modules` carries a [MergeableModuleExposure] next to the module itself. The
    /// merged result is returned as a [ChunkableModule].
    #[turbo_tasks::function]
    fn merge(
        self: Vc<Self>,
        modules: Vc<MergeableModulesExposed>,
        entry_points: Vc<MergeableModules>,
    ) -> Vc<Box<dyn ChunkableModule>>;
}
152#[turbo_tasks::value(transparent)]
153pub struct MergeableModules(Vec<ResolvedVc<Box<dyn MergeableModule>>>);
154
155#[turbo_tasks::value_impl]
156impl MergeableModules {
157    #[turbo_tasks::function]
158    pub fn interned(modules: Vec<ResolvedVc<Box<dyn MergeableModule>>>) -> Vc<Self> {
159        Vc::cell(modules)
160    }
161}
162
/// Whether a given module needs to be exposed (depending on how it is imported by other modules)
#[derive(
    Copy, Clone, Debug, PartialEq, Eq, TraceRawVcs, NonLocalValue, TaskInput, Hash, Encode, Decode,
)]
pub enum MergeableModuleExposure {
    /// This module is only used from within the current group, and only individual exports are
    /// used (and no namespace object is required).
    None,
    /// This module is only used from within the current group, but the namespace object is
    /// needed.
    Internal,
    /// The exports of this module are read from outside this group (necessitating a namespace
    /// object anyway).
    External,
}
178
179#[turbo_tasks::value(transparent)]
180pub struct MergeableModulesExposed(
181    Vec<(
182        ResolvedVc<Box<dyn MergeableModule>>,
183        MergeableModuleExposure,
184    )>,
185);
186
187#[turbo_tasks::value_impl]
188impl MergeableModulesExposed {
189    #[turbo_tasks::function]
190    pub fn interned(
191        modules: Vec<(
192            ResolvedVc<Box<dyn MergeableModule>>,
193            MergeableModuleExposure,
194        )>,
195    ) -> Vc<Self> {
196        Vc::cell(modules)
197    }
198}
199
200#[turbo_tasks::value(transparent)]
201pub struct Chunks(Vec<ResolvedVc<Box<dyn Chunk>>>);
202
203#[turbo_tasks::value_impl]
204impl Chunks {
205    #[turbo_tasks::function]
206    pub fn empty() -> Vc<Self> {
207        Vc::cell(vec![])
208    }
209}
210
/// Groups chunk items together into something that will become an [`OutputAsset`]. It usually
/// contains multiple chunk items.
///
/// [`OutputAsset`]: crate::output::OutputAsset
//
// TODO: This could be simplified to and merged with OutputChunk
#[turbo_tasks::value_trait]
pub trait Chunk: OutputAssetsReference {
    /// Returns the [AssetIdent] of this chunk.
    #[turbo_tasks::function]
    fn ident(self: Vc<Self>) -> Vc<AssetIdent>;

    /// Returns the [ChunkingContext] associated with this chunk.
    #[turbo_tasks::function]
    fn chunking_context(self: Vc<Self>) -> Vc<Box<dyn ChunkingContext>>;

    /// Returns the chunk items of this chunk.
    ///
    /// The default implementation returns an empty list.
    #[turbo_tasks::function]
    fn chunk_items(self: Vc<Self>) -> Vc<ChunkItems> {
        ChunkItems(vec![]).cell()
    }
}
230
231/// Aggregated information about a chunk content that can be used by the runtime
232/// code to optimize chunk loading.
233#[turbo_tasks::value(shared)]
234#[derive(Default)]
235pub struct OutputChunkRuntimeInfo {
236    pub included_ids: Option<ResolvedVc<ModuleIds>>,
237    pub excluded_ids: Option<ResolvedVc<ModuleIds>>,
238    /// List of paths of chunks containing individual modules that are part of
239    /// this chunk. This is useful for selectively loading modules from a chunk
240    /// without loading the whole chunk.
241    pub module_chunks: Option<ResolvedVc<OutputAssets>>,
242    pub placeholder_for_future_extensions: (),
243}
244
245#[turbo_tasks::value_impl]
246impl OutputChunkRuntimeInfo {
247    #[turbo_tasks::function]
248    pub fn empty() -> Vc<Self> {
249        Self::default().cell()
250    }
251}
252
/// An [Asset] that can report [OutputChunkRuntimeInfo] about itself.
#[turbo_tasks::value_trait]
pub trait OutputChunk: Asset {
    /// Returns the [OutputChunkRuntimeInfo] of this output chunk.
    #[turbo_tasks::function]
    fn runtime_info(self: Vc<Self>) -> Vc<OutputChunkRuntimeInfo>;
}
258
/// Specifies how a chunk interacts with other chunks when building a chunk
/// group.
#[derive(
    Debug,
    Clone,
    Hash,
    TraceRawVcs,
    Serialize,
    Deserialize,
    Eq,
    PartialEq,
    ValueDebugFormat,
    NonLocalValue,
    Encode,
    Decode,
)]
pub enum ChunkingType {
    /// The referenced module is placed in the same chunk group and is loaded in parallel.
    Parallel {
        /// Whether the parent module becomes an async module when the referenced module is async.
        /// This should happen for e.g. ESM imports, but not for CommonJS requires.
        inherit_async: bool,
        /// Whether the referenced module is executed always immediately before the parent module
        /// (corresponding to ESM import semantics).
        hoisted: bool,
    },
    /// An async loader is placed into the referencing chunk and loads the
    /// separate chunk group in which the module is placed.
    Async,
    /// Create a new chunk group in a separate context, merging references with the same tag into a
    /// single chunk group. It does not inherit the available modules from the parent.
    // TODO this is currently skipped in chunking
    Isolated {
        /// NOTE(review): the leading underscore suggests this field is not consumed yet — confirm.
        _ty: ChunkGroupType,
        /// References carrying the same tag are merged into a single chunk group.
        merge_tag: Option<RcStr>,
    },
    /// Create a new chunk group in a separate context, merging references with the same tag into a
    /// single chunk group. It provides available modules to the current chunk group. It's assumed
    /// to be loaded before the current chunk group.
    Shared {
        /// NOTE(review): presumably the same meaning as `inherit_async` on `Parallel` — confirm.
        inherit_async: bool,
        /// References carrying the same tag are merged into a single chunk group.
        merge_tag: Option<RcStr>,
    },
    /// Module not placed in chunk group, but its references are still followed.
    Traced,
}
305
306impl Display for ChunkingType {
307    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
308        match self {
309            ChunkingType::Parallel {
310                inherit_async,
311                hoisted,
312            } => {
313                write!(
314                    f,
315                    "Parallel(inherit_async: {inherit_async}, hoisted: {hoisted})",
316                )
317            }
318            ChunkingType::Async => write!(f, "Async"),
319            ChunkingType::Isolated {
320                _ty,
321                merge_tag: Some(merge_tag),
322            } => {
323                write!(f, "Isolated(merge_tag: {merge_tag})")
324            }
325            ChunkingType::Isolated {
326                _ty,
327                merge_tag: None,
328            } => {
329                write!(f, "Isolated")
330            }
331            ChunkingType::Shared {
332                inherit_async,
333                merge_tag: Some(merge_tag),
334            } => {
335                write!(
336                    f,
337                    "Shared(inherit_async: {inherit_async}, merge_tag: {merge_tag})"
338                )
339            }
340            ChunkingType::Shared {
341                inherit_async,
342                merge_tag: None,
343            } => {
344                write!(f, "Shared(inherit_async: {inherit_async})")
345            }
346            ChunkingType::Traced => write!(f, "Traced"),
347        }
348    }
349}
350
351impl ChunkingType {
352    pub fn is_inherit_async(&self) -> bool {
353        matches!(
354            self,
355            ChunkingType::Parallel {
356                inherit_async: true,
357                ..
358            } | ChunkingType::Shared {
359                inherit_async: true,
360                ..
361            }
362        )
363    }
364
365    pub fn is_parallel(&self) -> bool {
366        matches!(self, ChunkingType::Parallel { .. })
367    }
368
369    pub fn is_merged(&self) -> bool {
370        matches!(
371            self,
372            ChunkingType::Isolated {
373                merge_tag: Some(_),
374                ..
375            } | ChunkingType::Shared {
376                merge_tag: Some(_),
377                ..
378            }
379        )
380    }
381
382    pub fn without_inherit_async(&self) -> Self {
383        match self {
384            ChunkingType::Parallel { hoisted, .. } => ChunkingType::Parallel {
385                hoisted: *hoisted,
386                inherit_async: false,
387            },
388            ChunkingType::Async => ChunkingType::Async,
389            ChunkingType::Isolated { _ty, merge_tag } => ChunkingType::Isolated {
390                _ty: *_ty,
391                merge_tag: merge_tag.clone(),
392            },
393            ChunkingType::Shared {
394                inherit_async: _,
395                merge_tag,
396            } => ChunkingType::Shared {
397                inherit_async: false,
398                merge_tag: merge_tag.clone(),
399            },
400            ChunkingType::Traced => ChunkingType::Traced,
401        }
402    }
403}
404
/// The content gathered for a chunk group.
///
/// NOTE(review): the field descriptions below are inferred from names and types only — confirm
/// against the code that builds this struct.
pub struct ChunkGroupContent {
    /// Chunkable modules, or batches of them, that are part of the group.
    pub chunkable_items: Vec<ChunkableModuleOrBatch>,
    /// Module batch groups belonging to the group.
    pub batch_groups: Vec<ResolvedVc<ModuleBatchGroup>>,
    /// Presumably modules referenced with [`ChunkingType::Async`] — TODO confirm.
    pub async_modules: FxIndexSet<ResolvedVc<Box<dyn ChunkableModule>>>,
    /// Presumably modules referenced with [`ChunkingType::Traced`] — TODO confirm.
    pub traced_modules: FxIndexSet<ResolvedVc<Box<dyn Module>>>,
    /// The [AvailabilityInfo] associated with this content.
    pub availability_info: AvailabilityInfo,
}
412
/// An item that is placed into a [Chunk]; created from a [ChunkableModule] via
/// `ChunkableModule::as_chunk_item`.
#[turbo_tasks::value_trait]
pub trait ChunkItem: OutputAssetsReference {
    /// The [AssetIdent] of the [Module] that this [ChunkItem] was created from.
    /// For most chunk types this must uniquely identify the chunk item at
    /// runtime as it's the source of the module id used at runtime.
    #[turbo_tasks::function]
    fn asset_ident(self: Vc<Self>) -> Vc<AssetIdent>;

    /// An [AssetIdent] that uniquely identifies the content of this [ChunkItem].
    /// It is usually identical to [ChunkItem::asset_ident] but can be
    /// different when the chunk item content depends on available modules e.g.
    /// for chunk loaders.
    ///
    /// The default implementation returns [ChunkItem::asset_ident].
    #[turbo_tasks::function]
    fn content_ident(self: Vc<Self>) -> Vc<AssetIdent> {
        self.asset_ident()
    }

    /// The type of chunk this item should be assembled into.
    #[turbo_tasks::function]
    fn ty(self: Vc<Self>) -> Vc<Box<dyn ChunkType>>;

    /// A temporary method to retrieve the module associated with this
    /// ChunkItem. TODO: Remove this as part of the chunk refactoring.
    #[turbo_tasks::function]
    fn module(self: Vc<Self>) -> Vc<Box<dyn Module>>;

    /// Returns the [ChunkingContext] associated with this chunk item.
    #[turbo_tasks::function]
    fn chunking_context(self: Vc<Self>) -> Vc<Box<dyn ChunkingContext>>;
}
442
/// A kind of chunk: knows how to assemble [ChunkItem]s into a [Chunk] and how to measure the size
/// of a single chunk item.
#[turbo_tasks::value_trait]
pub trait ChunkType: ValueToString {
    /// Whether the source (reference) order of items needs to be retained during chunking.
    #[turbo_tasks::function]
    fn is_style(self: Vc<Self>) -> Vc<bool>;

    /// Create a new chunk for the given chunk items and batch groups.
    #[turbo_tasks::function]
    fn chunk(
        &self,
        chunking_context: Vc<Box<dyn ChunkingContext>>,
        chunk_items: Vec<ChunkItemOrBatchWithAsyncModuleInfo>,
        batch_groups: Vec<ResolvedVc<ChunkItemBatchGroup>>,
    ) -> Vc<Box<dyn Chunk>>;

    /// Returns the size of the given chunk item.
    ///
    /// NOTE(review): the unit is not visible here (presumably bytes of generated output) —
    /// confirm against the implementations.
    #[turbo_tasks::function]
    fn chunk_item_size(
        &self,
        chunking_context: Vc<Box<dyn ChunkingContext>>,
        chunk_item: Vc<Box<dyn ChunkItem>>,
        async_module_info: Option<Vc<AsyncModuleInfo>>,
    ) -> Vc<usize>;
}
466
/// Rounds `size` down by keeping only its two most significant set-bit positions.
///
/// All lower bits are cleared, so the result is either a power of two or 1.5 times a power of
/// two. It is never larger than `size` and never smaller than two thirds of `size`; `0` maps to
/// `0`. Presumably this keeps small size changes from altering the rounded value — confirm with
/// the callers.
///
/// Works for the whole `usize` range, including values above `1 << (usize::BITS - 1)`.
pub fn round_chunk_item_size(size: usize) -> usize {
    if size == 0 {
        return 0;
    }
    // Derive the highest set bit from `leading_zeros` instead of `next_power_of_two`: the latter
    // overflows (panic in debug builds, `0` in release builds) once `size` exceeds the largest
    // representable power of two.
    let top_bit = 1usize << (usize::BITS - 1 - size.leading_zeros());
    size & (top_bit | (top_bit >> 1))
}
471
/// A list of [ChunkItem]s, stored as a transparent wrapper around the underlying `Vec`.
#[turbo_tasks::value(transparent)]
pub struct ChunkItems(pub Vec<ResolvedVc<Box<dyn ChunkItem>>>);
474
475#[turbo_tasks::value]
476pub struct AsyncModuleInfo {
477    pub referenced_async_modules: AutoSet<ResolvedVc<Box<dyn Module>>>,
478}
479
480#[turbo_tasks::value_impl]
481impl AsyncModuleInfo {
482    #[turbo_tasks::function]
483    pub fn new(referenced_async_modules: Vec<ResolvedVc<Box<dyn Module>>>) -> Result<Vc<Self>> {
484        Ok(Self {
485            referenced_async_modules: referenced_async_modules.into_iter().collect(),
486        }
487        .cell())
488    }
489}
490
/// A [ChunkItem] bundled with optional module and [AsyncModuleInfo] references.
#[derive(
    Debug, Clone, PartialEq, Eq, Hash, TraceRawVcs, TaskInput, NonLocalValue, Encode, Decode,
)]
pub struct ChunkItemWithAsyncModuleInfo {
    /// The chunk item itself.
    pub chunk_item: ResolvedVc<Box<dyn ChunkItem>>,
    /// NOTE(review): presumably the module the chunk item was created from, when known — confirm.
    pub module: Option<ResolvedVc<Box<dyn ChunkableModule>>>,
    /// The [AsyncModuleInfo] for the chunk item, if there is any.
    pub async_info: Option<ResolvedVc<AsyncModuleInfo>>,
}
499
500pub trait ChunkItemExt {
501    /// Returns the module id of this chunk item.
502    fn id(self: Vc<Self>) -> impl Future<Output = Result<ModuleId>> + Send;
503}
504
505impl<T> ChunkItemExt for T
506where
507    T: Upcast<Box<dyn ChunkItem>> + Send,
508{
509    /// Returns the module id of this chunk item.
510    async fn id(self: Vc<Self>) -> Result<ModuleId> {
511        let chunk_item = Vc::upcast_non_strict(self);
512        chunk_item
513            .chunking_context()
514            .chunk_item_id_strategy()
515            .await?
516            .get_id(chunk_item)
517            .await
518    }
519}
520
521pub trait ModuleChunkItemIdExt {
522    /// Returns the chunk item id of this module.
523    fn chunk_item_id(
524        self: Vc<Self>,
525        chunking_context: Vc<Box<dyn ChunkingContext>>,
526    ) -> impl Future<Output = Result<ModuleId>> + Send;
527}
528impl<T> ModuleChunkItemIdExt for T
529where
530    T: Upcast<Box<dyn Module>> + Send,
531{
532    async fn chunk_item_id(
533        self: Vc<Self>,
534        chunking_context: Vc<Box<dyn ChunkingContext>>,
535    ) -> Result<ModuleId> {
536        chunking_context
537            .chunk_item_id_strategy()
538            .await?
539            .get_id_from_module(Vc::upcast_non_strict(self))
540            .await
541    }
542}
543
#[cfg(test)]
mod tests {
    use super::*;

    /// Counts the values `i` in `range` whose rounded size differs from the rounded size of
    /// `i + 1`, while checking that every rounded size lies within `[i * 2 / 3, i]`.
    fn changes_in_range(range: std::ops::Range<usize>) -> usize {
        range
            .filter(|&size| {
                let rounded = round_chunk_item_size(size);
                assert!(rounded >= size * 2 / 3);
                assert!(rounded <= size);
                rounded != round_chunk_item_size(size + 1)
            })
            .count()
    }

    #[test]
    fn test_round_chunk_item_size() {
        // (input, expected rounded size)
        let exact_cases: [(usize, usize); 11] = [
            (0, 0),
            (1, 1),
            (2, 2),
            (3, 3),
            (4, 4),
            (5, 4),
            (6, 6),
            (7, 6),
            (8, 8),
            (49000, 32_768),
            (50000, 49_152),
        ];
        for (input, expected) in exact_cases {
            assert_eq!(round_chunk_item_size(input), expected);
        }

        // The rounded value changes less and less often as sizes grow.
        assert_eq!(changes_in_range(0..1000), 19);
        assert_eq!(changes_in_range(1000..2000), 2);
        assert_eq!(changes_in_range(2000..3000), 1);

        assert_eq!(changes_in_range(3000..10000), 4);
    }
}