Skip to main content

turbopack/
global_module_ids.rs

1use anyhow::{Context, Result, bail};
2use rustc_hash::{FxHashMap, FxHashSet};
3use smallvec::SmallVec;
4use tracing::Instrument;
5use turbo_rcstr::RcStr;
6use turbo_tasks::{ReadRef, ResolvedVc, TryJoinIterExt, ValueToString, Vc};
7use turbo_tasks_hash::hash_xxh3_hash64;
8use turbopack_core::{
9    chunk::{
10        ChunkableModule, ChunkingType, ModuleId,
11        chunk_id_strategy::{ModuleIdFallback, ModuleIdStrategy},
12    },
13    ident::AssetIdent,
14    module::Module,
15    module_graph::{ModuleGraph, RefData},
16};
17use turbopack_ecmascript::async_chunk::module::AsyncLoaderModule;
18
19#[turbo_tasks::function]
20pub async fn get_global_module_id_strategy(
21    module_graph: ResolvedVc<ModuleGraph>,
22) -> Result<Vc<ModuleIdStrategy>> {
23    let span = tracing::info_span!("compute module id map");
24    async move {
25        let module_graph = module_graph.await?;
26
27        // All modules in the graph and additionally, all the modules that are inserted by chunking
28        // (i.e. async loaders)
29        let mut modules = FxHashSet::default();
30        let mut async_idents = vec![];
31        module_graph.traverse_edges_unordered(|parent, current| {
32            modules.insert(current);
33            if let Some((
34                _,
35                &RefData {
36                    chunking_type: ChunkingType::Async,
37                    ..
38                },
39            )) = parent
40            {
41                let module = ResolvedVc::try_sidecast::<Box<dyn ChunkableModule>>(current)
42                    .context("expected chunkable module for async reference")?;
43                async_idents.push(AsyncLoaderModule::asset_ident_for(*module));
44            }
45            Ok(())
46        })?;
47
48        let mut module_id_map = modules
49            .into_iter()
50            .map(|m| m.ident())
51            .chain(async_idents.into_iter())
52            .map(|ident| async move {
53                let ident = ident.to_resolved().await?;
54                let ident_str = ident.to_string().await?;
55                let hash = hash_xxh3_hash64(&ident_str);
56                Ok((ident, (ident_str, hash)))
57            })
58            .try_join()
59            .await?
60            .into_iter()
61            .collect::<FxHashMap<_, _>>();
62
63        finalize_module_ids(&mut module_id_map);
64
65        Ok(ModuleIdStrategy {
66            module_id_map: Some(ResolvedVc::cell(
67                module_id_map
68                    .into_iter()
69                    .map(|(ident, (_, hash))| {
70                        const JS_MAX_SAFE_INTEGER: u64 = (1u64 << 53) - 1;
71                        if hash > JS_MAX_SAFE_INTEGER {
72                            bail!("Numeric module id is too large: {}", hash);
73                        }
74                        Ok((ident, ModuleId::Number(hash)))
75                    })
76                    .collect::<Result<FxHashMap<_, _>>>()?,
77            )),
78            fallback: ModuleIdFallback::Error,
79        }
80        .cell())
81    }
82    .instrument(span)
83    .await
84}
85
86const JS_MAX_SAFE_INTEGER: u64 = (1u64 << 53) - 1;
87
88/// Shorten hashes and handle any collisions.
89fn finalize_module_ids(
90    merged_module_ids: &mut FxHashMap<ResolvedVc<AssetIdent>, (ReadRef<RcStr>, u64)>,
91) {
92    // 5% fill rate, as done in Webpack
93    // https://github.com/webpack/webpack/blob/27cf3e59f5f289dfc4d76b7a1df2edbc4e651589/lib/ids/IdHelpers.js#L366-L405
94    let optimal_range = merged_module_ids.len() * 20;
95    let digit_mask = std::cmp::min(
96        10u64.pow((optimal_range as f64).log10().ceil() as u32),
97        JS_MAX_SAFE_INTEGER,
98    );
99
100    let mut used_ids =
101        FxHashMap::<u64, SmallVec<[(ResolvedVc<AssetIdent>, ReadRef<RcStr>); 1]>>::default();
102
103    // Run in multiple passes, to not depend on the order of the `merged_module_ids` (i.e. the order
104    // of imports). Hashes could still change if modules are added or removed.
105
106    // Find pass: shorten hashes, potentially causing (more) collisions
107    for (ident, (ident_str, full_hash)) in merged_module_ids.iter_mut() {
108        let first_pass_hash = *full_hash % digit_mask;
109        used_ids
110            .entry(first_pass_hash)
111            .or_default()
112            .push((*ident, ident_str.clone()));
113        *full_hash = first_pass_hash;
114    }
115
116    // Filter conflicts
117    let mut conflicting_hashes = used_ids
118        .iter()
119        .filter(|(_, list)| list.len() > 1)
120        .map(|(hash, _)| *hash)
121        .collect::<Vec<_>>();
122    conflicting_hashes.sort();
123
124    // Second pass over the conflicts to resolve them
125    for hash in conflicting_hashes.into_iter() {
126        let list = used_ids.get_mut(&hash).unwrap();
127        // Take the vector but keep the (empty) entry, so that the "contains_key" check below works
128        let mut list = std::mem::take(list);
129        list.sort_by(|a, b| a.1.cmp(&b.1));
130
131        // Skip the first one, one module can keep the original hash
132        for (ident, _) in list.into_iter().skip(1) {
133            let hash = &mut merged_module_ids.get_mut(&ident).unwrap().1;
134
135            // the original algorithm since all that runs in deterministic order now
136            let mut i = 1;
137            let mut trimmed_hash;
138            loop {
139                // If the id is already used, find the next available hash.
140                trimmed_hash = hash_xxh3_hash64((*hash, i)) % digit_mask;
141                if !used_ids.contains_key(&trimmed_hash) {
142                    break;
143                }
144                i += 1;
145            }
146            // At this point, we don't care about the values anymore, just the keys
147            used_ids.entry(trimmed_hash).or_default();
148            *hash = trimmed_hash;
149        }
150    }
151}