turbopack/
global_module_ids.rs

1use anyhow::{Context, Result};
2use rustc_hash::FxHashMap;
3use smallvec::SmallVec;
4use tracing::Instrument;
5use turbo_rcstr::RcStr;
6use turbo_tasks::{ReadRef, ResolvedVc, TryJoinIterExt, ValueToString, Vc};
7use turbo_tasks_hash::hash_xxh3_hash64;
8use turbopack_core::{
9    chunk::{ChunkableModule, ChunkingType, module_id_strategies::GlobalModuleIdStrategy},
10    ident::AssetIdent,
11    module::Module,
12    module_graph::{ModuleGraph, RefData},
13};
14use turbopack_ecmascript::async_chunk::module::AsyncLoaderModule;
15
16#[turbo_tasks::function]
17pub async fn get_global_module_id_strategy(
18    module_graph: ResolvedVc<ModuleGraph>,
19) -> Result<Vc<GlobalModuleIdStrategy>> {
20    let span = tracing::info_span!("compute module id map");
21    async move {
22        let module_graph = module_graph.read_graphs().await?;
23        let graphs = &module_graph.graphs;
24
25        // All modules in the graph
26        let module_idents = graphs
27            .iter()
28            .flat_map(|graph| graph.iter_nodes())
29            .map(|m| m.ident());
30
31        // And additionally, all the modules that are inserted by chunking (i.e. async loaders)
32        let mut async_idents = vec![];
33        module_graph.traverse_all_edges_unordered(|parent, current| {
34            if let (
35                _,
36                &RefData {
37                    chunking_type: ChunkingType::Async,
38                    ..
39                },
40            ) = parent
41            {
42                let module = ResolvedVc::try_sidecast::<Box<dyn ChunkableModule>>(current)
43                    .context("expected chunkable module for async reference")?;
44                async_idents.push(AsyncLoaderModule::asset_ident_for(*module));
45            }
46            Ok(())
47        })?;
48
49        let mut module_id_map = module_idents
50            .chain(async_idents.into_iter())
51            .map(|ident| async move {
52                let ident = ident.to_resolved().await?;
53                let ident_str = ident.to_string().await?;
54                let hash = hash_xxh3_hash64(&ident_str);
55                Ok((ident, (ident_str, hash)))
56            })
57            .try_join()
58            .await?
59            .into_iter()
60            .collect::<FxHashMap<_, _>>();
61
62        finalize_module_ids(&mut module_id_map);
63
64        Ok(GlobalModuleIdStrategy {
65            module_id_map: module_id_map
66                .into_iter()
67                .map(|(ident, (_, hash))| (ident, hash))
68                .collect(),
69        }
70        .cell())
71    }
72    .instrument(span)
73    .await
74}
75
76const JS_MAX_SAFE_INTEGER: u64 = (1u64 << 53) - 1;
77
78/// Shorten hashes and handle any collisions.
79fn finalize_module_ids(
80    merged_module_ids: &mut FxHashMap<ResolvedVc<AssetIdent>, (ReadRef<RcStr>, u64)>,
81) {
82    // 5% fill rate, as done in Webpack
83    // https://github.com/webpack/webpack/blob/27cf3e59f5f289dfc4d76b7a1df2edbc4e651589/lib/ids/IdHelpers.js#L366-L405
84    let optimal_range = merged_module_ids.len() * 20;
85    let digit_mask = std::cmp::min(
86        10u64.pow((optimal_range as f64).log10().ceil() as u32),
87        JS_MAX_SAFE_INTEGER,
88    );
89
90    let mut used_ids =
91        FxHashMap::<u64, SmallVec<[(ResolvedVc<AssetIdent>, ReadRef<RcStr>); 1]>>::default();
92
93    // Run in multiple passes, to not depend on the order of the `merged_module_ids` (i.e. the order
94    // of imports). Hashes could still change if modules are added or removed.
95
96    // Find pass: shorten hashes, potentially causing (more) collisions
97    for (ident, (ident_str, full_hash)) in merged_module_ids.iter_mut() {
98        let first_pass_hash = *full_hash % digit_mask;
99        used_ids
100            .entry(first_pass_hash)
101            .or_default()
102            .push((*ident, ident_str.clone()));
103        *full_hash = first_pass_hash;
104    }
105
106    // Filter conflicts
107    let mut conflicting_hashes = used_ids
108        .iter()
109        .filter(|(_, list)| list.len() > 1)
110        .map(|(hash, _)| *hash)
111        .collect::<Vec<_>>();
112    conflicting_hashes.sort();
113
114    // Second pass over the conflicts to resolve them
115    for hash in conflicting_hashes.into_iter() {
116        let list = used_ids.get_mut(&hash).unwrap();
117        // Take the vector but keep the (empty) entry, so that the "contains_key" check below works
118        let mut list = std::mem::take(list);
119        list.sort_by(|a, b| a.1.cmp(&b.1));
120
121        // Skip the first one, one module can keep the original hash
122        for (ident, _) in list.into_iter().skip(1) {
123            let hash = &mut merged_module_ids.get_mut(&ident).unwrap().1;
124
125            // the original algorithm since all that runs in deterministic order now
126            let mut i = 1;
127            let mut trimmed_hash;
128            loop {
129                // If the id is already used, find the next available hash.
130                trimmed_hash = hash_xxh3_hash64((*hash, i)) % digit_mask;
131                if !used_ids.contains_key(&trimmed_hash) {
132                    break;
133                }
134                i += 1;
135            }
136            // At this point, we don't care about the values anymore, just the keys
137            used_ids.entry(trimmed_hash).or_default();
138            *hash = trimmed_hash;
139        }
140    }
141}