Skip to main content

turbopack_core/
ident.rs

1use std::{fmt::Write, sync::LazyLock};
2
3use anyhow::Result;
4use bincode::{Decode, Encode};
5use regex::Regex;
6use turbo_rcstr::RcStr;
7use turbo_tasks::{
8    ReadRef, ResolvedVc, ValueToString, ValueToStringRef, Vc, trace::TraceRawVcs, turbofmt,
9};
10use turbo_tasks_fs::FileSystemPath;
11use turbo_tasks_hash::{DeterministicHash, Xxh3Hash64Hasher, encode_base38, hash_xxh3_hash64};
12
13use crate::resolve::ModulePart;
14
15/// A layer identifies a distinct part of the module graph.
16#[turbo_tasks::task_input]
17#[derive(Clone, Hash, Debug, DeterministicHash, Eq, PartialEq, TraceRawVcs, Encode, Decode)]
18pub struct Layer {
19    name: RcStr,
20    user_friendly_name: Option<RcStr>,
21}
22
23impl Layer {
24    pub fn new(name: RcStr) -> Self {
25        debug_assert!(!name.is_empty());
26        Self {
27            name,
28            user_friendly_name: None,
29        }
30    }
31    pub fn new_with_user_friendly_name(name: RcStr, user_friendly_name: RcStr) -> Self {
32        debug_assert!(!name.is_empty());
33        debug_assert!(!user_friendly_name.is_empty());
34        Self {
35            name,
36            user_friendly_name: Some(user_friendly_name),
37        }
38    }
39
40    /// Returns a user friendly name for this layer
41    pub fn user_friendly_name(&self) -> &RcStr {
42        self.user_friendly_name.as_ref().unwrap_or(&self.name)
43    }
44
45    pub fn name(&self) -> &RcStr {
46        &self.name
47    }
48}
49
50#[turbo_tasks::value(task_input)]
51#[derive(Clone, Debug, Hash)]
52pub struct AssetIdent {
53    /// The primary path of the asset
54    pub path: FileSystemPath,
55    /// The query string of the asset this is either the empty string or a query string that starts
56    /// with a `?` (e.g. `?foo=bar`)
57    pub query: RcStr,
58    /// The fragment of the asset, this is either the empty string or a fragment string that starts
59    /// with a `#` (e.g. `#foo`)
60    pub fragment: RcStr,
61    /// The assets that are nested in this asset
62    pub assets: Vec<(RcStr, ResolvedVc<AssetIdent>)>,
63    /// The modifiers of this asset (e.g. `client chunks`)
64    pub modifiers: Vec<RcStr>,
65    /// The parts of the asset that are (ECMAScript) modules
66    pub parts: Vec<ModulePart>,
67    /// The asset layer the asset was created from.
68    pub layer: Option<Layer>,
69    /// The MIME content type, if this asset was created from a data URL.
70    pub content_type: Option<RcStr>,
71}
72
73impl AssetIdent {
74    /// Creates an [AssetIdent] from a [FileSystemPath].
75    ///
76    /// Returns an owned value; call [`AssetIdent::into_vc`] at the end of the builder chain to
77    /// turn it into a [`Vc<AssetIdent>`].
78    pub fn from_path(path: FileSystemPath) -> Self {
79        AssetIdent {
80            path,
81            query: RcStr::default(),
82            fragment: RcStr::default(),
83            assets: Vec::new(),
84            modifiers: Vec::new(),
85            parts: Vec::new(),
86            layer: None,
87            content_type: None,
88        }
89    }
90
91    /// Finalizes the builder by turning the owned [`AssetIdent`] into a cached [`Vc<AssetIdent>`].
92    pub fn into_vc(self) -> Vc<Self> {
93        // This optimizes cache misses in cold builds by only storing one copy of the AssetIdent.
94        AssetIdent::new_inner(ReadRef::new_owned(self))
95    }
96
97    pub fn with_query(mut self, query: RcStr) -> Self {
98        self.query = query;
99        self
100    }
101
102    pub fn with_fragment(mut self, fragment: RcStr) -> Self {
103        self.fragment = fragment;
104        self
105    }
106
107    pub fn with_modifier(mut self, modifier: RcStr) -> Self {
108        debug_assert!(!modifier.is_empty(), "modifiers cannot be empty.");
109        self.modifiers.push(modifier);
110        self
111    }
112
113    pub fn with_part(mut self, part: ModulePart) -> Self {
114        self.parts.push(part);
115        self
116    }
117
118    pub fn with_path(mut self, path: FileSystemPath) -> Self {
119        self.path = path;
120        self
121    }
122
123    pub fn with_layer(mut self, layer: Layer) -> Self {
124        self.layer = Some(layer);
125        self
126    }
127
128    pub fn with_content_type(mut self, content_type: RcStr) -> Self {
129        self.content_type = Some(content_type);
130        self
131    }
132
133    pub fn with_asset(mut self, key: RcStr, asset: ResolvedVc<AssetIdent>) -> Self {
134        self.assets.push((key, asset));
135        self
136    }
137
138    pub fn rename_as(mut self, pattern: &str) -> Self {
139        self.path = FileSystemPath::new_normalized_unchecked(
140            self.path.fs,
141            pattern.replace('*', &self.path.path).into(),
142        );
143        self.content_type = None;
144        self
145    }
146}
147
148#[turbo_tasks::value_impl]
149impl AssetIdent {
150    #[turbo_tasks::function]
151    fn new_inner(ident: ReadRef<AssetIdent>) -> Vc<Self> {
152        debug_assert!(
153            ident.query.is_empty() || ident.query.starts_with("?"),
154            "query should be empty or start with a `?`"
155        );
156        debug_assert!(
157            ident.fragment.is_empty() || ident.fragment.starts_with("#"),
158            "query should be empty or start with a `?`"
159        );
160        ReadRef::cell(ident)
161    }
162
163    /// Computes a unique output asset name for the given asset identifier.
164    /// TODO(alexkirsz) This is `turbopack-browser` specific, as
165    /// `turbopack-nodejs` would use a content hash instead. But for now
166    /// both are using the same name generation logic.
167    #[turbo_tasks::function]
168    pub async fn output_name(
169        &self,
170        context_path: FileSystemPath,
171        prefix: Option<RcStr>,
172        expected_extension: RcStr,
173    ) -> Result<Vc<RcStr>> {
174        debug_assert!(
175            expected_extension.starts_with("."),
176            "the extension should include the leading '.', got '{expected_extension}'"
177        );
178        // TODO(PACK-2140): restrict character set to A–Za–z0–9-_.~'()
179        // to be compatible with all operating systems + URLs.
180
181        // For clippy -- This explicit deref is necessary
182        let path = &self.path;
183        let mut name = if let Some(inner) = context_path.get_path_to(path) {
184            escape_file_path(inner)
185        } else {
186            escape_file_path(&self.path.to_string_ref().await?)
187        };
188        let removed_extension = name.ends_with(&*expected_extension);
189        if removed_extension {
190            name.truncate(name.len() - expected_extension.len());
191        }
192        // This step ensures that leading dots are not preserved in file names. This is
193        // important as some file servers do not serve files with leading dots (e.g.
194        // Next.js).
195        let mut name = clean_additional_extensions(&name);
196        if let Some(prefix) = prefix {
197            name = format!("{prefix}-{name}");
198        }
199
200        let default_modifier = match expected_extension.as_str() {
201            ".js" => Some("ecmascript"),
202            ".css" => Some("css"),
203            _ => None,
204        };
205
206        let mut hasher = Xxh3Hash64Hasher::new();
207        let mut has_hash = false;
208        let AssetIdent {
209            path: _,
210            query,
211            fragment,
212            assets,
213            modifiers,
214            parts,
215            layer,
216            content_type,
217        } = self;
218        if !query.is_empty() {
219            0_u8.deterministic_hash(&mut hasher);
220            query.deterministic_hash(&mut hasher);
221            has_hash = true;
222        }
223        if !fragment.is_empty() {
224            1_u8.deterministic_hash(&mut hasher);
225            fragment.deterministic_hash(&mut hasher);
226            has_hash = true;
227        }
228        for (key, ident) in assets.iter() {
229            2_u8.deterministic_hash(&mut hasher);
230            key.deterministic_hash(&mut hasher);
231            ident.to_string().await?.deterministic_hash(&mut hasher);
232            has_hash = true;
233        }
234        for modifier in modifiers.iter() {
235            if let Some(default_modifier) = default_modifier
236                && *modifier == default_modifier
237            {
238                continue;
239            }
240            3_u8.deterministic_hash(&mut hasher);
241            modifier.deterministic_hash(&mut hasher);
242            has_hash = true;
243        }
244        for part in parts.iter() {
245            4_u8.deterministic_hash(&mut hasher);
246            match part {
247                ModulePart::Evaluation => {
248                    1_u8.deterministic_hash(&mut hasher);
249                }
250                ModulePart::Export(export) => {
251                    2_u8.deterministic_hash(&mut hasher);
252                    export.deterministic_hash(&mut hasher);
253                }
254                ModulePart::RenamedExport {
255                    original_export,
256                    export,
257                } => {
258                    3_u8.deterministic_hash(&mut hasher);
259                    original_export.deterministic_hash(&mut hasher);
260                    export.deterministic_hash(&mut hasher);
261                }
262                ModulePart::RenamedNamespace { export } => {
263                    4_u8.deterministic_hash(&mut hasher);
264                    export.deterministic_hash(&mut hasher);
265                }
266                ModulePart::Internal(id) => {
267                    5_u8.deterministic_hash(&mut hasher);
268                    id.deterministic_hash(&mut hasher);
269                }
270                ModulePart::Locals => {
271                    6_u8.deterministic_hash(&mut hasher);
272                }
273                ModulePart::Exports => {
274                    7_u8.deterministic_hash(&mut hasher);
275                }
276                ModulePart::Facade => {
277                    8_u8.deterministic_hash(&mut hasher);
278                }
279            }
280
281            has_hash = true;
282        }
283        if let Some(layer) = layer {
284            5_u8.deterministic_hash(&mut hasher);
285            layer.deterministic_hash(&mut hasher);
286            has_hash = true;
287        }
288        if let Some(content_type) = content_type {
289            6_u8.deterministic_hash(&mut hasher);
290            content_type.deterministic_hash(&mut hasher);
291            has_hash = true;
292        }
293
294        if has_hash {
295            let hash = encode_base38(hasher.finish());
296            // 7 base38 chars ≈ 36 bits of collision resistance
297            let truncated_hash = &hash[..7];
298            write!(name, "_{truncated_hash}")?;
299        }
300
301        // Location in "path" where hashed and named parts are split.
302        // Everything before i is hashed and after i named.
303        let mut i = 0;
304        static NODE_MODULES: &str = "_node_modules_";
305        if let Some(j) = name.rfind(NODE_MODULES) {
306            i = j + NODE_MODULES.len();
307        }
308        const MAX_FILENAME: usize = 80;
309        if name.len() - i > MAX_FILENAME {
310            i = name.len() - MAX_FILENAME;
311            if let Some(j) = name[i..].find('_')
312                && j < 20
313            {
314                i += j + 1;
315            }
316        }
317        if i > 0 {
318            let hash = encode_base38(hash_xxh3_hash64(&name.as_bytes()[..i]));
319            // 4 base38 chars ≈ 21 bits — just a short disambiguator prefix
320            let truncated_hash = &hash[..4];
321            name = format!("{}_{}", truncated_hash, &name[i..]);
322        }
323        // We need to make sure that `.json` and `.json.js` doesn't end up with the same
324        // name. So when we add an extra extension when want to mark that with a "._"
325        // suffix.
326        if !removed_extension {
327            name += "._";
328        }
329        name += &expected_extension;
330        Ok(Vc::cell(name.into()))
331    }
332}
333
334#[turbo_tasks::value_impl]
335impl ValueToString for AssetIdent {
336    #[turbo_tasks::function]
337    async fn to_string(&self) -> Result<Vc<RcStr>> {
338        // The query string/fragment is either empty or non-empty starting with
339        // `?` so we can just concat
340        let mut s = turbofmt!("{}{}{}", self.path, self.query, self.fragment)
341            .await?
342            .into_owned();
343
344        if !self.assets.is_empty() {
345            s.push_str(" {");
346
347            for (i, (key, asset)) in self.assets.iter().enumerate() {
348                if i > 0 {
349                    s.push(',');
350                }
351
352                let asset_str = asset.to_string().await?;
353                write!(s, " {key} => {asset_str:?}")?;
354            }
355
356            s.push_str(" }");
357        }
358
359        if let Some(layer) = &self.layer {
360            write!(s, " [{}]", layer.name)?;
361        }
362
363        if !self.modifiers.is_empty() {
364            s.push_str(" (");
365
366            for (i, modifier) in self.modifiers.iter().enumerate() {
367                if i > 0 {
368                    s.push_str(", ");
369                }
370
371                s.push_str(modifier);
372            }
373
374            s.push(')');
375        }
376
377        if let Some(content_type) = &self.content_type {
378            write!(s, " <{content_type}>")?;
379        }
380
381        if !self.parts.is_empty() {
382            for part in self.parts.iter() {
383                if !matches!(part, ModulePart::Facade) {
384                    // facade is not included in ident as switching between facade and non-facade
385                    // shouldn't change the ident
386                    write!(s, " <{part}>")?;
387                }
388            }
389        }
390
391        Ok(Vc::cell(s.into()))
392    }
393}
394
395fn escape_file_path(s: &str) -> String {
396    static SEPARATOR_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[/#?:]").unwrap());
397    SEPARATOR_REGEX.replace_all(s, "_").to_string()
398}
399
/// Replaces every `.` in `s` with `_`.
fn clean_additional_extensions(s: &str) -> String {
    s.chars().map(|c| if c == '.' { '_' } else { c }).collect()
}
403
#[cfg(test)]
pub mod tests {
    use turbo_rcstr::{RcStr, rcstr};
    use turbo_tasks::Vc;
    use turbo_tasks_backend::{BackendOptions, TurboTasksBackend, noop_backing_storage};
    use turbo_tasks_fs::{FileSystem, VirtualFileSystem};

    use crate::ident::AssetIdent;

    /// Checks that `:`, `?` and `#` in a file name are replaced by `_` in the output name,
    /// and that the prefix is prepended and the expected extension is kept as the suffix.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_output_name_escaping() {
        let backend = TurboTasksBackend::new(BackendOptions::default(), noop_backing_storage());
        let tt = turbo_tasks::TurboTasks::new(backend);
        let outcome = tt
            .run_once(async move {
                #[turbo_tasks::function(operation, root)]
                async fn output_name_operation() -> anyhow::Result<Vc<RcStr>> {
                    let root = VirtualFileSystem::new_with_name(rcstr!("test"))
                        .root()
                        .owned()
                        .await?;

                    let ident = AssetIdent::from_path(root.join("a:b?c#d.js")?).into_vc();
                    let name = ident
                        .output_name(root, Some(rcstr!("prefix")), rcstr!(".js"))
                        .await?;
                    Ok(Vc::cell((*name).clone()))
                }

                let name = output_name_operation().read_strongly_consistent().await?;
                assert_eq!(&*name, "prefix-a_b_c_d.js");

                Ok(())
            })
            .await;
        outcome.unwrap();
    }
}