Skip to main content

turbopack_core/
ident.rs

1use std::fmt::Write;
2
3use anyhow::Result;
4use bincode::{Decode, Encode};
5use once_cell::sync::Lazy;
6use regex::Regex;
7use turbo_rcstr::RcStr;
8use turbo_tasks::{
9    NonLocalValue, ReadRef, ResolvedVc, TaskInput, ValueToString, ValueToStringRef, Vc,
10    trace::TraceRawVcs, turbofmt,
11};
12use turbo_tasks_fs::FileSystemPath;
13use turbo_tasks_hash::{DeterministicHash, Xxh3Hash64Hasher, encode_base38, hash_xxh3_hash64};
14
15use crate::resolve::ModulePart;
16
17/// A layer identifies a distinct part of the module graph.
18#[derive(
19    Clone,
20    TaskInput,
21    Hash,
22    Debug,
23    DeterministicHash,
24    Eq,
25    PartialEq,
26    TraceRawVcs,
27    NonLocalValue,
28    Encode,
29    Decode,
30)]
31pub struct Layer {
32    name: RcStr,
33    user_friendly_name: Option<RcStr>,
34}
35
36impl Layer {
37    pub fn new(name: RcStr) -> Self {
38        debug_assert!(!name.is_empty());
39        Self {
40            name,
41            user_friendly_name: None,
42        }
43    }
44    pub fn new_with_user_friendly_name(name: RcStr, user_friendly_name: RcStr) -> Self {
45        debug_assert!(!name.is_empty());
46        debug_assert!(!user_friendly_name.is_empty());
47        Self {
48            name,
49            user_friendly_name: Some(user_friendly_name),
50        }
51    }
52
53    /// Returns a user friendly name for this layer
54    pub fn user_friendly_name(&self) -> &RcStr {
55        self.user_friendly_name.as_ref().unwrap_or(&self.name)
56    }
57
58    pub fn name(&self) -> &RcStr {
59        &self.name
60    }
61}
62
63#[turbo_tasks::value]
64#[derive(Clone, Debug, Hash, TaskInput)]
65pub struct AssetIdent {
66    /// The primary path of the asset
67    pub path: FileSystemPath,
68    /// The query string of the asset this is either the empty string or a query string that starts
69    /// with a `?` (e.g. `?foo=bar`)
70    pub query: RcStr,
71    /// The fragment of the asset, this is either the empty string or a fragment string that starts
72    /// with a `#` (e.g. `#foo`)
73    pub fragment: RcStr,
74    /// The assets that are nested in this asset
75    pub assets: Vec<(RcStr, ResolvedVc<AssetIdent>)>,
76    /// The modifiers of this asset (e.g. `client chunks`)
77    pub modifiers: Vec<RcStr>,
78    /// The parts of the asset that are (ECMAScript) modules
79    pub parts: Vec<ModulePart>,
80    /// The asset layer the asset was created from.
81    pub layer: Option<Layer>,
82    /// The MIME content type, if this asset was created from a data URL.
83    pub content_type: Option<RcStr>,
84}
85
86impl AssetIdent {
87    pub fn new(ident: AssetIdent) -> Vc<Self> {
88        AssetIdent::new_inner(ReadRef::new_owned(ident))
89    }
90
91    pub fn add_modifier(&mut self, modifier: RcStr) {
92        debug_assert!(!modifier.is_empty(), "modifiers cannot be empty.");
93        self.modifiers.push(modifier);
94    }
95
96    pub fn add_asset(&mut self, key: RcStr, asset: ResolvedVc<AssetIdent>) {
97        self.assets.push((key, asset));
98    }
99
100    pub async fn rename_as_ref(&mut self, pattern: &str) -> Result<()> {
101        let root = self.path.root().await?;
102        let path = self.path.clone();
103        self.path = root.join(&pattern.replace('*', &path.path))?;
104        self.content_type = None;
105        Ok(())
106    }
107}
108
109#[turbo_tasks::value_impl]
110impl AssetIdent {
111    #[turbo_tasks::function]
112    fn new_inner(ident: ReadRef<AssetIdent>) -> Vc<Self> {
113        debug_assert!(
114            ident.query.is_empty() || ident.query.starts_with("?"),
115            "query should be empty or start with a `?`"
116        );
117        debug_assert!(
118            ident.fragment.is_empty() || ident.fragment.starts_with("#"),
119            "query should be empty or start with a `?`"
120        );
121        ReadRef::cell(ident)
122    }
123
124    /// Creates an [AssetIdent] from a [FileSystemPath]
125    #[turbo_tasks::function]
126    pub fn from_path(path: FileSystemPath) -> Vc<Self> {
127        Self::new(AssetIdent {
128            path,
129            query: RcStr::default(),
130            fragment: RcStr::default(),
131            assets: Vec::new(),
132            modifiers: Vec::new(),
133            parts: Vec::new(),
134            layer: None,
135            content_type: None,
136        })
137    }
138
139    #[turbo_tasks::function]
140    pub fn with_query(&self, query: RcStr) -> Vc<Self> {
141        let mut this = self.clone();
142        this.query = query;
143        Self::new(this)
144    }
145
146    #[turbo_tasks::function]
147    pub fn with_fragment(&self, fragment: RcStr) -> Vc<Self> {
148        let mut this = self.clone();
149        this.fragment = fragment;
150        Self::new(this)
151    }
152
153    #[turbo_tasks::function]
154    pub fn with_modifier(&self, modifier: RcStr) -> Vc<Self> {
155        let mut this = self.clone();
156        this.add_modifier(modifier);
157        Self::new(this)
158    }
159
160    #[turbo_tasks::function]
161    pub fn with_part(&self, part: ModulePart) -> Vc<Self> {
162        let mut this = self.clone();
163        this.parts.push(part);
164        Self::new(this)
165    }
166
167    #[turbo_tasks::function]
168    pub fn with_path(&self, path: FileSystemPath) -> Vc<Self> {
169        let mut this = self.clone();
170        this.path = path;
171        Self::new(this)
172    }
173
174    #[turbo_tasks::function]
175    pub fn with_layer(&self, layer: Layer) -> Vc<Self> {
176        let mut this = self.clone();
177        this.layer = Some(layer);
178        Self::new(this)
179    }
180
181    #[turbo_tasks::function]
182    pub fn with_content_type(&self, content_type: RcStr) -> Vc<Self> {
183        let mut this = self.clone();
184        this.content_type = Some(content_type);
185        Self::new(this)
186    }
187
188    #[turbo_tasks::function]
189    pub fn with_asset(&self, key: RcStr, asset: ResolvedVc<AssetIdent>) -> Vc<Self> {
190        let mut this = self.clone();
191        this.add_asset(key, asset);
192        Self::new(this)
193    }
194
195    #[turbo_tasks::function]
196    pub async fn rename_as(&self, pattern: RcStr) -> Result<Vc<Self>> {
197        let mut this = self.clone();
198        this.rename_as_ref(&pattern).await?;
199        Ok(Self::new(this))
200    }
201
202    #[turbo_tasks::function]
203    pub fn path(&self) -> Vc<FileSystemPath> {
204        self.path.clone().cell()
205    }
206
207    /// Computes a unique output asset name for the given asset identifier.
208    /// TODO(alexkirsz) This is `turbopack-browser` specific, as
209    /// `turbopack-nodejs` would use a content hash instead. But for now
210    /// both are using the same name generation logic.
211    #[turbo_tasks::function]
212    pub async fn output_name(
213        &self,
214        context_path: FileSystemPath,
215        prefix: Option<RcStr>,
216        expected_extension: RcStr,
217    ) -> Result<Vc<RcStr>> {
218        debug_assert!(
219            expected_extension.starts_with("."),
220            "the extension should include the leading '.', got '{expected_extension}'"
221        );
222        // TODO(PACK-2140): restrict character set to A–Za–z0–9-_.~'()
223        // to be compatible with all operating systems + URLs.
224
225        // For clippy -- This explicit deref is necessary
226        let path = &self.path;
227        let mut name = if let Some(inner) = context_path.get_path_to(path) {
228            escape_file_path(inner)
229        } else {
230            escape_file_path(&self.path.to_string_ref().await?)
231        };
232        let removed_extension = name.ends_with(&*expected_extension);
233        if removed_extension {
234            name.truncate(name.len() - expected_extension.len());
235        }
236        // This step ensures that leading dots are not preserved in file names. This is
237        // important as some file servers do not serve files with leading dots (e.g.
238        // Next.js).
239        let mut name = clean_additional_extensions(&name);
240        if let Some(prefix) = prefix {
241            name = format!("{prefix}-{name}");
242        }
243
244        let default_modifier = match expected_extension.as_str() {
245            ".js" => Some("ecmascript"),
246            ".css" => Some("css"),
247            _ => None,
248        };
249
250        let mut hasher = Xxh3Hash64Hasher::new();
251        let mut has_hash = false;
252        let AssetIdent {
253            path: _,
254            query,
255            fragment,
256            assets,
257            modifiers,
258            parts,
259            layer,
260            content_type,
261        } = self;
262        if !query.is_empty() {
263            0_u8.deterministic_hash(&mut hasher);
264            query.deterministic_hash(&mut hasher);
265            has_hash = true;
266        }
267        if !fragment.is_empty() {
268            1_u8.deterministic_hash(&mut hasher);
269            fragment.deterministic_hash(&mut hasher);
270            has_hash = true;
271        }
272        for (key, ident) in assets.iter() {
273            2_u8.deterministic_hash(&mut hasher);
274            key.deterministic_hash(&mut hasher);
275            ident.to_string().await?.deterministic_hash(&mut hasher);
276            has_hash = true;
277        }
278        for modifier in modifiers.iter() {
279            if let Some(default_modifier) = default_modifier
280                && *modifier == default_modifier
281            {
282                continue;
283            }
284            3_u8.deterministic_hash(&mut hasher);
285            modifier.deterministic_hash(&mut hasher);
286            has_hash = true;
287        }
288        for part in parts.iter() {
289            4_u8.deterministic_hash(&mut hasher);
290            match part {
291                ModulePart::Evaluation => {
292                    1_u8.deterministic_hash(&mut hasher);
293                }
294                ModulePart::Export(export) => {
295                    2_u8.deterministic_hash(&mut hasher);
296                    export.deterministic_hash(&mut hasher);
297                }
298                ModulePart::RenamedExport {
299                    original_export,
300                    export,
301                } => {
302                    3_u8.deterministic_hash(&mut hasher);
303                    original_export.deterministic_hash(&mut hasher);
304                    export.deterministic_hash(&mut hasher);
305                }
306                ModulePart::RenamedNamespace { export } => {
307                    4_u8.deterministic_hash(&mut hasher);
308                    export.deterministic_hash(&mut hasher);
309                }
310                ModulePart::Internal(id) => {
311                    5_u8.deterministic_hash(&mut hasher);
312                    id.deterministic_hash(&mut hasher);
313                }
314                ModulePart::Locals => {
315                    6_u8.deterministic_hash(&mut hasher);
316                }
317                ModulePart::Exports => {
318                    7_u8.deterministic_hash(&mut hasher);
319                }
320                ModulePart::Facade => {
321                    8_u8.deterministic_hash(&mut hasher);
322                }
323            }
324
325            has_hash = true;
326        }
327        if let Some(layer) = layer {
328            5_u8.deterministic_hash(&mut hasher);
329            layer.deterministic_hash(&mut hasher);
330            has_hash = true;
331        }
332        if let Some(content_type) = content_type {
333            6_u8.deterministic_hash(&mut hasher);
334            content_type.deterministic_hash(&mut hasher);
335            has_hash = true;
336        }
337
338        if has_hash {
339            let hash = encode_base38(hasher.finish());
340            // 7 base38 chars ≈ 36 bits of collision resistance
341            let truncated_hash = &hash[..7];
342            write!(name, "_{truncated_hash}")?;
343        }
344
345        // Location in "path" where hashed and named parts are split.
346        // Everything before i is hashed and after i named.
347        let mut i = 0;
348        static NODE_MODULES: &str = "_node_modules_";
349        if let Some(j) = name.rfind(NODE_MODULES) {
350            i = j + NODE_MODULES.len();
351        }
352        const MAX_FILENAME: usize = 80;
353        if name.len() - i > MAX_FILENAME {
354            i = name.len() - MAX_FILENAME;
355            if let Some(j) = name[i..].find('_')
356                && j < 20
357            {
358                i += j + 1;
359            }
360        }
361        if i > 0 {
362            let hash = encode_base38(hash_xxh3_hash64(&name.as_bytes()[..i]));
363            // 4 base38 chars ≈ 21 bits — just a short disambiguator prefix
364            let truncated_hash = &hash[..4];
365            name = format!("{}_{}", truncated_hash, &name[i..]);
366        }
367        // We need to make sure that `.json` and `.json.js` doesn't end up with the same
368        // name. So when we add an extra extension when want to mark that with a "._"
369        // suffix.
370        if !removed_extension {
371            name += "._";
372        }
373        name += &expected_extension;
374        Ok(Vc::cell(name.into()))
375    }
376}
377
378#[turbo_tasks::value_impl]
379impl ValueToString for AssetIdent {
380    #[turbo_tasks::function]
381    async fn to_string(&self) -> Result<Vc<RcStr>> {
382        // The query string/fragment is either empty or non-empty starting with
383        // `?` so we can just concat
384        let mut s = turbofmt!("{}{}{}", self.path, self.query, self.fragment)
385            .await?
386            .into_owned();
387
388        if !self.assets.is_empty() {
389            s.push_str(" {");
390
391            for (i, (key, asset)) in self.assets.iter().enumerate() {
392                if i > 0 {
393                    s.push(',');
394                }
395
396                let asset_str = asset.to_string().await?;
397                write!(s, " {key} => {asset_str:?}")?;
398            }
399
400            s.push_str(" }");
401        }
402
403        if let Some(layer) = &self.layer {
404            write!(s, " [{}]", layer.name)?;
405        }
406
407        if !self.modifiers.is_empty() {
408            s.push_str(" (");
409
410            for (i, modifier) in self.modifiers.iter().enumerate() {
411                if i > 0 {
412                    s.push_str(", ");
413                }
414
415                s.push_str(modifier);
416            }
417
418            s.push(')');
419        }
420
421        if let Some(content_type) = &self.content_type {
422            write!(s, " <{content_type}>")?;
423        }
424
425        if !self.parts.is_empty() {
426            for part in self.parts.iter() {
427                if !matches!(part, ModulePart::Facade) {
428                    // facade is not included in ident as switching between facade and non-facade
429                    // shouldn't change the ident
430                    write!(s, " <{part}>")?;
431                }
432            }
433        }
434
435        Ok(Vc::cell(s.into()))
436    }
437}
438
439fn escape_file_path(s: &str) -> String {
440    static SEPARATOR_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"[/#?:]").unwrap());
441    SEPARATOR_REGEX.replace_all(s, "_").to_string()
442}
443
/// Returns a copy of `s` in which every `.` is replaced with `_`.
fn clean_additional_extensions(s: &str) -> String {
    s.chars()
        .map(|c| if c == '.' { '_' } else { c })
        .collect()
}
447
#[cfg(test)]
pub mod tests {
    use turbo_rcstr::{RcStr, rcstr};
    use turbo_tasks::Vc;
    use turbo_tasks_backend::{BackendOptions, TurboTasksBackend, noop_backing_storage};
    use turbo_tasks_fs::{FileSystem, VirtualFileSystem};

    use crate::ident::AssetIdent;

    /// Checks that `:`, `?` and `#` in a file name are escaped to `_` in the output name, that
    /// the prefix is prepended, and that the expected extension is kept as is.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_output_name_escaping() {
        let backend = TurboTasksBackend::new(BackendOptions::default(), noop_backing_storage());
        let tt = turbo_tasks::TurboTasks::new(backend);
        tt.run_once(async move {
            #[turbo_tasks::function(operation)]
            async fn output_name_operation() -> anyhow::Result<Vc<RcStr>> {
                let fs = VirtualFileSystem::new_with_name(rcstr!("test"));
                let root = fs.root().owned().await?;

                let ident = AssetIdent::from_path(root.join("a:b?c#d.js")?);
                let name = ident
                    .output_name(root, Some(rcstr!("prefix")), rcstr!(".js"))
                    .await?;
                Ok(Vc::cell((*name).clone()))
            }

            let actual = output_name_operation().read_strongly_consistent().await?;
            assert_eq!(&*actual, "prefix-a_b_c_d.js");

            Ok(())
        })
        .await
        .unwrap();
    }
}