Skip to main content

turbopack_core/
ident.rs

1use std::fmt::Write;
2
3use anyhow::Result;
4use bincode::{Decode, Encode};
5use once_cell::sync::Lazy;
6use regex::Regex;
7use turbo_rcstr::RcStr;
8use turbo_tasks::{
9    NonLocalValue, ReadRef, ResolvedVc, TaskInput, ValueToString, Vc, trace::TraceRawVcs, turbofmt,
10};
11use turbo_tasks_fs::FileSystemPath;
12use turbo_tasks_hash::{DeterministicHash, Xxh3Hash64Hasher, encode_hex, hash_xxh3_hash64};
13
14use crate::resolve::ModulePart;
15
/// A layer identifies a distinct part of the module graph.
#[derive(
    Clone,
    TaskInput,
    Hash,
    Debug,
    DeterministicHash,
    Eq,
    PartialEq,
    TraceRawVcs,
    NonLocalValue,
    Encode,
    Decode,
)]
pub struct Layer {
    /// The name of the layer. The constructors `debug_assert!` that it is not empty.
    name: RcStr,
    /// An optional display name. When this is `None`, [`Layer::user_friendly_name`] falls back
    /// to `name`.
    user_friendly_name: Option<RcStr>,
}
34
35impl Layer {
36    pub fn new(name: RcStr) -> Self {
37        debug_assert!(!name.is_empty());
38        Self {
39            name,
40            user_friendly_name: None,
41        }
42    }
43    pub fn new_with_user_friendly_name(name: RcStr, user_friendly_name: RcStr) -> Self {
44        debug_assert!(!name.is_empty());
45        debug_assert!(!user_friendly_name.is_empty());
46        Self {
47            name,
48            user_friendly_name: Some(user_friendly_name),
49        }
50    }
51
52    /// Returns a user friendly name for this layer
53    pub fn user_friendly_name(&self) -> &RcStr {
54        self.user_friendly_name.as_ref().unwrap_or(&self.name)
55    }
56
57    pub fn name(&self) -> &RcStr {
58        &self.name
59    }
60}
61
/// Identifies an asset by its path together with the query, fragment, nested assets, modifiers,
/// module parts, layer and content type that distinguish it.
#[turbo_tasks::value]
#[derive(Clone, Debug, Hash, TaskInput)]
pub struct AssetIdent {
    /// The primary path of the asset
    pub path: FileSystemPath,
    /// The query string of the asset. This is either the empty string or a query string that
    /// starts with a `?` (e.g. `?foo=bar`)
    pub query: RcStr,
    /// The fragment of the asset. This is either the empty string or a fragment string that
    /// starts with a `#` (e.g. `#foo`)
    pub fragment: RcStr,
    /// The assets that are nested in this asset, each stored together with a key
    pub assets: Vec<(RcStr, ResolvedVc<AssetIdent>)>,
    /// The modifiers of this asset (e.g. `client chunks`)
    pub modifiers: Vec<RcStr>,
    /// The parts of the asset that are (ECMAScript) modules
    pub parts: Vec<ModulePart>,
    /// The asset layer the asset was created from.
    pub layer: Option<Layer>,
    /// The MIME content type, if this asset was created from a data URL.
    pub content_type: Option<RcStr>,
}
84
85impl AssetIdent {
86    pub fn new(ident: AssetIdent) -> Vc<Self> {
87        AssetIdent::new_inner(ReadRef::new_owned(ident))
88    }
89
90    pub fn add_modifier(&mut self, modifier: RcStr) {
91        debug_assert!(!modifier.is_empty(), "modifiers cannot be empty.");
92        self.modifiers.push(modifier);
93    }
94
95    pub fn add_asset(&mut self, key: RcStr, asset: ResolvedVc<AssetIdent>) {
96        self.assets.push((key, asset));
97    }
98
99    pub async fn rename_as_ref(&mut self, pattern: &str) -> Result<()> {
100        let root = self.path.root().await?;
101        let path = self.path.clone();
102        self.path = root.join(&pattern.replace('*', &path.path))?;
103        self.content_type = None;
104        Ok(())
105    }
106}
107
108#[turbo_tasks::value_impl]
109impl AssetIdent {
110    #[turbo_tasks::function]
111    fn new_inner(ident: ReadRef<AssetIdent>) -> Vc<Self> {
112        debug_assert!(
113            ident.query.is_empty() || ident.query.starts_with("?"),
114            "query should be empty or start with a `?`"
115        );
116        debug_assert!(
117            ident.fragment.is_empty() || ident.fragment.starts_with("#"),
118            "query should be empty or start with a `?`"
119        );
120        ReadRef::cell(ident)
121    }
122
123    /// Creates an [AssetIdent] from a [FileSystemPath]
124    #[turbo_tasks::function]
125    pub fn from_path(path: FileSystemPath) -> Vc<Self> {
126        Self::new(AssetIdent {
127            path,
128            query: RcStr::default(),
129            fragment: RcStr::default(),
130            assets: Vec::new(),
131            modifiers: Vec::new(),
132            parts: Vec::new(),
133            layer: None,
134            content_type: None,
135        })
136    }
137
138    #[turbo_tasks::function]
139    pub fn with_query(&self, query: RcStr) -> Vc<Self> {
140        let mut this = self.clone();
141        this.query = query;
142        Self::new(this)
143    }
144
145    #[turbo_tasks::function]
146    pub fn with_fragment(&self, fragment: RcStr) -> Vc<Self> {
147        let mut this = self.clone();
148        this.fragment = fragment;
149        Self::new(this)
150    }
151
152    #[turbo_tasks::function]
153    pub fn with_modifier(&self, modifier: RcStr) -> Vc<Self> {
154        let mut this = self.clone();
155        this.add_modifier(modifier);
156        Self::new(this)
157    }
158
159    #[turbo_tasks::function]
160    pub fn with_part(&self, part: ModulePart) -> Vc<Self> {
161        let mut this = self.clone();
162        this.parts.push(part);
163        Self::new(this)
164    }
165
166    #[turbo_tasks::function]
167    pub fn with_path(&self, path: FileSystemPath) -> Vc<Self> {
168        let mut this = self.clone();
169        this.path = path;
170        Self::new(this)
171    }
172
173    #[turbo_tasks::function]
174    pub fn with_layer(&self, layer: Layer) -> Vc<Self> {
175        let mut this = self.clone();
176        this.layer = Some(layer);
177        Self::new(this)
178    }
179
180    #[turbo_tasks::function]
181    pub fn with_content_type(&self, content_type: RcStr) -> Vc<Self> {
182        let mut this = self.clone();
183        this.content_type = Some(content_type);
184        Self::new(this)
185    }
186
187    #[turbo_tasks::function]
188    pub fn with_asset(&self, key: RcStr, asset: ResolvedVc<AssetIdent>) -> Vc<Self> {
189        let mut this = self.clone();
190        this.add_asset(key, asset);
191        Self::new(this)
192    }
193
194    #[turbo_tasks::function]
195    pub async fn rename_as(&self, pattern: RcStr) -> Result<Vc<Self>> {
196        let mut this = self.clone();
197        this.rename_as_ref(&pattern).await?;
198        Ok(Self::new(this))
199    }
200
201    #[turbo_tasks::function]
202    pub fn path(&self) -> Vc<FileSystemPath> {
203        self.path.clone().cell()
204    }
205
206    /// Computes a unique output asset name for the given asset identifier.
207    /// TODO(alexkirsz) This is `turbopack-browser` specific, as
208    /// `turbopack-nodejs` would use a content hash instead. But for now
209    /// both are using the same name generation logic.
210    #[turbo_tasks::function]
211    pub async fn output_name(
212        &self,
213        context_path: FileSystemPath,
214        prefix: Option<RcStr>,
215        expected_extension: RcStr,
216    ) -> Result<Vc<RcStr>> {
217        debug_assert!(
218            expected_extension.starts_with("."),
219            "the extension should include the leading '.', got '{expected_extension}'"
220        );
221        // TODO(PACK-2140): restrict character set to A–Za–z0–9-_.~'()
222        // to be compatible with all operating systems + URLs.
223
224        // For clippy -- This explicit deref is necessary
225        let path = &self.path;
226        let mut name = if let Some(inner) = context_path.get_path_to(path) {
227            escape_file_path(inner)
228        } else {
229            escape_file_path(&self.path.value_to_string().await?)
230        };
231        let removed_extension = name.ends_with(&*expected_extension);
232        if removed_extension {
233            name.truncate(name.len() - expected_extension.len());
234        }
235        // This step ensures that leading dots are not preserved in file names. This is
236        // important as some file servers do not serve files with leading dots (e.g.
237        // Next.js).
238        let mut name = clean_additional_extensions(&name);
239        if let Some(prefix) = prefix {
240            name = format!("{prefix}-{name}");
241        }
242
243        let default_modifier = match expected_extension.as_str() {
244            ".js" => Some("ecmascript"),
245            ".css" => Some("css"),
246            _ => None,
247        };
248
249        let mut hasher = Xxh3Hash64Hasher::new();
250        let mut has_hash = false;
251        let AssetIdent {
252            path: _,
253            query,
254            fragment,
255            assets,
256            modifiers,
257            parts,
258            layer,
259            content_type,
260        } = self;
261        if !query.is_empty() {
262            0_u8.deterministic_hash(&mut hasher);
263            query.deterministic_hash(&mut hasher);
264            has_hash = true;
265        }
266        if !fragment.is_empty() {
267            1_u8.deterministic_hash(&mut hasher);
268            fragment.deterministic_hash(&mut hasher);
269            has_hash = true;
270        }
271        for (key, ident) in assets.iter() {
272            2_u8.deterministic_hash(&mut hasher);
273            key.deterministic_hash(&mut hasher);
274            ident.to_string().await?.deterministic_hash(&mut hasher);
275            has_hash = true;
276        }
277        for modifier in modifiers.iter() {
278            if let Some(default_modifier) = default_modifier
279                && *modifier == default_modifier
280            {
281                continue;
282            }
283            3_u8.deterministic_hash(&mut hasher);
284            modifier.deterministic_hash(&mut hasher);
285            has_hash = true;
286        }
287        for part in parts.iter() {
288            4_u8.deterministic_hash(&mut hasher);
289            match part {
290                ModulePart::Evaluation => {
291                    1_u8.deterministic_hash(&mut hasher);
292                }
293                ModulePart::Export(export) => {
294                    2_u8.deterministic_hash(&mut hasher);
295                    export.deterministic_hash(&mut hasher);
296                }
297                ModulePart::RenamedExport {
298                    original_export,
299                    export,
300                } => {
301                    3_u8.deterministic_hash(&mut hasher);
302                    original_export.deterministic_hash(&mut hasher);
303                    export.deterministic_hash(&mut hasher);
304                }
305                ModulePart::RenamedNamespace { export } => {
306                    4_u8.deterministic_hash(&mut hasher);
307                    export.deterministic_hash(&mut hasher);
308                }
309                ModulePart::Internal(id) => {
310                    5_u8.deterministic_hash(&mut hasher);
311                    id.deterministic_hash(&mut hasher);
312                }
313                ModulePart::Locals => {
314                    6_u8.deterministic_hash(&mut hasher);
315                }
316                ModulePart::Exports => {
317                    7_u8.deterministic_hash(&mut hasher);
318                }
319                ModulePart::Facade => {
320                    8_u8.deterministic_hash(&mut hasher);
321                }
322            }
323
324            has_hash = true;
325        }
326        if let Some(layer) = layer {
327            5_u8.deterministic_hash(&mut hasher);
328            layer.deterministic_hash(&mut hasher);
329            has_hash = true;
330        }
331        if let Some(content_type) = content_type {
332            6_u8.deterministic_hash(&mut hasher);
333            content_type.deterministic_hash(&mut hasher);
334            has_hash = true;
335        }
336
337        if has_hash {
338            let hash = encode_hex(hasher.finish());
339            let truncated_hash = &hash[..8];
340            write!(name, "_{truncated_hash}")?;
341        }
342
343        // Location in "path" where hashed and named parts are split.
344        // Everything before i is hashed and after i named.
345        let mut i = 0;
346        static NODE_MODULES: &str = "_node_modules_";
347        if let Some(j) = name.rfind(NODE_MODULES) {
348            i = j + NODE_MODULES.len();
349        }
350        const MAX_FILENAME: usize = 80;
351        if name.len() - i > MAX_FILENAME {
352            i = name.len() - MAX_FILENAME;
353            if let Some(j) = name[i..].find('_')
354                && j < 20
355            {
356                i += j + 1;
357            }
358        }
359        if i > 0 {
360            let hash = encode_hex(hash_xxh3_hash64(&name.as_bytes()[..i]));
361            let truncated_hash = &hash[..5];
362            name = format!("{}_{}", truncated_hash, &name[i..]);
363        }
364        // We need to make sure that `.json` and `.json.js` doesn't end up with the same
365        // name. So when we add an extra extension when want to mark that with a "._"
366        // suffix.
367        if !removed_extension {
368            name += "._";
369        }
370        name += &expected_extension;
371        Ok(Vc::cell(name.into()))
372    }
373}
374
375#[turbo_tasks::value_impl]
376impl ValueToString for AssetIdent {
377    #[turbo_tasks::function]
378    async fn to_string(&self) -> Result<Vc<RcStr>> {
379        // The query string/fragment is either empty or non-empty starting with
380        // `?` so we can just concat
381        let mut s = turbofmt!("{}{}{}", self.path, self.query, self.fragment)
382            .await?
383            .into_owned();
384
385        if !self.assets.is_empty() {
386            s.push_str(" {");
387
388            for (i, (key, asset)) in self.assets.iter().enumerate() {
389                if i > 0 {
390                    s.push(',');
391                }
392
393                let asset_str = asset.to_string().await?;
394                write!(s, " {key} => {asset_str:?}")?;
395            }
396
397            s.push_str(" }");
398        }
399
400        if let Some(layer) = &self.layer {
401            write!(s, " [{}]", layer.name)?;
402        }
403
404        if !self.modifiers.is_empty() {
405            s.push_str(" (");
406
407            for (i, modifier) in self.modifiers.iter().enumerate() {
408                if i > 0 {
409                    s.push_str(", ");
410                }
411
412                s.push_str(modifier);
413            }
414
415            s.push(')');
416        }
417
418        if let Some(content_type) = &self.content_type {
419            write!(s, " <{content_type}>")?;
420        }
421
422        if !self.parts.is_empty() {
423            for part in self.parts.iter() {
424                if !matches!(part, ModulePart::Facade) {
425                    // facade is not included in ident as switching between facade and non-facade
426                    // shouldn't change the ident
427                    write!(s, " <{part}>")?;
428                }
429            }
430        }
431
432        Ok(Vc::cell(s.into()))
433    }
434}
435
436fn escape_file_path(s: &str) -> String {
437    static SEPARATOR_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"[/#?:]").unwrap());
438    SEPARATOR_REGEX.replace_all(s, "_").to_string()
439}
440
/// Replaces every `.` in `s` with `_`, leaving all other characters untouched.
fn clean_additional_extensions(s: &str) -> String {
    s.chars()
        .map(|ch| if ch == '.' { '_' } else { ch })
        .collect()
}
444
#[cfg(test)]
pub mod tests {
    use turbo_rcstr::{RcStr, rcstr};
    use turbo_tasks::Vc;
    use turbo_tasks_backend::{BackendOptions, TurboTasksBackend, noop_backing_storage};
    use turbo_tasks_fs::{FileSystem, VirtualFileSystem};

    use crate::ident::AssetIdent;

    /// Checks that `output_name` escapes `:`, `?` and `#` in the file name to `_`, prepends the
    /// prefix with a `-`, and keeps the expected extension.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_output_name_escaping() {
        // Uses a backend built with default options and no-op backing storage.
        let tt = turbo_tasks::TurboTasks::new(TurboTasksBackend::new(
            BackendOptions::default(),
            noop_backing_storage(),
        ));
        tt.run_once(async move {
            // Wrapped in an operation so the result can be read with
            // `read_strongly_consistent` below.
            #[turbo_tasks::function(operation)]
            async fn output_name_operation() -> anyhow::Result<Vc<RcStr>> {
                let fs = VirtualFileSystem::new_with_name(rcstr!("test"));
                let root = fs.root().owned().await?;

                // The file name contains each of `:`, `?` and `#`.
                let asset_ident = AssetIdent::from_path(root.join("a:b?c#d.js")?);
                let output_name = asset_ident
                    .output_name(root, Some(rcstr!("prefix")), rcstr!(".js"))
                    .await?;
                Ok(Vc::cell((*output_name).clone()))
            }

            let output_name = output_name_operation().read_strongly_consistent().await?;
            // No hash suffix is expected: the ident has only a path, and the `.js` extension
            // was stripped and re-added, so no `._` marker appears either.
            assert_eq!(&*output_name, "prefix-a_b_c_d.js");

            Ok(())
        })
        .await
        .unwrap();
    }
}