turbopack_core/
ident.rs

1use std::fmt::Write;
2
3use anyhow::Result;
4use once_cell::sync::Lazy;
5use regex::Regex;
6use serde::{Deserialize, Serialize};
7use turbo_rcstr::RcStr;
8use turbo_tasks::{NonLocalValue, ResolvedVc, TaskInput, ValueToString, Vc, trace::TraceRawVcs};
9use turbo_tasks_fs::FileSystemPath;
10use turbo_tasks_hash::{DeterministicHash, Xxh3Hash64Hasher, encode_hex, hash_xxh3_hash64};
11
12use crate::resolve::ModulePart;
13
/// A layer identifies a distinct part of the module graph.
///
/// A layer always carries a `name` and may additionally carry a separate display name; see
/// [`Layer::user_friendly_name`] for how the two are combined.
#[derive(
    Clone,
    TaskInput,
    Hash,
    Debug,
    DeterministicHash,
    Eq,
    PartialEq,
    TraceRawVcs,
    Serialize,
    Deserialize,
    NonLocalValue,
)]
pub struct Layer {
    /// The name of the layer. The constructors debug-assert that it is not empty.
    name: RcStr,
    /// Optional display name. When it is `None`, `name` is used as the display name instead.
    user_friendly_name: Option<RcStr>,
}
32
33impl Layer {
34    pub fn new(name: RcStr) -> Self {
35        debug_assert!(!name.is_empty());
36        Self {
37            name,
38            user_friendly_name: None,
39        }
40    }
41    pub fn new_with_user_friendly_name(name: RcStr, user_friendly_name: RcStr) -> Self {
42        debug_assert!(!name.is_empty());
43        debug_assert!(!user_friendly_name.is_empty());
44        Self {
45            name,
46            user_friendly_name: Some(user_friendly_name),
47        }
48    }
49
50    /// Returns a user friendly name for this layer
51    pub fn user_friendly_name(&self) -> &RcStr {
52        self.user_friendly_name.as_ref().unwrap_or(&self.name)
53    }
54
55    pub fn name(&self) -> &RcStr {
56        &self.name
57    }
58}
59
/// Identifies an asset by its path plus a set of qualifiers (query, fragment, nested assets,
/// modifiers, module parts, layer and content type).
#[turbo_tasks::value]
#[derive(Clone, Debug, Hash, TaskInput)]
pub struct AssetIdent {
    /// The primary path of the asset.
    pub path: FileSystemPath,
    /// The query string of the asset. This is either the empty string or a query string that
    /// starts with a `?` (e.g. `?foo=bar`).
    pub query: RcStr,
    /// The fragment of the asset. This is either the empty string or a fragment string that
    /// starts with a `#` (e.g. `#foo`).
    pub fragment: RcStr,
    /// The assets that are nested in this asset, as `(key, ident)` pairs.
    pub assets: Vec<(RcStr, ResolvedVc<AssetIdent>)>,
    /// The modifiers of this asset (e.g. `client chunks`).
    pub modifiers: Vec<RcStr>,
    /// The parts of the asset that are (ECMAScript) modules.
    pub parts: Vec<ModulePart>,
    /// The asset layer the asset was created from.
    pub layer: Option<Layer>,
    /// The MIME content type, if this asset was created from a data URL.
    pub content_type: Option<RcStr>,
}
82
83impl AssetIdent {
84    pub fn add_modifier(&mut self, modifier: RcStr) {
85        debug_assert!(!modifier.is_empty(), "modifiers cannot be empty.");
86        self.modifiers.push(modifier);
87    }
88
89    pub fn add_asset(&mut self, key: RcStr, asset: ResolvedVc<AssetIdent>) {
90        self.assets.push((key, asset));
91    }
92
93    pub async fn rename_as_ref(&mut self, pattern: &str) -> Result<()> {
94        let root = self.path.root().await?;
95        let path = self.path.clone();
96        self.path = root.join(&pattern.replace('*', &path.path))?;
97        Ok(())
98    }
99}
100
101#[turbo_tasks::value_impl]
102impl ValueToString for AssetIdent {
103    #[turbo_tasks::function]
104    async fn to_string(&self) -> Result<Vc<RcStr>> {
105        let mut s = self.path.value_to_string().owned().await?.into_owned();
106
107        // The query string is either empty or non-empty starting with `?` so we can just concat
108        s.push_str(&self.query);
109        // ditto for fragment
110        s.push_str(&self.fragment);
111
112        if !self.assets.is_empty() {
113            s.push_str(" {");
114
115            for (i, (key, asset)) in self.assets.iter().enumerate() {
116                if i > 0 {
117                    s.push(',');
118                }
119
120                let asset_str = asset.to_string().await?;
121                write!(s, " {key} => {asset_str:?}")?;
122            }
123
124            s.push_str(" }");
125        }
126
127        if let Some(layer) = &self.layer {
128            write!(s, " [{}]", layer.name)?;
129        }
130
131        if !self.modifiers.is_empty() {
132            s.push_str(" (");
133
134            for (i, modifier) in self.modifiers.iter().enumerate() {
135                if i > 0 {
136                    s.push_str(", ");
137                }
138
139                s.push_str(modifier);
140            }
141
142            s.push(')');
143        }
144
145        if let Some(content_type) = &self.content_type {
146            write!(s, " <{content_type}>")?;
147        }
148
149        if !self.parts.is_empty() {
150            for part in self.parts.iter() {
151                if !matches!(part, ModulePart::Facade) {
152                    // facade is not included in ident as switching between facade and non-facade
153                    // shouldn't change the ident
154                    write!(s, " <{part}>")?;
155                }
156            }
157        }
158
159        Ok(Vc::cell(s.into()))
160    }
161}
162
163#[turbo_tasks::value_impl]
164impl AssetIdent {
165    #[turbo_tasks::function]
166    pub fn new(ident: AssetIdent) -> Vc<Self> {
167        debug_assert!(
168            ident.query.is_empty() || ident.query.starts_with("?"),
169            "query should be empty or start with a `?`"
170        );
171        debug_assert!(
172            ident.fragment.is_empty() || ident.fragment.starts_with("#"),
173            "query should be empty or start with a `?`"
174        );
175        ident.cell()
176    }
177
178    /// Creates an [AssetIdent] from a [FileSystemPath]
179    #[turbo_tasks::function]
180    pub fn from_path(path: FileSystemPath) -> Vc<Self> {
181        Self::new(AssetIdent {
182            path,
183            query: RcStr::default(),
184            fragment: RcStr::default(),
185            assets: Vec::new(),
186            modifiers: Vec::new(),
187            parts: Vec::new(),
188            layer: None,
189            content_type: None,
190        })
191    }
192
193    #[turbo_tasks::function]
194    pub fn with_query(&self, query: RcStr) -> Vc<Self> {
195        let mut this = self.clone();
196        this.query = query;
197        Self::new(this)
198    }
199
200    #[turbo_tasks::function]
201    pub fn with_fragment(&self, fragment: RcStr) -> Vc<Self> {
202        let mut this = self.clone();
203        this.fragment = fragment;
204        Self::new(this)
205    }
206
207    #[turbo_tasks::function]
208    pub fn with_modifier(&self, modifier: RcStr) -> Vc<Self> {
209        let mut this = self.clone();
210        this.add_modifier(modifier);
211        Self::new(this)
212    }
213
214    #[turbo_tasks::function]
215    pub fn with_part(&self, part: ModulePart) -> Vc<Self> {
216        let mut this = self.clone();
217        this.parts.push(part);
218        Self::new(this)
219    }
220
221    #[turbo_tasks::function]
222    pub fn with_path(&self, path: FileSystemPath) -> Vc<Self> {
223        let mut this = self.clone();
224        this.path = path;
225        Self::new(this)
226    }
227
228    #[turbo_tasks::function]
229    pub fn with_layer(&self, layer: Layer) -> Vc<Self> {
230        let mut this = self.clone();
231        this.layer = Some(layer);
232        Self::new(this)
233    }
234
235    #[turbo_tasks::function]
236    pub fn with_content_type(&self, content_type: RcStr) -> Vc<Self> {
237        let mut this = self.clone();
238        this.content_type = Some(content_type);
239        Self::new(this)
240    }
241
242    #[turbo_tasks::function]
243    pub fn with_asset(&self, key: RcStr, asset: ResolvedVc<AssetIdent>) -> Vc<Self> {
244        let mut this = self.clone();
245        this.add_asset(key, asset);
246        Self::new(this)
247    }
248
249    #[turbo_tasks::function]
250    pub async fn rename_as(&self, pattern: RcStr) -> Result<Vc<Self>> {
251        let mut this = self.clone();
252        this.rename_as_ref(&pattern).await?;
253        Ok(Self::new(this))
254    }
255
256    #[turbo_tasks::function]
257    pub fn path(&self) -> Vc<FileSystemPath> {
258        self.path.clone().cell()
259    }
260
261    /// Computes a unique output asset name for the given asset identifier.
262    /// TODO(alexkirsz) This is `turbopack-browser` specific, as
263    /// `turbopack-nodejs` would use a content hash instead. But for now
264    /// both are using the same name generation logic.
265    #[turbo_tasks::function]
266    pub async fn output_name(
267        &self,
268        context_path: FileSystemPath,
269        expected_extension: RcStr,
270    ) -> Result<Vc<RcStr>> {
271        debug_assert!(
272            expected_extension.starts_with("."),
273            "the extension should include the leading '.', got '{expected_extension}'"
274        );
275        // TODO(PACK-2140): restrict character set to A–Za–z0–9-_.~'()
276        // to be compatible with all operating systems + URLs.
277
278        // For clippy -- This explicit deref is necessary
279        let path = &self.path;
280        let mut name = if let Some(inner) = context_path.get_path_to(path) {
281            clean_separators(inner)
282        } else {
283            clean_separators(&self.path.value_to_string().await?)
284        };
285        let removed_extension = name.ends_with(&*expected_extension);
286        if removed_extension {
287            name.truncate(name.len() - expected_extension.len());
288        }
289        // This step ensures that leading dots are not preserved in file names. This is
290        // important as some file servers do not serve files with leading dots (e.g.
291        // Next.js).
292        let mut name = clean_additional_extensions(&name);
293
294        let default_modifier = match expected_extension.as_str() {
295            ".js" => Some("ecmascript"),
296            ".css" => Some("css"),
297            _ => None,
298        };
299
300        let mut hasher = Xxh3Hash64Hasher::new();
301        let mut has_hash = false;
302        let AssetIdent {
303            path: _,
304            query,
305            fragment,
306            assets,
307            modifiers,
308            parts,
309            layer,
310            content_type,
311        } = self;
312        if !query.is_empty() {
313            0_u8.deterministic_hash(&mut hasher);
314            query.deterministic_hash(&mut hasher);
315            has_hash = true;
316        }
317        if !fragment.is_empty() {
318            1_u8.deterministic_hash(&mut hasher);
319            fragment.deterministic_hash(&mut hasher);
320            has_hash = true;
321        }
322        for (key, ident) in assets.iter() {
323            2_u8.deterministic_hash(&mut hasher);
324            key.deterministic_hash(&mut hasher);
325            ident.to_string().await?.deterministic_hash(&mut hasher);
326            has_hash = true;
327        }
328        for modifier in modifiers.iter() {
329            if let Some(default_modifier) = default_modifier
330                && *modifier == default_modifier
331            {
332                continue;
333            }
334            3_u8.deterministic_hash(&mut hasher);
335            modifier.deterministic_hash(&mut hasher);
336            has_hash = true;
337        }
338        for part in parts.iter() {
339            4_u8.deterministic_hash(&mut hasher);
340            match part {
341                ModulePart::Evaluation => {
342                    1_u8.deterministic_hash(&mut hasher);
343                }
344                ModulePart::Export(export) => {
345                    2_u8.deterministic_hash(&mut hasher);
346                    export.deterministic_hash(&mut hasher);
347                }
348                ModulePart::RenamedExport {
349                    original_export,
350                    export,
351                } => {
352                    3_u8.deterministic_hash(&mut hasher);
353                    original_export.deterministic_hash(&mut hasher);
354                    export.deterministic_hash(&mut hasher);
355                }
356                ModulePart::RenamedNamespace { export } => {
357                    4_u8.deterministic_hash(&mut hasher);
358                    export.deterministic_hash(&mut hasher);
359                }
360                ModulePart::Internal(id) => {
361                    5_u8.deterministic_hash(&mut hasher);
362                    id.deterministic_hash(&mut hasher);
363                }
364                ModulePart::Locals => {
365                    6_u8.deterministic_hash(&mut hasher);
366                }
367                ModulePart::Exports => {
368                    7_u8.deterministic_hash(&mut hasher);
369                }
370                ModulePart::Facade => {
371                    8_u8.deterministic_hash(&mut hasher);
372                }
373            }
374
375            has_hash = true;
376        }
377        if let Some(layer) = layer {
378            5_u8.deterministic_hash(&mut hasher);
379            layer.deterministic_hash(&mut hasher);
380            has_hash = true;
381        }
382        if let Some(content_type) = content_type {
383            6_u8.deterministic_hash(&mut hasher);
384            content_type.deterministic_hash(&mut hasher);
385            has_hash = true;
386        }
387
388        if has_hash {
389            let hash = encode_hex(hasher.finish());
390            let truncated_hash = &hash[..8];
391            write!(name, "_{truncated_hash}")?;
392        }
393
394        // Location in "path" where hashed and named parts are split.
395        // Everything before i is hashed and after i named.
396        let mut i = 0;
397        static NODE_MODULES: &str = "_node_modules_";
398        if let Some(j) = name.rfind(NODE_MODULES) {
399            i = j + NODE_MODULES.len();
400        }
401        const MAX_FILENAME: usize = 80;
402        if name.len() - i > MAX_FILENAME {
403            i = name.len() - MAX_FILENAME;
404            if let Some(j) = name[i..].find('_')
405                && j < 20
406            {
407                i += j + 1;
408            }
409        }
410        if i > 0 {
411            let hash = encode_hex(hash_xxh3_hash64(&name.as_bytes()[..i]));
412            let truncated_hash = &hash[..5];
413            name = format!("{}_{}", truncated_hash, &name[i..]);
414        }
415        // We need to make sure that `.json` and `.json.js` doesn't end up with the same
416        // name. So when we add an extra extension when want to mark that with a "._"
417        // suffix.
418        if !removed_extension {
419            name += "._";
420        }
421        name += &expected_extension;
422        Ok(Vc::cell(name.into()))
423    }
424}
425
426fn clean_separators(s: &str) -> String {
427    static SEPARATOR_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"[/#?]").unwrap());
428    SEPARATOR_REGEX.replace_all(s, "_").to_string()
429}
430
/// Returns a copy of `s` in which every `.` is replaced by `_`.
fn clean_additional_extensions(s: &str) -> String {
    s.chars()
        .map(|ch| if ch == '.' { '_' } else { ch })
        .collect()
}