turbopack_core/source_map/
utils.rs

1use std::{borrow::Cow, collections::HashSet, iter};
2
3use anyhow::{Context, Result};
4use const_format::concatcp;
5use once_cell::sync::Lazy;
6use regex::Regex;
7use serde::{Deserialize, Serialize};
8use serde_json::value::RawValue;
9use turbo_rcstr::RcStr;
10use turbo_tasks::{ResolvedVc, ValueToString};
11use turbo_tasks_fs::{
12    DiskFileSystem, FileContent, FileSystemPath, rope::Rope, util::uri_from_path_buf,
13};
14use url::Url;
15
16use crate::SOURCE_URL_PROTOCOL;
17
18pub fn add_default_ignore_list(map: &mut swc_sourcemap::SourceMap) {
19    let mut ignored_ids = HashSet::new();
20
21    for (source_id, source) in map.sources().enumerate() {
22        if source.starts_with(concatcp!(SOURCE_URL_PROTOCOL, "///[next]"))
23            || source.starts_with(concatcp!(SOURCE_URL_PROTOCOL, "///[turbopack]"))
24            || source.contains("/node_modules/")
25            || source.ends_with("__nextjs-internal-proxy.cjs")
26            || source.ends_with("__nextjs-internal-proxy.mjs")
27        {
28            ignored_ids.insert(source_id);
29        }
30    }
31
32    for ignored_id in ignored_ids {
33        map.add_to_ignore_list(ignored_id as _);
34    }
35}
36
37#[derive(Serialize, Deserialize)]
38struct SourceMapSectionOffsetJson {
39    line: u32,
40    offset: u32,
41}
42
43#[derive(Serialize, Deserialize)]
44struct SourceMapSectionItemJson {
45    offset: SourceMapSectionOffsetJson,
46    map: SourceMapJson,
47}
48
49// Some of these values use `Box<RawValue>`: If we don't read these fields (or rarely read these
50// fields) there's no point in decoding/encoding the data. Ideally they would be a `&RawValue`
51// reference, but we deserialize using `from_reader`, which does not support that.
52#[derive(Serialize, Deserialize)]
53#[serde(rename_all = "camelCase")]
54struct SourceMapJson {
55    version: u32,
56    #[serde(skip_serializing_if = "Option::is_none")]
57    file: Option<String>,
58    #[serde(skip_serializing_if = "Option::is_none")]
59    source_root: Option<String>,
60    // Technically a required field, but we don't want to error here.
61    #[serde(skip_serializing_if = "Option::is_none")]
62    sources: Option<Vec<Option<String>>>,
63    #[serde(skip_serializing_if = "Option::is_none")]
64    sources_content: Option<Vec<Option<Box<RawValue>>>>,
65    #[serde(skip_serializing_if = "Option::is_none")]
66    names: Option<Box<RawValue>>,
67    // We just need to hold onto `mappings` for serialization/deserialization, so there's no point
68    // in decoding/encoding the string. Store it as a `RawValue`. Ideally this would be a reference
69    // to the RawValue, but we deserialize using `from_reader`, which does not support that.
70    mappings: Box<RawValue>,
71    #[serde(skip_serializing_if = "Option::is_none")]
72    ignore_list: Option<Box<RawValue>>,
73
74    // A somewhat widespread non-standard extension
75    #[serde(skip_serializing_if = "Option::is_none")]
76    debug_id: Option<Box<RawValue>>,
77
78    #[serde(skip_serializing_if = "Option::is_none")]
79    sections: Option<Vec<SourceMapSectionItemJson>>,
80}
81
82/// Replace the origin prefix in the `file` and `sources` with `turbopack:///` and read the
83/// `sourceContent`s from disk.
84pub async fn resolve_source_map_sources(
85    map: Option<&Rope>,
86    origin: &FileSystemPath,
87) -> Result<Option<Rope>> {
88    let fs_vc = origin.fs().to_resolved().await?;
89    let fs_str = &*format!("[{}]", fs_vc.to_string().await?);
90
91    let disk_fs = if let Some(fs_vc) = ResolvedVc::try_downcast_type::<DiskFileSystem>(fs_vc) {
92        Some((fs_vc, fs_vc.await?))
93    } else {
94        None
95    };
96    let disk_fs = &disk_fs;
97
98    let resolve_source =
99        async |source_url: &mut String, source_content: Option<&mut Option<Box<RawValue>>>| {
100            // original_source should always be a URL (possibly a `file://` url). If it's a relative
101            // URL, it should be relative to `origin` (the generated file that's being mapped).
102            // https://developer.mozilla.org/en-US/docs/Learn_web_development/Howto/Web_mechanics/What_is_a_URL#absolute_urls_vs._relative_urls
103            let maybe_file_url = if source_url.starts_with("//") {
104                // looks like a "scheme-relative" URL
105                // Rewrite '//scheme/relative' -> 'file:///scheme/relative' (three slashes)
106                Cow::Owned(format!("file:/{source_url}"))
107            } else if source_url.starts_with('/') {
108                // looks like a "domain-relative" (aka "server-relative") URL
109                // Rewrite '/domain/relative' -> 'file:///domain/relative' (three slashes)
110                Cow::Owned(format!("file://{source_url}"))
111            } else {
112                Cow::Borrowed(source_url)
113            };
114
115            let fs_path = if let Ok(original_source_url_obj) = Url::parse(&maybe_file_url) {
116                // We have an absolute URL, try to parse it as a `file://` URL
117                if let Ok(sys_path) = original_source_url_obj.to_file_path() {
118                    if let Some((disk_fs_vc, disk_fs)) = disk_fs {
119                        disk_fs.try_from_sys_path(*disk_fs_vc, &sys_path, Some(origin))
120                    } else {
121                        None
122                    }
123                } else {
124                    // this is an absolute URL with a non-`file://` scheme, just assume it's valid
125                    // and don't modify anything
126                    return Ok(());
127                }
128            } else {
129                // assume it's a relative URL, and just remove any percent encoding from path
130                // segments. Our internal path format is POSIX-like, without percent encoding.
131                origin.parent().try_join(
132                    &urlencoding::decode(source_url).unwrap_or(Cow::Borrowed(source_url)),
133                )?
134            };
135
136            if let Some(fs_path) = fs_path {
137                // TODO: Encode `fs_str` and `fs_path_str` using `urlencoding`, so that these are
138                // valid URLs. However, `project_trace_source_operation` (and `uri_from_file`) need
139                // to handle percent encoding correctly first.
140                let fs_path_str = &fs_path.path;
141                *source_url = format!("{SOURCE_URL_PROTOCOL}///{fs_str}/{fs_path_str}");
142
143                if let Some(source_content) = source_content
144                    && source_content.is_none()
145                {
146                    if let FileContent::Content(file) = &*fs_path.read().await? {
147                        let text = file.content().to_str()?;
148                        *source_content = Some(unencoded_str_to_raw_value(&text));
149                    } else {
150                        *source_content = Some(unencoded_str_to_raw_value(&format!(
151                            "unable to read source {fs_str}/{fs_path_str}"
152                        )));
153                    }
154                }
155            } else {
156                // The URL was broken somehow, create a dummy `turbopack://` URL and content
157                let origin_str = &origin.path;
158                if let Some(source_content) = source_content
159                    && source_content.is_none()
160                {
161                    *source_content = Some(unencoded_str_to_raw_value(&format!(
162                        "unable to access {source_url} in {fs_str}/{origin_str} (it's leaving the \
163                         filesystem root)"
164                    )));
165                }
166                static INVALID_REGEX: Lazy<Regex> =
167                    Lazy::new(|| Regex::new(r#"(?:^|/)(?:\.\.?(?:/|$))+"#).unwrap());
168                let source = INVALID_REGEX
169                    .replace_all(source_url, |s: &regex::Captures<'_>| s[0].replace('.', "_"));
170                *source_url = format!("{SOURCE_URL_PROTOCOL}///{fs_str}/{origin_str}/{source}");
171            }
172            anyhow::Ok(())
173        };
174
175    let resolve_map = async |map: &mut SourceMapJson| {
176        if let Some(sources) = &mut map.sources {
177            let mut contents = if let Some(mut contents) = map.sources_content.take() {
178                contents.resize(sources.len(), None);
179                contents
180            } else {
181                iter::repeat_n(None, sources.len()).collect()
182            };
183
184            for (source, content) in sources.iter_mut().zip(contents.iter_mut()) {
185                if let Some(source) = source {
186                    if let Some(source_root) = &map.source_root {
187                        *source = format!("{source_root}{source}");
188                    }
189                    resolve_source(source, Some(content)).await?;
190                }
191            }
192
193            map.source_root = None;
194            map.sources_content = Some(contents);
195        }
196        anyhow::Ok(())
197    };
198
199    let Some(map) = map else {
200        return Ok(None);
201    };
202
203    let Ok(mut map): serde_json::Result<SourceMapJson> = serde_json::from_reader(map.read()) else {
204        // Silently ignore invalid sourcemaps
205        return Ok(None);
206    };
207
208    if let Some(file) = &mut map.file {
209        resolve_source(file, None).await?;
210    }
211
212    resolve_map(&mut map).await?;
213    for section in map.sections.iter_mut().flatten() {
214        resolve_map(&mut section.map).await?;
215    }
216
217    let map = Rope::from(serde_json::to_vec(&map)?);
218    Ok(Some(map))
219}
220
221fn unencoded_str_to_raw_value(unencoded: &str) -> Box<RawValue> {
222    RawValue::from_string(
223        serde_json::to_string(unencoded)
224            .expect("serialization of a utf-8 string should always succeed"),
225    )
226    .expect("serde_json::to_string should produce valid JSON")
227}
228
229/// Helper function to transform turbopack:/// file references in a sourcemap.
230/// Handles parsing the sourcemap, resolving the filesystem, applying transformations, and
231/// serializing back.
232/// The transform function is given the source string as found in the sourcemap (i.e. a URI).
233async fn transform_relative_files<F>(
234    map: Option<&Rope>,
235    context_path: &FileSystemPath,
236    mut transform: F,
237) -> Result<Option<Rope>>
238where
239    F: FnMut(&DiskFileSystem, &str) -> Result<String>,
240{
241    let Some(map) = map else {
242        return Ok(None);
243    };
244
245    let Ok(mut map): serde_json::Result<SourceMapJson> = serde_json::from_reader(map.read()) else {
246        // Silently ignore invalid sourcemaps
247        return Ok(None);
248    };
249
250    let context_fs = context_path.fs;
251    let context_fs = &*ResolvedVc::try_downcast_type::<DiskFileSystem>(context_fs)
252        .context("Expected the chunking context to have a DiskFileSystem")?
253        .await?;
254
255    let prefix = format!("{}///[{}]/", SOURCE_URL_PROTOCOL, context_fs.name());
256
257    let mut apply_transform = |src: &mut String| -> Result<()> {
258        if let Some(src_rest) = src.strip_prefix(&prefix) {
259            *src = transform(context_fs, src_rest)?;
260        }
261        Ok(())
262    };
263
264    for src in map.sources.iter_mut().flatten().flatten() {
265        apply_transform(src)?;
266    }
267    for section in map.sections.iter_mut().flatten() {
268        for src in section.map.sources.iter_mut().flatten().flatten() {
269            apply_transform(src)?;
270        }
271    }
272
273    Ok(Some(Rope::from(serde_json::to_vec(&map)?)))
274}
275
276/// Turns `turbopack:///[project]` references in sourcemap sources into absolute `file://` uris. This
277/// is useful for debugging environments.
278pub async fn absolute_fileify_source_map(
279    map: Option<&Rope>,
280    context_path: FileSystemPath,
281) -> Result<Option<Rope>> {
282    transform_relative_files(map, &context_path, |context_fs, src_rest| {
283        let path = context_path.join(src_rest)?;
284
285        Ok(uri_from_path_buf(context_fs.to_sys_path(&path)))
286    })
287    .await
288}
289
290fn uri_encode_path(path: &str) -> String {
291    path.split('/')
292        .map(|s| urlencoding::encode(s))
293        .collect::<Vec<_>>()
294        .join("/")
295}
296/// Turns `turbopack:///[project]` references in sourcemap sources into relative './' prefixed uris.
297/// This is useful in server environments and especially build environments.
298pub async fn relative_fileify_source_map(
299    map: Option<&Rope>,
300    context_path: FileSystemPath,
301    relative_path_to_output_root: RcStr,
302) -> Result<Option<Rope>> {
303    let relative_path_to_output_root = relative_path_to_output_root
304        .split('/')
305        .map(|s| urlencoding::encode(s))
306        .collect::<Vec<_>>()
307        .join("/");
308    transform_relative_files(map, &context_path, |_context_fs, src_rest| {
309        // NOTE: we just include the relative path prefix here instead of using `sourceRoot`
310        // since the spec on sourceRoot is broken.
311
312        // TODO(bgw): this shouldn't be necessary to uri encode since the strings we get out of the
313        // source map should already be uri encoded, however in the case of the turbopack scheme in
314        // particular we are inconsistent so be defensive here.
315        let src_rest = uri_encode_path(src_rest);
316        if relative_path_to_output_root.is_empty() {
317            Ok(src_rest.to_string())
318        } else {
319            Ok(format!("{relative_path_to_output_root}/{src_rest}",))
320        }
321    })
322    .await
323}
324
#[cfg(test)]
mod tests {
    use std::path::Path;

    use turbo_rcstr::{RcStr, rcstr};
    use turbo_tasks_backend::{BackendOptions, TurboTasksBackend, noop_backing_storage};
    use turbo_tasks_fs::FileSystem;

    use super::*;

    /// Builds a minimal serialized source map with the given `sourceRoot` and `sources`.
    fn source_map_rope<'a>(
        source_root: Option<&str>,
        sources: impl IntoIterator<Item = &'a str>,
    ) -> Rope {
        let sources: Vec<Option<&str>> = sources.into_iter().map(Some).collect();
        let json = serde_json::json!({
            "version": 3,
            "mappings": "",
            "sourceRoot": source_root,
            "sources": sources,
        });
        // Round-trip through `SourceMapJson` so the fixture has the shape this module emits.
        let parsed: SourceMapJson = serde_json::from_value(json).unwrap();
        Rope::from(serde_json::to_string_pretty(&parsed).unwrap())
    }

    /// Runs `resolve_source_map_sources` on `map` and parses the output back into a
    /// `SourceMapJson`. Panics if no map is returned or if the output is not valid.
    async fn resolve_and_parse(map: Rope, origin: &FileSystemPath) -> Result<SourceMapJson> {
        let resolved = resolve_source_map_sources(Some(&map), origin)
            .await?
            .unwrap();
        Ok(serde_json::from_str(&resolved.to_str().unwrap()).unwrap())
    }

    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_resolve_source_map_sources() {
        let sys_root = if cfg!(windows) {
            Path::new(r"C:\fake\root")
        } else {
            Path::new(r"/fake/root")
        };
        let url_root = Url::from_directory_path(sys_root).unwrap();

        let tt = turbo_tasks::TurboTasks::new(TurboTasksBackend::new(
            BackendOptions::default(),
            noop_backing_storage(),
        ));
        tt.run_once(async move {
            let fs_root_path =
                DiskFileSystem::new(rcstr!("mock"), RcStr::from(sys_root.to_str().unwrap()))
                    .root()
                    .await?;

            // contains the file:// protocol/scheme
            let absolute_file_url = url_root.join("absolute%20file%20url.js").unwrap();
            // A server-relative path starting with `/`, potentially includes a windows disk
            let server_relative_path =
                format!("{}/server%20relative%20path.js", url_root.path());
            // A scheme-relative path is the `file:` URL without its scheme
            let scheme_relative_url = url_root.join("scheme%20relative%20path.js").unwrap();

            let input = source_map_rope(
                /* source_root */ None,
                [
                    "page.js",
                    "./current-dir-page.js",
                    "../other%20route/page.js",
                    absolute_file_url.as_str(),
                    server_relative_path.as_str(),
                    scheme_relative_url
                        .as_str()
                        .strip_prefix("file:")
                        .unwrap(),
                    // non-file URLs are preserved
                    "https://example.com/page%20path.js",
                ],
            );
            // NOTE: the percent encoding here should NOT be decoded, as this is not part of a
            // `file://` URL
            let origin = fs_root_path.join("app/source%20mapped/page.js").unwrap();
            let resolved_source_map = resolve_and_parse(input, &origin).await?;

            let prefix = format!("{SOURCE_URL_PROTOCOL}///[mock]");
            assert_eq!(
                resolved_source_map.sources,
                Some(vec![
                    Some(format!("{prefix}/app/source%20mapped/page.js")),
                    Some(format!("{prefix}/app/source%20mapped/current-dir-page.js")),
                    Some(format!("{prefix}/app/other route/page.js")),
                    Some(format!("{prefix}/absolute file url.js")),
                    Some(format!("{prefix}/server relative path.js")),
                    Some(format!("{prefix}/scheme relative path.js")),
                    Some("https://example.com/page%20path.js".to_owned()),
                ])
            );

            // try with a `source_root`
            // NOTE: these should get literally concated, a slash should NOT get added.
            let input = source_map_rope(Some("../source%20root%20"), ["page.js"]);
            let origin = fs_root_path.join("app/page.js").unwrap();
            let resolved_source_map = resolve_and_parse(input, &origin).await?;

            assert_eq!(
                resolved_source_map.sources,
                Some(vec![Some(format!("{prefix}/source root page.js"))])
            );

            anyhow::Ok(())
        })
        .await
        .unwrap();
    }
}
448}