Skip to main content

turbopack_core/source_map/
utils.rs

1use std::{borrow::Cow, collections::HashSet, iter};
2
3use anyhow::{Context, Result};
4use const_format::concatcp;
5use once_cell::sync::Lazy;
6use regex::Regex;
7use serde::{Deserialize, Serialize};
8use serde_json::value::RawValue;
9use turbo_rcstr::RcStr;
10use turbo_tasks::{ResolvedVc, turbofmt};
11use turbo_tasks_fs::{
12    DiskFileSystem, FileContent, FileSystemPath, rope::Rope, util::uri_from_path_buf,
13};
14use url::Url;
15
16use crate::SOURCE_URL_PROTOCOL;
17
18pub fn add_default_ignore_list(map: &mut swc_sourcemap::SourceMap) {
19    let mut ignored_ids = HashSet::new();
20
21    for (source_id, source) in map.sources().enumerate() {
22        if source.starts_with(concatcp!(SOURCE_URL_PROTOCOL, "///[next]"))
23            || source.starts_with(concatcp!(SOURCE_URL_PROTOCOL, "///[turbopack]"))
24            || source.contains("/node_modules/")
25            || source.ends_with("__nextjs-internal-proxy.cjs")
26            || source.ends_with("__nextjs-internal-proxy.mjs")
27        {
28            ignored_ids.insert(source_id);
29        }
30    }
31
32    for ignored_id in ignored_ids {
33        map.add_to_ignore_list(ignored_id as _);
34    }
35}
36
37#[derive(Serialize, Deserialize)]
38struct SourceMapSectionOffsetJson {
39    line: u32,
40    offset: u32,
41}
42
43#[derive(Serialize, Deserialize)]
44struct SourceMapSectionItemJson {
45    offset: SourceMapSectionOffsetJson,
46    map: SourceMapJson,
47}
48
49// Some of these values use `Box<RawValue>`: If we don't read these fields (or rarely read these
50// fields) there's no point in decoding/encoding the data. Ideally they would be a `&RawValue`
51// reference, but we deserialize using `from_reader`, which does not support that.
52#[derive(Serialize, Deserialize)]
53#[serde(rename_all = "camelCase")]
54struct SourceMapJson {
55    version: u32,
56    #[serde(skip_serializing_if = "Option::is_none")]
57    file: Option<String>,
58    #[serde(skip_serializing_if = "Option::is_none")]
59    source_root: Option<String>,
60    // Technically a required field, but we don't want to error here.
61    #[serde(skip_serializing_if = "Option::is_none")]
62    sources: Option<Vec<Option<String>>>,
63    #[serde(skip_serializing_if = "Option::is_none")]
64    sources_content: Option<Vec<Option<Box<RawValue>>>>,
65    #[serde(skip_serializing_if = "Option::is_none")]
66    names: Option<Box<RawValue>>,
67    // We just need to hold onto `mappings` for serialization/deserialization, so there's no point
68    // in decoding/encoding the string. Store it as a `RawValue`. Ideally this would be a reference
69    // to the RawValue, but we deserialize using `from_reader`, which does not support that.
70    mappings: Box<RawValue>,
71    #[serde(skip_serializing_if = "Option::is_none")]
72    ignore_list: Option<Box<RawValue>>,
73
74    // A somewhat widespread non-standard extension
75    #[serde(skip_serializing_if = "Option::is_none")]
76    debug_id: Option<Box<RawValue>>,
77
78    #[serde(skip_serializing_if = "Option::is_none")]
79    sections: Option<Vec<SourceMapSectionItemJson>>,
80}
81
82/// Replace the origin prefix in the `file` and `sources` with `turbopack:///` and read the
83/// `sourceContent`s from disk.
84pub async fn resolve_source_map_sources(
85    map: Option<&Rope>,
86    origin: &FileSystemPath,
87) -> Result<Option<Rope>> {
88    let fs_vc = origin.fs().to_resolved().await?;
89    let fs_str = &*turbofmt!("[{fs_vc}]").await?;
90
91    let disk_fs = if let Some(fs_vc) = ResolvedVc::try_downcast_type::<DiskFileSystem>(fs_vc) {
92        Some((fs_vc, fs_vc.await?))
93    } else {
94        None
95    };
96    let disk_fs = &disk_fs;
97
98    let resolve_source =
99        async |source_url: &mut String, source_content: Option<&mut Option<Box<RawValue>>>| {
100            // original_source should always be a URL (possibly a `file://` url). If it's a relative
101            // URL, it should be relative to `origin` (the generated file that's being mapped).
102            // https://developer.mozilla.org/en-US/docs/Learn_web_development/Howto/Web_mechanics/What_is_a_URL#absolute_urls_vs._relative_urls
103            let maybe_file_url = if source_url.starts_with("//") {
104                // looks like a "scheme-relative" URL
105                // Rewrite '//scheme/relative' -> 'file:///scheme/relative' (three slashes)
106                Cow::Owned(format!("file:/{source_url}"))
107            } else if source_url.starts_with('/') {
108                // looks like a "domain-relative" (aka "server-relative") URL
109                // Rewrite '/domain/relative' -> 'file:///domain/relative' (three slashes)
110                Cow::Owned(format!("file://{source_url}"))
111            } else {
112                Cow::Borrowed(source_url)
113            };
114
115            let fs_path = if let Ok(original_source_url_obj) = Url::parse(&maybe_file_url) {
116                // We have an absolute URL, try to parse it as a `file://` URL
117                if let Ok(sys_path) = original_source_url_obj.to_file_path() {
118                    if let Some((disk_fs_vc, disk_fs)) = disk_fs {
119                        disk_fs.try_from_sys_path(*disk_fs_vc, &sys_path, Some(origin))
120                    } else {
121                        None
122                    }
123                } else {
124                    // this is an absolute URL with a non-`file://` scheme, just assume it's valid
125                    // and don't modify anything
126                    return Ok(());
127                }
128            } else {
129                // assume it's a relative URL, and just remove any percent encoding from path
130                // segments. Our internal path format is POSIX-like, without percent encoding.
131                origin
132                    .parent()
133                    .try_join(&urlencoding::decode(source_url).unwrap_or(Cow::Borrowed(source_url)))
134            };
135
136            if let Some(fs_path) = fs_path {
137                // TODO: Encode `fs_str` and `fs_path_str` using `urlencoding`, so that these are
138                // valid URLs. However, `project_trace_source_operation` (and `uri_from_file`) need
139                // to handle percent encoding correctly first.
140                let fs_path_str = &fs_path.path;
141                *source_url = format!("{SOURCE_URL_PROTOCOL}///{fs_str}/{fs_path_str}");
142
143                if let Some(source_content) = source_content
144                    && source_content.is_none()
145                {
146                    if let FileContent::Content(file) = &*fs_path.read().await? {
147                        let text = file.content().to_str()?;
148                        *source_content = Some(unencoded_str_to_raw_value(&text));
149                    } else {
150                        *source_content = Some(unencoded_str_to_raw_value(&format!(
151                            "unable to read source {fs_str}/{fs_path_str}"
152                        )));
153                    }
154                }
155            } else {
156                // The URL was broken somehow, create a dummy `turbopack://` URL and content
157                let origin_str = &origin.path;
158                if let Some(source_content) = source_content
159                    && source_content.is_none()
160                {
161                    *source_content = Some(unencoded_str_to_raw_value(&format!(
162                        "unable to access {source_url} in {fs_str}/{origin_str} (it's leaving the \
163                         filesystem root)"
164                    )));
165                }
166                static INVALID_REGEX: Lazy<Regex> =
167                    Lazy::new(|| Regex::new(r#"(?:^|/)(?:\.\.?(?:/|$))+"#).unwrap());
168                let source = INVALID_REGEX
169                    .replace_all(source_url, |s: &regex::Captures<'_>| s[0].replace('.', "_"));
170                *source_url = format!("{SOURCE_URL_PROTOCOL}///{fs_str}/{origin_str}/{source}");
171            }
172            anyhow::Ok(())
173        };
174
175    let resolve_map = async |map: &mut SourceMapJson| {
176        if let Some(sources) = &mut map.sources {
177            let mut contents = if let Some(mut contents) = map.sources_content.take() {
178                contents.resize(sources.len(), None);
179                contents
180            } else {
181                iter::repeat_n(None, sources.len()).collect()
182            };
183
184            for (source, content) in sources.iter_mut().zip(contents.iter_mut()) {
185                if let Some(source) = source {
186                    if let Some(source_root) = &map.source_root {
187                        *source = format!("{source_root}{source}");
188                    }
189                    resolve_source(source, Some(content)).await?;
190                }
191            }
192
193            map.source_root = None;
194            map.sources_content = Some(contents);
195        }
196        anyhow::Ok(())
197    };
198
199    let Some(map) = map else {
200        return Ok(None);
201    };
202
203    let Ok(mut map): serde_json::Result<SourceMapJson> = serde_json::from_reader(map.read()) else {
204        // Silently ignore invalid sourcemaps
205        return Ok(None);
206    };
207
208    if let Some(file) = &mut map.file {
209        resolve_source(file, None).await?;
210    }
211
212    resolve_map(&mut map).await?;
213    for section in map.sections.iter_mut().flatten() {
214        resolve_map(&mut section.map).await?;
215    }
216
217    let map = Rope::from(serde_json::to_vec(&map)?);
218    Ok(Some(map))
219}
220
221fn unencoded_str_to_raw_value(unencoded: &str) -> Box<RawValue> {
222    RawValue::from_string(
223        serde_json::to_string(unencoded)
224            .expect("serialization of a utf-8 string should always succeed"),
225    )
226    .expect("serde_json::to_string should produce valid JSON")
227}
228
229/// Helper function to transform turbopack:/// file references in a sourcemap.
230/// Handles parsing the sourcemap, resolving the filesystem, applying transformations, and
231/// serializing back.
232/// The transform function is given the source string as found in the sourcemap (i.e. a URI).
233async fn transform_relative_files<F>(
234    map: Option<&Rope>,
235    context_path: &FileSystemPath,
236    mut transform: F,
237) -> Result<Option<Rope>>
238where
239    F: FnMut(&DiskFileSystem, &str) -> Result<String>,
240{
241    let Some(map) = map else {
242        return Ok(None);
243    };
244
245    let Ok(mut map): serde_json::Result<SourceMapJson> = serde_json::from_reader(map.read()) else {
246        // Silently ignore invalid sourcemaps
247        return Ok(None);
248    };
249
250    let context_fs = context_path.fs;
251    let context_fs = &*ResolvedVc::try_downcast_type::<DiskFileSystem>(context_fs)
252        .context("Expected the chunking context to have a DiskFileSystem")?
253        .await?;
254
255    let prefix = format!("{}///[{}]/", SOURCE_URL_PROTOCOL, context_fs.name());
256
257    let mut apply_transform = |src: &mut String| -> Result<()> {
258        if let Some(src_rest) = src.strip_prefix(&prefix) {
259            *src = transform(context_fs, src_rest)?;
260        }
261        Ok(())
262    };
263
264    for src in map.sources.iter_mut().flatten().flatten() {
265        apply_transform(src)?;
266    }
267    for section in map.sections.iter_mut().flatten() {
268        for src in section.map.sources.iter_mut().flatten().flatten() {
269            apply_transform(src)?;
270        }
271    }
272
273    Ok(Some(Rope::from(serde_json::to_vec(&map)?)))
274}
275
276/// Turns `turbopack:///[project]` references in sourcemap sources into absolute `file://` uris. This
277/// is useful for debugging environments.
278pub async fn absolute_fileify_source_map(
279    map: Option<&Rope>,
280    context_path: FileSystemPath,
281) -> Result<Option<Rope>> {
282    transform_relative_files(map, &context_path, |context_fs, src_rest| {
283        let path = context_path.join(src_rest)?;
284
285        Ok(uri_from_path_buf(context_fs.to_sys_path(&path)))
286    })
287    .await
288}
289
290fn uri_encode_path(path: &str) -> String {
291    path.split('/')
292        .map(|s| urlencoding::encode(s))
293        .collect::<Vec<_>>()
294        .join("/")
295}
296/// Turns `turbopack:///[project]` references in sourcemap sources into relative './' prefixed uris.
297/// This is useful in server environments and especially build environments.
298pub async fn relative_fileify_source_map(
299    map: Option<&Rope>,
300    context_path: FileSystemPath,
301    relative_path_to_output_root: RcStr,
302) -> Result<Option<Rope>> {
303    let relative_path_to_output_root = relative_path_to_output_root
304        .split('/')
305        .map(|s| urlencoding::encode(s))
306        .collect::<Vec<_>>()
307        .join("/");
308    transform_relative_files(map, &context_path, |_context_fs, src_rest| {
309        // NOTE: we just include the relative path prefix here instead of using `sourceRoot`
310        // since the spec on sourceRoot is broken.
311
312        // TODO(bgw): this shouldn't be necessary to uri encode since the strings we get out of the
313        // source map should already be uri encoded, however in the case of the turbopack scheme in
314        // particular we are inconsistent so be defensive here.
315        let src_rest = uri_encode_path(src_rest);
316        if relative_path_to_output_root.is_empty() {
317            Ok(src_rest.to_string())
318        } else {
319            Ok(format!("{relative_path_to_output_root}/{src_rest}",))
320        }
321    })
322    .await
323}
324
#[cfg(test)]
mod tests {
    use std::path::Path;

    use turbo_rcstr::{RcStr, rcstr};
    use turbo_tasks::Vc;
    use turbo_tasks_backend::{BackendOptions, TurboTasksBackend, noop_backing_storage};
    use turbo_tasks_fs::FileSystem;

    use super::*;

    /// Serializes a minimal version 3 source map with empty mappings, the given `sourceRoot` and
    /// the given `sources`.
    fn source_map_rope<'a>(
        source_root: Option<&str>,
        sources: impl IntoIterator<Item = &'a str>,
    ) -> Rope {
        let source_list: Vec<Option<&str>> = sources.into_iter().map(Some).collect();
        let source_map: SourceMapJson = serde_json::from_value(serde_json::json!({
            "version": 3,
            "mappings": "",
            "sourceRoot": source_root,
            "sources": source_list,
        }))
        .unwrap();
        Rope::from(serde_json::to_string_pretty(&source_map).unwrap())
    }

    /// Checks how relative, `file://`, server-relative, scheme-relative and non-file source URLs
    /// are rewritten, and that `sourceRoot` is prepended without adding a separator.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_resolve_source_map_sources() {
        let tt = turbo_tasks::TurboTasks::new(TurboTasksBackend::new(
            BackendOptions::default(),
            noop_backing_storage(),
        ));
        tt.run_once(async move {
            #[turbo_tasks::value]
            struct SourceMapSourcesOutput {
                resolved_sources: Vec<Option<String>>,
                rooted_sources: Vec<Option<String>>,
            }

            #[turbo_tasks::function(operation)]
            async fn resolve_source_map_sources_operation()
            -> anyhow::Result<Vc<SourceMapSourcesOutput>> {
                let sys_root = Path::new(if cfg!(windows) {
                    r"C:\fake\root"
                } else {
                    r"/fake/root"
                });
                let url_root = Url::from_directory_path(sys_root).unwrap();

                let fs_root_path =
                    DiskFileSystem::new(rcstr!("mock"), RcStr::from(sys_root.to_str().unwrap()))
                        .root()
                        .await?;

                // contains the file:// protocol/scheme
                let absolute_file_url = url_root.join("absolute%20file%20url.js")?;
                // A server-relative path starting with `/`, potentially includes a windows disk
                let server_relative_source =
                    format!("{}/server%20relative%20path.js", url_root.path());
                // A scheme-relative path
                let scheme_relative_url = url_root.join("scheme%20relative%20path.js")?;
                let scheme_relative_source = scheme_relative_url
                    .as_str()
                    .strip_prefix("file:")
                    .unwrap();

                let unrooted_input = source_map_rope(
                    /* source_root */ None,
                    [
                        "page.js",
                        "./current-dir-page.js",
                        "../other%20route/page.js",
                        absolute_file_url.as_str(),
                        &server_relative_source,
                        scheme_relative_source,
                        // non-file URLs are preserved
                        "https://example.com/page%20path.js",
                    ],
                );
                // NOTE: the percent encoding here should NOT be decoded, as this is not part of a
                // `file://` URL
                let unrooted_origin = fs_root_path.join("app/source%20mapped/page.js").unwrap();
                let unrooted_output = resolve_source_map_sources(Some(&unrooted_input), &unrooted_origin)
                    .await?
                    .unwrap();
                let resolved_source_map: SourceMapJson =
                    serde_json::from_str(&unrooted_output.to_str()?)?;

                // NOTE: these should get literally concated, a slash should NOT get added.
                let rooted_input = source_map_rope(Some("../source%20root%20"), ["page.js"]);
                let rooted_origin = fs_root_path.join("app/page.js").unwrap();
                let rooted_output = resolve_source_map_sources(Some(&rooted_input), &rooted_origin)
                    .await?
                    .unwrap();
                let rooted_source_map: SourceMapJson =
                    serde_json::from_str(&rooted_output.to_str()?)?;

                Ok(SourceMapSourcesOutput {
                    resolved_sources: resolved_source_map.sources.unwrap_or_default(),
                    rooted_sources: rooted_source_map.sources.unwrap_or_default(),
                }
                .cell())
            }

            let outputs = resolve_source_map_sources_operation()
                .read_strongly_consistent()
                .await?;

            let prefix = format!("{SOURCE_URL_PROTOCOL}///[mock]");

            let expected_resolved = vec![
                Some(format!("{prefix}/app/source%20mapped/page.js")),
                Some(format!("{prefix}/app/source%20mapped/current-dir-page.js")),
                Some(format!("{prefix}/app/other route/page.js")),
                Some(format!("{prefix}/absolute file url.js")),
                Some(format!("{prefix}/server relative path.js")),
                Some(format!("{prefix}/scheme relative path.js")),
                Some("https://example.com/page%20path.js".to_owned()),
            ];
            assert_eq!(outputs.resolved_sources, expected_resolved);

            let expected_rooted = vec![Some(format!("{prefix}/source root page.js"))];
            assert_eq!(outputs.rooted_sources, expected_rooted);

            anyhow::Ok(())
        })
        .await
        .unwrap();
    }
}
460        })
461        .await
462        .unwrap();
463    }
464}