Skip to main content

turbopack_core/source_map/
utils.rs

1use std::{borrow::Cow, collections::HashSet, iter, sync::LazyLock};
2
3use anyhow::{Context, Result};
4use const_format::concatcp;
5use regex::Regex;
6use serde::{Deserialize, Serialize};
7use serde_json::value::RawValue;
8use turbo_rcstr::RcStr;
9use turbo_tasks::{ResolvedVc, turbofmt};
10use turbo_tasks_fs::{
11    DiskFileSystem, FileContent, FileSystemPath, rope::Rope, util::uri_from_path_buf,
12};
13use url::Url;
14
15use crate::SOURCE_URL_PROTOCOL_STR;
16
17pub fn add_default_ignore_list(map: &mut swc_sourcemap::SourceMap) {
18    let mut ignored_ids = HashSet::new();
19
20    for (source_id, source) in map.sources().enumerate() {
21        if source.starts_with(concatcp!(SOURCE_URL_PROTOCOL_STR, "///[next]"))
22            || source.starts_with(concatcp!(SOURCE_URL_PROTOCOL_STR, "///[turbopack]"))
23            || source.contains("/node_modules/")
24            || source.ends_with("__nextjs-internal-proxy.cjs")
25            || source.ends_with("__nextjs-internal-proxy.mjs")
26        {
27            ignored_ids.insert(source_id);
28        }
29    }
30
31    for ignored_id in ignored_ids {
32        map.add_to_ignore_list(ignored_id as _);
33    }
34}
35
36#[derive(Serialize, Deserialize)]
37struct SourceMapSectionOffsetJson {
38    line: u32,
39    offset: u32,
40}
41
42#[derive(Serialize, Deserialize)]
43struct SourceMapSectionItemJson {
44    offset: SourceMapSectionOffsetJson,
45    map: SourceMapJson,
46}
47
48// Some of these values use `Box<RawValue>`: If we don't read these fields (or rarely read these
49// fields) there's no point in decoding/encoding the data. Ideally they would be a `&RawValue`
50// reference, but we deserialize using `from_reader`, which does not support that.
51#[derive(Serialize, Deserialize)]
52#[serde(rename_all = "camelCase")]
53struct SourceMapJson {
54    version: u32,
55    #[serde(skip_serializing_if = "Option::is_none")]
56    file: Option<String>,
57    #[serde(skip_serializing_if = "Option::is_none")]
58    source_root: Option<String>,
59    // Technically a required field, but we don't want to error here.
60    #[serde(skip_serializing_if = "Option::is_none")]
61    sources: Option<Vec<Option<String>>>,
62    #[serde(skip_serializing_if = "Option::is_none")]
63    sources_content: Option<Vec<Option<Box<RawValue>>>>,
64    #[serde(skip_serializing_if = "Option::is_none")]
65    names: Option<Box<RawValue>>,
66    // We just need to hold onto `mappings` for serialization/deserialization, so there's no point
67    // in decoding/encoding the string. Store it as a `RawValue`. Ideally this would be a reference
68    // to the RawValue, but we deserialize using `from_reader`, which does not support that.
69    mappings: Box<RawValue>,
70    #[serde(skip_serializing_if = "Option::is_none")]
71    ignore_list: Option<Box<RawValue>>,
72
73    // A somewhat widespread non-standard extension
74    #[serde(skip_serializing_if = "Option::is_none")]
75    debug_id: Option<Box<RawValue>>,
76
77    #[serde(skip_serializing_if = "Option::is_none")]
78    sections: Option<Vec<SourceMapSectionItemJson>>,
79}
80
81/// Replace the origin prefix in the `file` and `sources` with `turbopack:///` and read the
82/// `sourceContent`s from disk.
83pub async fn resolve_source_map_sources(
84    map: Option<&Rope>,
85    origin: &FileSystemPath,
86) -> Result<Option<Rope>> {
87    let fs_vc = origin.fs().to_resolved().await?;
88    let fs_str = &*turbofmt!("[{fs_vc}]").await?;
89
90    let disk_fs = if let Some(fs_vc) = ResolvedVc::try_downcast_type::<DiskFileSystem>(fs_vc) {
91        Some((fs_vc, fs_vc.await?))
92    } else {
93        None
94    };
95    let disk_fs = &disk_fs;
96
97    let resolve_source =
98        async |source_url: &mut String, source_content: Option<&mut Option<Box<RawValue>>>| {
99            // original_source should always be a URL (possibly a `file://` url). If it's a relative
100            // URL, it should be relative to `origin` (the generated file that's being mapped).
101            // https://developer.mozilla.org/en-US/docs/Learn_web_development/Howto/Web_mechanics/What_is_a_URL#absolute_urls_vs._relative_urls
102            let maybe_file_url = if source_url.starts_with("//") {
103                // looks like a "scheme-relative" URL
104                // Rewrite '//scheme/relative' -> 'file:///scheme/relative' (three slashes)
105                Cow::Owned(format!("file:/{source_url}"))
106            } else if source_url.starts_with('/') {
107                // looks like a "domain-relative" (aka "server-relative") URL
108                // Rewrite '/domain/relative' -> 'file:///domain/relative' (three slashes)
109                Cow::Owned(format!("file://{source_url}"))
110            } else {
111                Cow::Borrowed(source_url)
112            };
113
114            let fs_path = if let Ok(original_source_url_obj) = Url::parse(&maybe_file_url) {
115                // We have an absolute URL, try to parse it as a `file://` URL
116                if let Ok(sys_path) = original_source_url_obj.to_file_path() {
117                    if let Some((disk_fs_vc, disk_fs)) = disk_fs {
118                        disk_fs.try_from_sys_path(*disk_fs_vc, &sys_path, Some(origin))
119                    } else {
120                        None
121                    }
122                } else {
123                    // this is an absolute URL with a non-`file://` scheme, just assume it's valid
124                    // and don't modify anything
125                    return Ok(());
126                }
127            } else {
128                // assume it's a relative URL, and just remove any percent encoding from path
129                // segments. Our internal path format is POSIX-like, without percent encoding.
130                origin
131                    .parent()
132                    .try_join(&urlencoding::decode(source_url).unwrap_or(Cow::Borrowed(source_url)))
133            };
134
135            if let Some(fs_path) = fs_path {
136                // TODO: Encode `fs_str` and `fs_path_str` using `urlencoding`, so that these are
137                // valid URLs. However, `project_trace_source_operation` (and `uri_from_file`) need
138                // to handle percent encoding correctly first.
139                let fs_path_str = &fs_path.path;
140                *source_url = format!("{SOURCE_URL_PROTOCOL_STR}///{fs_str}/{fs_path_str}");
141
142                if let Some(source_content) = source_content
143                    && source_content.is_none()
144                {
145                    if let FileContent::Content(file) = &*fs_path.read().await? {
146                        let text = file.content().to_str()?;
147                        *source_content = Some(unencoded_str_to_raw_value(&text));
148                    } else {
149                        *source_content = Some(unencoded_str_to_raw_value(&format!(
150                            "unable to read source {fs_str}/{fs_path_str}"
151                        )));
152                    }
153                }
154            } else {
155                // The URL was broken somehow, create a dummy `turbopack://` URL and content
156                let origin_str = &origin.path;
157                if let Some(source_content) = source_content
158                    && source_content.is_none()
159                {
160                    *source_content = Some(unencoded_str_to_raw_value(&format!(
161                        "unable to access {source_url} in {fs_str}/{origin_str} (it's leaving the \
162                         filesystem root)"
163                    )));
164                }
165                static INVALID_REGEX: LazyLock<Regex> =
166                    LazyLock::new(|| Regex::new(r#"(?:^|/)(?:\.\.?(?:/|$))+"#).unwrap());
167                let source = INVALID_REGEX
168                    .replace_all(source_url, |s: &regex::Captures<'_>| s[0].replace('.', "_"));
169                *source_url = format!("{SOURCE_URL_PROTOCOL_STR}///{fs_str}/{origin_str}/{source}");
170            }
171            anyhow::Ok(())
172        };
173
174    let resolve_map = async |map: &mut SourceMapJson| {
175        if let Some(sources) = &mut map.sources {
176            let mut contents = if let Some(mut contents) = map.sources_content.take() {
177                contents.resize(sources.len(), None);
178                contents
179            } else {
180                iter::repeat_n(None, sources.len()).collect()
181            };
182
183            for (source, content) in sources.iter_mut().zip(contents.iter_mut()) {
184                if let Some(source) = source {
185                    if let Some(source_root) = &map.source_root {
186                        *source = format!("{source_root}{source}");
187                    }
188                    resolve_source(source, Some(content)).await?;
189                }
190            }
191
192            map.source_root = None;
193            map.sources_content = Some(contents);
194        }
195        anyhow::Ok(())
196    };
197
198    let Some(map) = map else {
199        return Ok(None);
200    };
201
202    let Ok(mut map): serde_json::Result<SourceMapJson> = serde_json::from_reader(map.read()) else {
203        // Silently ignore invalid sourcemaps
204        return Ok(None);
205    };
206
207    if let Some(file) = &mut map.file {
208        resolve_source(file, None).await?;
209    }
210
211    resolve_map(&mut map).await?;
212    for section in map.sections.iter_mut().flatten() {
213        resolve_map(&mut section.map).await?;
214    }
215
216    let map = Rope::from(serde_json::to_vec(&map)?);
217    Ok(Some(map))
218}
219
220fn unencoded_str_to_raw_value(unencoded: &str) -> Box<RawValue> {
221    RawValue::from_string(
222        serde_json::to_string(unencoded)
223            .expect("serialization of a utf-8 string should always succeed"),
224    )
225    .expect("serde_json::to_string should produce valid JSON")
226}
227
228/// Helper function to transform turbopack:/// file references in a sourcemap.
229/// Handles parsing the sourcemap, resolving the filesystem, applying transformations, and
230/// serializing back.
231/// The transform function is given the source string as found in the sourcemap (i.e. a URI).
232async fn transform_relative_files<F>(
233    map: Option<&Rope>,
234    context_path: &FileSystemPath,
235    mut transform: F,
236) -> Result<Option<Rope>>
237where
238    F: FnMut(&DiskFileSystem, &str) -> Result<String>,
239{
240    let Some(map) = map else {
241        return Ok(None);
242    };
243
244    let Ok(mut map): serde_json::Result<SourceMapJson> = serde_json::from_reader(map.read()) else {
245        // Silently ignore invalid sourcemaps
246        return Ok(None);
247    };
248
249    let context_fs = context_path.fs;
250    let context_fs = &*ResolvedVc::try_downcast_type::<DiskFileSystem>(context_fs)
251        .context("Expected the chunking context to have a DiskFileSystem")?
252        .await?;
253
254    let prefix = format!("{}///[{}]/", SOURCE_URL_PROTOCOL_STR, context_fs.name());
255
256    let mut apply_transform = |src: &mut String| -> Result<()> {
257        if let Some(src_rest) = src.strip_prefix(&prefix) {
258            *src = transform(context_fs, src_rest)?;
259        }
260        Ok(())
261    };
262
263    for src in map.sources.iter_mut().flatten().flatten() {
264        apply_transform(src)?;
265    }
266    for section in map.sections.iter_mut().flatten() {
267        for src in section.map.sources.iter_mut().flatten().flatten() {
268            apply_transform(src)?;
269        }
270    }
271
272    Ok(Some(Rope::from(serde_json::to_vec(&map)?)))
273}
274
275/// Turns `turbopack:///[project]` references in sourcemap sources into absolute `file://` uris. This
276/// is useful for debugging environments.
277pub async fn absolute_fileify_source_map(
278    map: Option<&Rope>,
279    context_path: FileSystemPath,
280) -> Result<Option<Rope>> {
281    transform_relative_files(map, &context_path, |context_fs, src_rest| {
282        let path = context_path.join(src_rest)?;
283
284        Ok(uri_from_path_buf(context_fs.to_sys_path(&path)))
285    })
286    .await
287}
288
289fn uri_encode_path(path: &str) -> String {
290    path.split('/')
291        .map(|s| urlencoding::encode(s))
292        .collect::<Vec<_>>()
293        .join("/")
294}
295/// Turns `turbopack:///[project]` references in sourcemap sources into relative './' prefixed uris.
296/// This is useful in server environments and especially build environments.
297pub async fn relative_fileify_source_map(
298    map: Option<&Rope>,
299    context_path: FileSystemPath,
300    relative_path_to_output_root: RcStr,
301) -> Result<Option<Rope>> {
302    let relative_path_to_output_root = relative_path_to_output_root
303        .split('/')
304        .map(|s| urlencoding::encode(s))
305        .collect::<Vec<_>>()
306        .join("/");
307    transform_relative_files(map, &context_path, |_context_fs, src_rest| {
308        // NOTE: we just include the relative path prefix here instead of using `sourceRoot`
309        // since the spec on sourceRoot is broken.
310
311        // TODO(bgw): this shouldn't be necessary to uri encode since the strings we get out of the
312        // source map should already be uri encoded, however in the case of the turbopack scheme in
313        // particular we are inconsistent so be defensive here.
314        let src_rest = uri_encode_path(src_rest);
315        if relative_path_to_output_root.is_empty() {
316            Ok(src_rest.to_string())
317        } else {
318            Ok(format!("{relative_path_to_output_root}/{src_rest}",))
319        }
320    })
321    .await
322}
323
#[cfg(test)]
mod tests {
    use std::path::Path;

    use turbo_rcstr::{RcStr, rcstr};
    use turbo_tasks::Vc;
    use turbo_tasks_backend::{BackendOptions, TurboTasksBackend, noop_backing_storage};
    use turbo_tasks_fs::FileSystem;

    use super::*;

    /// Serializes a minimal version 3 source map with empty `mappings`, the given `sourceRoot`
    /// and the given `sources`.
    fn source_map_rope<'a>(
        source_root: Option<&str>,
        sources: impl IntoIterator<Item = &'a str>,
    ) -> Rope {
        let source_list: Vec<Option<&str>> = sources.into_iter().map(Some).collect();
        let raw_map = serde_json::json!({
            "version": 3,
            "mappings": "",
            "sourceRoot": source_root,
            "sources": source_list,
        });
        // Go through `SourceMapJson` so the fixture is shaped like a map this module would emit.
        let typed_map: SourceMapJson = serde_json::from_value(raw_map).unwrap();
        Rope::from(serde_json::to_string_pretty(&typed_map).unwrap())
    }

    /// Runs `resolve_source_map_sources` on `input` and returns the `sources` of its output.
    async fn resolved_sources_of(
        input: Rope,
        origin: &FileSystemPath,
    ) -> anyhow::Result<Vec<Option<String>>> {
        let output = resolve_source_map_sources(Some(&input), origin)
            .await?
            .unwrap();
        let parsed: SourceMapJson = serde_json::from_str(&output.to_str()?)?;
        Ok(parsed.sources.unwrap_or_default())
    }

    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_resolve_source_map_sources() {
        let tt = turbo_tasks::TurboTasks::new(TurboTasksBackend::new(
            BackendOptions::default(),
            noop_backing_storage(),
        ));
        tt.run_once(async move {
            #[turbo_tasks::value]
            struct SourceMapSourcesOutput {
                resolved_sources: Vec<Option<String>>,
                rooted_sources: Vec<Option<String>>,
            }

            #[turbo_tasks::function(operation, root)]
            async fn resolve_source_map_sources_operation()
            -> anyhow::Result<Vc<SourceMapSourcesOutput>> {
                let sys_root = if cfg!(windows) {
                    Path::new(r"C:\fake\root")
                } else {
                    Path::new(r"/fake/root")
                };
                let url_root = Url::from_directory_path(sys_root).unwrap();

                let fs_root_path = DiskFileSystem::new(
                    rcstr!("mock"),
                    Vc::cell(RcStr::from(sys_root.to_str().unwrap())),
                )
                .root()
                .await?;

                // contains the file:// protocol/scheme
                let absolute_file_url = url_root.join("absolute%20file%20url.js")?;
                // A server-relative path starting with `/`, potentially includes a windows disk
                let server_relative_path =
                    format!("{}/server%20relative%20path.js", url_root.path());
                // A scheme-relative path
                let scheme_relative_url = url_root.join("scheme%20relative%20path.js")?;
                let scheme_relative_path = scheme_relative_url
                    .as_str()
                    .strip_prefix("file:")
                    .unwrap();

                let resolved_sources = resolved_sources_of(
                    source_map_rope(
                        /* source_root */ None,
                        [
                            "page.js",
                            "./current-dir-page.js",
                            "../other%20route/page.js",
                            absolute_file_url.as_str(),
                            server_relative_path.as_str(),
                            scheme_relative_path,
                            // non-file URLs are preserved
                            "https://example.com/page%20path.js",
                        ],
                    ),
                    // NOTE: the percent encoding here should NOT be decoded, as this is not part
                    // of a `file://` URL
                    &fs_root_path.join("app/source%20mapped/page.js").unwrap(),
                )
                .await?;

                let rooted_sources = resolved_sources_of(
                    source_map_rope(
                        // NOTE: these should get literally concated, a slash should NOT get
                        // added.
                        Some("../source%20root%20"),
                        ["page.js"],
                    ),
                    &fs_root_path.join("app/page.js").unwrap(),
                )
                .await?;

                Ok(SourceMapSourcesOutput {
                    resolved_sources,
                    rooted_sources,
                }
                .cell())
            }

            let resolved_source_maps = resolve_source_map_sources_operation()
                .read_strongly_consistent()
                .await?;

            let prefix = format!("{SOURCE_URL_PROTOCOL_STR}///[mock]");

            let expected_resolved = vec![
                Some(format!("{prefix}/app/source%20mapped/page.js")),
                Some(format!("{prefix}/app/source%20mapped/current-dir-page.js")),
                Some(format!("{prefix}/app/other route/page.js")),
                Some(format!("{prefix}/absolute file url.js")),
                Some(format!("{prefix}/server relative path.js")),
                Some(format!("{prefix}/scheme relative path.js")),
                Some("https://example.com/page%20path.js".to_owned()),
            ];
            assert_eq!(resolved_source_maps.resolved_sources, expected_resolved);

            let expected_rooted = vec![Some(format!("{prefix}/source root page.js"))];
            assert_eq!(resolved_source_maps.rooted_sources, expected_rooted);

            anyhow::Ok(())
        })
        .await
        .unwrap();
    }
}
465}