Skip to main content

next_core/next_app/metadata/
mod.rs

1use std::sync::LazyLock;
2
3use anyhow::Result;
4use phf::phf_map;
5use regex::Regex;
6use turbo_rcstr::RcStr;
7use turbo_tasks_fs::FileSystemPath;
8
9use crate::next_app::{AppPage, AppPath, PageSegment, PageType};
10
11pub mod image;
12pub mod route;
13
14pub static STATIC_LOCAL_METADATA: phf::Map<&'static str, &'static [&'static str]> = phf_map! {
15    "icon" => &["ico", "jpg", "jpeg", "png", "svg"],
16    "apple-icon" => &["jpg", "jpeg", "png"],
17    "opengraph-image" => &["jpg", "jpeg", "png", "gif"],
18    "twitter-image" => &["jpg", "jpeg", "png", "gif"],
19    "sitemap" => &["xml"],
20};
21
22pub static STATIC_GLOBAL_METADATA: phf::Map<&'static str, &'static [&'static str]> = phf_map! {
23    "favicon" => &["ico"],
24    "manifest" => &["webmanifest", "json"],
25    "robots" => &["txt"],
26};
27
/// Result of matching a file name against a metadata convention table.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MetadataFileMatch<'a> {
    /// The convention name, i.e. the file stem with any numeric suffix removed
    /// (`"icon"` for `icon2.png`).
    pub metadata_type: &'a str,
    /// The numeric suffix of the stem, when the file name carried one.
    pub number: Option<u32>,
    /// `true` when the file's extension is one of the configured page extensions, i.e. the
    /// metadata is produced by code rather than served as a static asset.
    pub dynamic: bool,
}
33
34fn match_numbered_metadata(stem: &str) -> Option<(&str, &str)> {
35    static NUMBERED_METADATA_RE: LazyLock<Regex> = LazyLock::new(|| {
36        Regex::new("^(icon|apple-icon|opengraph-image|twitter-image)(\\d+)$").unwrap()
37    });
38    let captures = NUMBERED_METADATA_RE.captures(stem)?;
39    // these captures must be defined if `captures` is `Some(...)`.
40    let (stem, number) = (
41        captures.get(1).unwrap().as_str(),
42        captures.get(2).unwrap().as_str(),
43    );
44    Some((stem, number))
45}
46
47fn match_metadata_file<'a>(
48    filename: &'a str,
49    page_extensions: &[RcStr],
50    metadata: &phf::Map<&'static str, &'static [&'static str]>,
51) -> Option<MetadataFileMatch<'a>> {
52    let (stem, ext) = filename.split_once('.')?;
53
54    let (stem, number) = match match_numbered_metadata(stem) {
55        Some((stem, number)) => {
56            let number: u32 = number.parse().ok()?;
57            (stem, Some(number))
58        }
59        _ => (stem, None),
60    };
61
62    let exts = metadata.get(stem)?;
63
64    // favicon can't be dynamic
65    if stem != "favicon" && page_extensions.iter().any(|e| e == ext) {
66        return Some(MetadataFileMatch {
67            metadata_type: stem,
68            number,
69            dynamic: true,
70        });
71    }
72
73    exts.contains(&ext).then_some(MetadataFileMatch {
74        metadata_type: stem,
75        number,
76        dynamic: false,
77    })
78}
79
80pub(crate) async fn get_content_type(path: FileSystemPath) -> Result<String> {
81    let stem = path.file_stem();
82    let mut ext = path.extension();
83
84    let name = stem.unwrap_or_default();
85    if ext == Some("jpg") {
86        ext = Some("jpeg");
87    }
88
89    if name == "favicon" && ext == Some("ico") {
90        return Ok("image/x-icon".to_string());
91    }
92    if name == "sitemap" {
93        return Ok("application/xml".to_string());
94    }
95    if name == "robots" {
96        return Ok("text/plain".to_string());
97    }
98    if name == "manifest" {
99        return Ok("application/manifest+json".to_string());
100    }
101
102    if let Some(ext) = ext
103        && matches!(ext, "png" | "jpeg" | "ico" | "svg")
104    {
105        return Ok(mime_guess::from_ext(ext)
106            .first_or_octet_stream()
107            .to_string());
108    }
109
110    Ok("text/plain".to_string())
111}
112
113pub fn match_local_metadata_file<'a>(
114    basename: &'a str,
115    page_extensions: &[RcStr],
116) -> Option<MetadataFileMatch<'a>> {
117    match_metadata_file(basename, page_extensions, &STATIC_LOCAL_METADATA)
118}
119
/// Result of matching a file name against the root-only metadata conventions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GlobalMetadataFileMatch<'a> {
    /// The convention name, e.g. `"robots"` for `robots.txt`.
    pub metadata_type: &'a str,
    /// `true` when the file's extension is one of the configured page extensions.
    pub dynamic: bool,
}
124
125pub fn match_global_metadata_file<'a>(
126    basename: &'a str,
127    page_extensions: &[RcStr],
128) -> Option<GlobalMetadataFileMatch<'a>> {
129    match_metadata_file(basename, page_extensions, &STATIC_GLOBAL_METADATA).map(|m| {
130        GlobalMetadataFileMatch {
131            metadata_type: m.metadata_type,
132            dynamic: m.dynamic,
133        }
134    })
135}
136
137/// Regular expression pattern used to match route parameters.
138/// Matches both single parameters and parameter groups.
139/// Examples:
140///   - `[[...slug]]` matches parameter group with key 'slug', repeat: true, optional: true
141///   - `[...slug]` matches parameter group with key 'slug', repeat: true, optional: false
142///   - `[[foo]]` matches parameter with key 'foo', repeat: false, optional: true
143///   - `[bar]` matches parameter with key 'bar', repeat: false, optional: false
144static PARAMETER_PATTERN: LazyLock<Regex> =
145    LazyLock::new(|| Regex::new(r"^([^\[]*)\[((?:\[[^\]]*\])|[^\]]+)\](.*)$").unwrap());
146
/// Splits `path` at its last `/` into `(directory, basename)`.
///
/// The directory is `None` when `path` contains no `/`, and `Some("/")` when the only
/// separator is the leading one (so the root is never reported as an empty string).
fn split_directory(path: &str) -> (Option<&str>, &str) {
    match path.rsplit_once('/') {
        Some(("", basename)) => (Some("/"), basename),
        Some((dir, basename)) => (Some(dir), basename),
        None => (None, path),
    }
}
158
159fn filename(path: &str) -> &str {
160    split_directory(path).1
161}
162
163pub(crate) fn split_extension(path: &str) -> (&str, Option<&str>) {
164    let filename = filename(path);
165    if let Some((filename_before_extension, ext)) = filename.rsplit_once('.') {
166        if filename_before_extension.is_empty() {
167            return (filename, None);
168        }
169
170        (filename_before_extension, Some(ext))
171    } else {
172        (filename, None)
173    }
174}
175
176fn file_stem(path: &str) -> &str {
177    split_extension(path).0
178}
179
/// Joins `dir` and `basename` with exactly one `/` between them.
///
/// An empty `dir` and the root `/` both produce `/{basename}`; trailing slashes on any other
/// `dir` are removed before joining.
fn join_path(dir: &str, basename: &str) -> String {
    match dir {
        "" | "/" => format!("/{basename}"),
        _ => {
            let parent = dir.trim_end_matches('/');
            format!("{parent}/{basename}")
        }
    }
}
187
188fn normalize_static_metadata_route_segment(segment: &str) -> String {
189    let mut normalized_segment = segment.to_string();
190
191    while let Some(captures) = PARAMETER_PATTERN.captures(&normalized_segment) {
192        let prefix = captures.get(1).map(|m| m.as_str()).unwrap_or_default();
193        let suffix = captures.get(3).map(|m| m.as_str()).unwrap_or_default();
194        normalized_segment = format!("{prefix}-{suffix}");
195    }
196
197    normalized_segment
198}
199
200fn get_static_metadata_route(segment: &str) -> String {
201    let pathname = AppPath::from(AppPage::parse(segment).unwrap_or_default()).to_string();
202    let mut route = String::new();
203
204    for segment in pathname.split('/').filter(|segment| !segment.is_empty()) {
205        route.push('/');
206        route.push_str(&normalize_static_metadata_route_segment(segment));
207    }
208
209    if route.is_empty() {
210        "/".to_string()
211    } else {
212        route
213    }
214}
215
216/// When you only pass the file extension as `[]`, it will only match the static
217/// convention files e.g. `/robots.txt`, `/sitemap.xml`, `/favicon.ico`,
218/// `/manifest.json`.
219///
220/// When you pass the file extension as `['js', 'jsx', 'ts',
221/// 'tsx']`, it will also match the dynamic convention files e.g. /robots.js,
222/// /sitemap.tsx, /favicon.jsx, /manifest.ts.
223///
224/// When `withExtension` is false, it will match the static convention files
225/// without the extension, by default it's true e.g. /robots, /sitemap,
226/// /favicon, /manifest, use to match dynamic API routes like app/robots.ts.
227pub fn is_metadata_route_file(
228    app_dir_relative_path: &str,
229    page_extensions: &[RcStr],
230    with_extension: bool,
231) -> bool {
232    let (dir, filename) = split_directory(app_dir_relative_path);
233
234    if with_extension {
235        if match_local_metadata_file(filename, page_extensions).is_some() {
236            return true;
237        }
238    } else {
239        let stem = file_stem(filename);
240        let stem = match_numbered_metadata(stem)
241            .map(|(stem, _)| stem)
242            .unwrap_or(stem);
243
244        if STATIC_LOCAL_METADATA.contains_key(stem) {
245            return true;
246        }
247    }
248
249    if dir != Some("/") {
250        return false;
251    }
252
253    if with_extension {
254        if match_global_metadata_file(filename, page_extensions).is_some() {
255            return true;
256        }
257    } else {
258        let base_name = file_stem(filename);
259        if STATIC_GLOBAL_METADATA.contains_key(base_name) {
260            return true;
261        }
262    }
263
264    false
265}
266
267/// Remove the 'app' prefix or '/route' suffix, only check the route name since
268/// they're only allowed in root app directory
269///
270/// e.g.
271/// - /app/robots -> /robots
272/// - app/robots -> /robots
273/// - /robots -> /robots
274pub fn is_metadata_route(mut route: &str) -> bool {
275    if let Some(stripped) = route.strip_prefix("/app/") {
276        route = stripped;
277    } else if let Some(stripped) = route.strip_prefix("app/") {
278        route = stripped;
279    }
280
281    if let Some(stripped) = route.strip_suffix("/route") {
282        route = stripped;
283    }
284
285    let mut page = route.to_string();
286    if !page.starts_with('/') {
287        page = format!("/{page}");
288    }
289
290    !page.ends_with("/page") && is_metadata_route_file(&page, &[], false)
291}
292
/// djb2 hash (`hash * 33 + c`, starting from 5381) with wrapping 32-bit arithmetic.
///
/// Algorithm reference: <http://www.cse.yorku.ca/~oz/hash.html>
///
/// The input is hashed per UTF-16 code unit rather than per Unicode scalar value. The hash
/// feeds route suffixes that have to agree with a JavaScript implementation, and JavaScript
/// strings are indexed by UTF-16 code unit (NOTE(review): assumes the JS side iterates with
/// `charCodeAt` — confirm). For text inside the Basic Multilingual Plane both views are
/// identical; they only differ for characters that need a surrogate pair.
fn djb2_hash(str: &str) -> u32 {
    str.encode_utf16().fold(5381u32, |hash, unit| {
        // hash * 33 + c
        hash.wrapping_mul(33).wrapping_add(u32::from(unit))
    })
}
299
/// Formats `x` in the given `radix` and keeps at most the first six digits.
///
/// This is here to mirror next.js behaviour (`toString(36).slice(0, 6)`): JavaScript's `slice`
/// silently clamps to the string length, so shorter results are returned whole.
///
/// # Panics
///
/// Panics if `radix` is outside `2..=36`. The check is explicit because a radix of 1 would
/// otherwise never terminate for non-zero input, and a radix of 0 would divide by zero.
fn format_radix(mut x: u32, radix: u32) -> String {
    assert!(
        (2..=36).contains(&radix),
        "format_radix: radix must be in 2..=36, got {radix}"
    );

    // Digits are produced least-significant first.
    let mut digits = Vec::new();
    loop {
        let digit = char::from_digit(x % radix, radix).expect("remainder is always < radix");
        digits.push(digit);
        x /= radix;
        if x == 0 {
            break;
        }
    }

    // Reverse into reading order and keep only the leading six digits.
    digits.iter().rev().take(6).collect()
}
323
324/// If there's special convention like (...) or @ in the page path,
325/// Give it a unique hash suffix to avoid conflicts
326///
327/// e.g.
328/// /opengraph-image -> /opengraph-image
329/// /(post)/opengraph-image.tsx -> /opengraph-image-[0-9a-z]{6}
330///
331/// Sitemap is an exception, it should not have a suffix.
332/// As the generated urls are for indexer and usually one sitemap contains all the urls of the sub
333/// routes. The sitemap should be unique in each level and not have a suffix.
334///
335/// /sitemap -> /sitemap
336/// /(post)/sitemap -> /sitemap
337fn get_metadata_route_suffix(page: &str) -> Option<String> {
338    // skip sitemap
339    if page.ends_with("/sitemap") || page.ends_with("/sitemap.xml") {
340        return None;
341    }
342
343    // Get the parent pathname of the page
344    let parent_pathname = split_directory(page).0.unwrap_or_default();
345    let segments = parent_pathname.split('/').collect::<Vec<&str>>();
346
347    // if any segment is group or parallel route segment, we should add a suffix.
348    if segments.iter().any(|segment| {
349        segment.starts_with('(') && segment.ends_with(')')
350            || segment.starts_with('@') && *segment != "@children"
351    }) {
352        Some(format_radix(djb2_hash(parent_pathname), 36))
353    } else {
354        None
355    }
356}
357
358pub fn fill_static_metadata_segment(segment: &str, last_segment: &str) -> String {
359    let route = get_static_metadata_route(segment);
360    let (name, ext) = split_extension(last_segment);
361    let page_path = join_path(segment, name);
362    let route_suffix = get_metadata_route_suffix(&page_path)
363        .map(|suffix| format!("-{suffix}"))
364        .unwrap_or_default();
365    let filename = format!(
366        "{}{}{}",
367        name,
368        route_suffix,
369        ext.map(|ext| format!(".{ext}")).unwrap_or_default()
370    );
371
372    join_path(&route, &filename)
373}
374
375/// Map metadata page key to the corresponding route
376///
377/// static file page key:    /app/robots.txt -> /robots.txt -> /robots.txt/route
378/// dynamic route page key:  /app/robots.tsx -> /robots -> /robots.txt/route
379pub fn normalize_metadata_route(mut page: AppPage) -> Result<AppPage> {
380    if !is_metadata_route(&format!("{page}")) {
381        return Ok(page);
382    }
383
384    let mut route = page.to_string();
385    let mut suffix: Option<String> = None;
386    if route == "/robots" {
387        route += ".txt"
388    } else if route == "/manifest" {
389        route += ".webmanifest"
390    } else if route.ends_with("/sitemap") {
391        route += ".xml"
392    } else {
393        suffix = get_metadata_route_suffix(&route);
394    }
395
396    // Support both /<metadata-route.ext> and custom routes
397    // /<metadata-route>/route.ts. If it's a metadata file route, we need to
398    // append /[id]/route to the page.
399    if !route.ends_with("/route") {
400        let (base_name, ext) = split_extension(&route);
401
402        page.0.pop();
403
404        page.push(PageSegment::Static(
405            format!(
406                "{}{}{}",
407                base_name,
408                suffix
409                    .map(|suffix| format!("-{suffix}"))
410                    .unwrap_or_default(),
411                ext.map(|ext| format!(".{ext}")).unwrap_or_default(),
412            )
413            .into(),
414        ))?;
415
416        page.push(PageSegment::PageType(PageType::Route))?;
417    }
418
419    Ok(page)
420}
421
#[cfg(test)]
mod test {
    use super::{djb2_hash, fill_static_metadata_segment, format_radix, normalize_metadata_route};
    use crate::next_app::AppPage;

    /// Metadata page keys map to their served route, including default extensions and the
    /// hash suffix for paths that pass through a route group.
    #[test]
    fn test_normalize_metadata_route() {
        const CASES: [(&str, &str); 7] = [
            (
                "/client/(meme)/more-route/twitter-image",
                "/client/(meme)/more-route/twitter-image-769mad/route",
            ),
            (
                "/client/(meme)/more-route/twitter-image2",
                "/client/(meme)/more-route/twitter-image2-769mad/route",
            ),
            ("/robots.txt", "/robots.txt/route"),
            ("/manifest.webmanifest", "/manifest.webmanifest/route"),
            ("/sitemap", "/sitemap.xml/route"),
            ("/sitemap.xml", "/sitemap.xml/route"),
            ("/blog/sitemap", "/blog/sitemap.xml/route"),
        ];

        for (input, expected) in CASES {
            let page = AppPage::parse(input).unwrap();
            let normalized = normalize_metadata_route(page).unwrap();

            assert_eq!(&normalized.to_string(), expected);
        }
    }

    /// A hash whose base-36 form is shorter than six digits must be returned whole.
    #[test]
    fn test_format_radix_doesnt_panic_with_result_less_than_6_characters() {
        let digest = djb2_hash("/lookup/[domain]/(dns)");
        let hash = format_radix(digest, 36);
        assert!(hash.len() < 6);
    }

    /// Static metadata files are placed under the normalized route of their segment.
    #[test]
    fn test_fill_static_metadata_segment() {
        let cases = [
            ("/", "favicon.ico", "/favicon.ico"),
            ("/blog/[slug]", "favicon.ico", "/blog/-/favicon.ico"),
            (
                "/client/(meme)/more-route",
                "twitter-image.png",
                "/client/more-route/twitter-image-769mad.png",
            ),
            ("/(group)/group", "icon.png", "/group/icon-131tc6.png"),
            ("/parallel/@parallel", "icon.png", "/parallel/icon-kzjltp.png"),
        ];

        for (segment, last_segment, expected) in cases {
            assert_eq!(fill_static_metadata_segment(segment, last_segment), expected);
        }
    }
}