next_core/next_app/metadata/
mod.rs

1use std::{ops::Deref, sync::LazyLock};
2
3use anyhow::Result;
4use once_cell::sync::Lazy;
5use regex::Regex;
6use rustc_hash::FxHashMap;
7use turbo_rcstr::RcStr;
8use turbo_tasks_fs::FileSystemPath;
9
10use crate::next_app::{AppPage, PageSegment, PageType};
11
12pub mod image;
13pub mod route;
14
15pub static STATIC_LOCAL_METADATA: Lazy<FxHashMap<&'static str, &'static [&'static str]>> =
16    Lazy::new(|| {
17        FxHashMap::from_iter([
18            (
19                "icon",
20                &["ico", "jpg", "jpeg", "png", "svg"] as &'static [&'static str],
21            ),
22            ("apple-icon", &["jpg", "jpeg", "png"]),
23            ("opengraph-image", &["jpg", "jpeg", "png", "gif"]),
24            ("twitter-image", &["jpg", "jpeg", "png", "gif"]),
25            ("sitemap", &["xml"]),
26        ])
27    });
28
29pub static STATIC_GLOBAL_METADATA: Lazy<FxHashMap<&'static str, &'static [&'static str]>> =
30    Lazy::new(|| {
31        FxHashMap::from_iter([
32            ("favicon", &["ico"] as &'static [&'static str]),
33            ("manifest", &["webmanifest", "json"]),
34            ("robots", &["txt"]),
35        ])
36    });
37
/// Outcome of successfully matching a file name against a metadata convention table.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct MetadataFileMatch<'a> {
    /// Name of the matched convention (e.g. `icon`), without numeric suffix or extension.
    pub metadata_type: &'a str,
    /// Numeric suffix of the file stem (`icon2.png` -> `Some(2)`), `None` when absent.
    pub number: Option<u32>,
    /// `true` when the file extension is one of the page extensions rather than one of the
    /// static extensions registered for the convention.
    pub dynamic: bool,
}
43
44fn match_numbered_metadata(stem: &str) -> Option<(&str, &str)> {
45    static NUMBERED_METADATA_RE: LazyLock<Regex> = LazyLock::new(|| {
46        Regex::new("^(icon|apple-icon|opengraph-image|twitter-image)(\\d+)$").unwrap()
47    });
48    let captures = NUMBERED_METADATA_RE.captures(stem)?;
49    // these captures must be defined if `captures` is `Some(...)`.
50    let (stem, number) = (
51        captures.get(1).unwrap().as_str(),
52        captures.get(2).unwrap().as_str(),
53    );
54    Some((stem, number))
55}
56
57fn match_metadata_file<'a>(
58    filename: &'a str,
59    page_extensions: &[RcStr],
60    metadata: &FxHashMap<&str, &[&str]>,
61) -> Option<MetadataFileMatch<'a>> {
62    let (stem, ext) = filename.split_once('.')?;
63
64    let (stem, number) = match match_numbered_metadata(stem) {
65        Some((stem, number)) => {
66            let number: u32 = number.parse().ok()?;
67            (stem, Some(number))
68        }
69        _ => (stem, None),
70    };
71
72    let exts = metadata.get(stem)?;
73
74    // favicon can't be dynamic
75    if stem != "favicon" && page_extensions.iter().any(|e| e == ext) {
76        return Some(MetadataFileMatch {
77            metadata_type: stem,
78            number,
79            dynamic: true,
80        });
81    }
82
83    exts.contains(&ext).then_some(MetadataFileMatch {
84        metadata_type: stem,
85        number,
86        dynamic: false,
87    })
88}
89
90pub(crate) async fn get_content_type(path: FileSystemPath) -> Result<String> {
91    let stem = path.file_stem();
92    let mut ext = path.extension();
93
94    let name = stem.unwrap_or_default();
95    if ext == "jpg" {
96        ext = "jpeg"
97    }
98
99    if name == "favicon" && ext == "ico" {
100        return Ok("image/x-icon".to_string());
101    }
102    if name == "sitemap" {
103        return Ok("application/xml".to_string());
104    }
105    if name == "robots" {
106        return Ok("text/plain".to_string());
107    }
108    if name == "manifest" {
109        return Ok("application/manifest+json".to_string());
110    }
111
112    if ext == "png" || ext == "jpeg" || ext == "ico" || ext == "svg" {
113        return Ok(mime_guess::from_ext(ext)
114            .first_or_octet_stream()
115            .to_string());
116    }
117
118    Ok("text/plain".to_string())
119}
120
121pub fn match_local_metadata_file<'a>(
122    basename: &'a str,
123    page_extensions: &[RcStr],
124) -> Option<MetadataFileMatch<'a>> {
125    match_metadata_file(basename, page_extensions, STATIC_LOCAL_METADATA.deref())
126}
127
/// Outcome of successfully matching a file name against the root-only metadata conventions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct GlobalMetadataFileMatch<'a> {
    /// Name of the matched convention (e.g. `robots`), without extension.
    pub metadata_type: &'a str,
    /// `true` when the file extension is one of the page extensions rather than one of the
    /// static extensions registered for the convention.
    pub dynamic: bool,
}
132
133pub fn match_global_metadata_file<'a>(
134    basename: &'a str,
135    page_extensions: &[RcStr],
136) -> Option<GlobalMetadataFileMatch<'a>> {
137    match_metadata_file(basename, page_extensions, STATIC_GLOBAL_METADATA.deref()).map(|m| {
138        GlobalMetadataFileMatch {
139            metadata_type: m.metadata_type,
140            dynamic: m.dynamic,
141        }
142    })
143}
144
/// Splits `path` at its last `/` into `(directory, basename)`.
///
/// The directory is `None` when `path` contains no `/`, and `Some("/")` when the only
/// separator is the leading one (`/robots.txt`).
fn split_directory(path: &str) -> (Option<&str>, &str) {
    match path.rsplit_once('/') {
        None => (None, path),
        // An empty directory part means the file sits directly in the root.
        Some(("", basename)) => (Some("/"), basename),
        Some((dir, basename)) => (Some(dir), basename),
    }
}
156
157fn filename(path: &str) -> &str {
158    split_directory(path).1
159}
160
161pub(crate) fn split_extension(path: &str) -> (&str, Option<&str>) {
162    let filename = filename(path);
163    if let Some((filename_before_extension, ext)) = filename.rsplit_once('.') {
164        if filename_before_extension.is_empty() {
165            return (filename, None);
166        }
167
168        (filename_before_extension, Some(ext))
169    } else {
170        (filename, None)
171    }
172}
173
174fn file_stem(path: &str) -> &str {
175    split_extension(path).0
176}
177
178/// When you only pass the file extension as `[]`, it will only match the static
179/// convention files e.g. `/robots.txt`, `/sitemap.xml`, `/favicon.ico`,
180/// `/manifest.json`.
181///
182/// When you pass the file extension as `['js', 'jsx', 'ts',
183/// 'tsx']`, it will also match the dynamic convention files e.g. /robots.js,
184/// /sitemap.tsx, /favicon.jsx, /manifest.ts.
185///
186/// When `withExtension` is false, it will match the static convention files
187/// without the extension, by default it's true e.g. /robots, /sitemap,
188/// /favicon, /manifest, use to match dynamic API routes like app/robots.ts.
189pub fn is_metadata_route_file(
190    app_dir_relative_path: &str,
191    page_extensions: &[RcStr],
192    with_extension: bool,
193) -> bool {
194    let (dir, filename) = split_directory(app_dir_relative_path);
195
196    if with_extension {
197        if match_local_metadata_file(filename, page_extensions).is_some() {
198            return true;
199        }
200    } else {
201        let stem = file_stem(filename);
202        let stem = match_numbered_metadata(stem)
203            .map(|(stem, _)| stem)
204            .unwrap_or(stem);
205
206        if STATIC_LOCAL_METADATA.contains_key(stem) {
207            return true;
208        }
209    }
210
211    if dir != Some("/") {
212        return false;
213    }
214
215    if with_extension {
216        if match_global_metadata_file(filename, page_extensions).is_some() {
217            return true;
218        }
219    } else {
220        let base_name = file_stem(filename);
221        if STATIC_GLOBAL_METADATA.contains_key(base_name) {
222            return true;
223        }
224    }
225
226    false
227}
228
229pub fn is_static_metadata_route_file(app_dir_relative_path: &str) -> bool {
230    is_metadata_route_file(app_dir_relative_path, &[], true)
231}
232
233/// Remove the 'app' prefix or '/route' suffix, only check the route name since
234/// they're only allowed in root app directory
235///
236/// e.g.
237/// - /app/robots -> /robots
238/// - app/robots -> /robots
239/// - /robots -> /robots
240pub fn is_metadata_route(mut route: &str) -> bool {
241    if let Some(stripped) = route.strip_prefix("/app/") {
242        route = stripped;
243    } else if let Some(stripped) = route.strip_prefix("app/") {
244        route = stripped;
245    }
246
247    if let Some(stripped) = route.strip_suffix("/route") {
248        route = stripped;
249    }
250
251    let mut page = route.to_string();
252    if !page.starts_with('/') {
253        page = format!("/{page}");
254    }
255
256    !page.ends_with("/page") && is_metadata_route_file(&page, &[], false)
257}
258
/// djb_2 hash implementation referenced from [here](http://www.cse.yorku.ca/~oz/hash.html)
///
/// The hash is computed over the UTF-16 code units of `str` so that it produces the same
/// value as a JavaScript `charCodeAt` loop, including for characters outside the Basic
/// Multilingual Plane (which JavaScript sees as two surrogate code units). Arithmetic wraps
/// at 32 bits.
fn djb2_hash(str: &str) -> u32 {
    str.encode_utf16().fold(5381u32, |hash, unit| {
        // hash * 33 + c, modulo 2^32
        hash.wrapping_mul(33).wrapping_add(u32::from(unit))
    })
}
265
/// Formats `x` in the given `radix` and keeps at most the 6 most significant digits.
///
/// This is here to mirror next.js behaviour (`toString(36).slice(0, 6)`): like `slice`, the
/// result is simply shorter than 6 characters when the number has fewer digits.
///
/// # Panics
///
/// Panics if `radix` is not in `2..=36`.
fn format_radix(mut x: u32, radix: u32) -> String {
    // Checked up front: radix 0 would divide by zero and radix 1 would never terminate.
    assert!(
        (2..=36).contains(&radix),
        "radix must be in 2..=36, got {radix}"
    );

    // A u32 has at most 32 digits (in base 2).
    let mut digits = Vec::with_capacity(32);
    loop {
        let digit = x % radix;
        x /= radix;

        // `digit < radix <= 36`, so the conversion cannot fail.
        digits.push(std::char::from_digit(digit, radix).unwrap());
        if x == 0 {
            break;
        }
    }

    // Digits were produced least significant first; read them back in display order and keep
    // only the leading 6.
    digits.iter().rev().take(6).collect()
}
289
290/// If there's special convention like (...) or @ in the page path,
291/// Give it a unique hash suffix to avoid conflicts
292///
293/// e.g.
294/// /opengraph-image -> /opengraph-image
295/// /(post)/opengraph-image.tsx -> /opengraph-image-[0-9a-z]{6}
296///
297/// Sitemap is an exception, it should not have a suffix.
298/// As the generated urls are for indexer and usually one sitemap contains all the urls of the sub
299/// routes. The sitemap should be unique in each level and not have a suffix.
300///
301/// /sitemap -> /sitemap
302/// /(post)/sitemap -> /sitemap
303fn get_metadata_route_suffix(page: &str) -> Option<String> {
304    // skip sitemap
305    if page.ends_with("/sitemap") {
306        return None;
307    }
308
309    // Get the parent pathname of the page
310    let parent_pathname = split_directory(page).0.unwrap_or_default();
311    let segments = parent_pathname.split('/').collect::<Vec<&str>>();
312
313    // if any segment is group or parallel route segment, we should add a suffix.
314    if segments.iter().any(|segment| {
315        segment.starts_with('(') && segment.ends_with(')')
316            || segment.starts_with('@') && *segment != "@children"
317    }) {
318        Some(format_radix(djb2_hash(parent_pathname), 36))
319    } else {
320        None
321    }
322}
323
324/// Map metadata page key to the corresponding route
325///
326/// static file page key:    /app/robots.txt -> /robots.txt -> /robots.txt/route
327/// dynamic route page key:  /app/robots.tsx -> /robots -> /robots.txt/route
328pub fn normalize_metadata_route(mut page: AppPage) -> Result<AppPage> {
329    if !is_metadata_route(&format!("{page}")) {
330        return Ok(page);
331    }
332
333    let mut route = page.to_string();
334    let mut suffix: Option<String> = None;
335    if route == "/robots" {
336        route += ".txt"
337    } else if route == "/manifest" {
338        route += ".webmanifest"
339    } else {
340        suffix = get_metadata_route_suffix(&route);
341    }
342
343    // Support both /<metadata-route.ext> and custom routes
344    // /<metadata-route>/route.ts. If it's a metadata file route, we need to
345    // append /[id]/route to the page.
346    if !route.ends_with("/route") {
347        let (base_name, ext) = split_extension(&route);
348
349        page.0.pop();
350
351        page.push(PageSegment::Static(
352            format!(
353                "{}{}{}",
354                base_name,
355                suffix
356                    .map(|suffix| format!("-{suffix}"))
357                    .unwrap_or_default(),
358                ext.map(|ext| format!(".{ext}")).unwrap_or_default(),
359            )
360            .into(),
361        ))?;
362
363        page.push(PageSegment::PageType(PageType::Route))?;
364    }
365
366    Ok(page)
367}
368
#[cfg(test)]
mod test {
    use super::{djb2_hash, format_radix, normalize_metadata_route};
    use crate::next_app::AppPage;

    /// Each `(input, expected)` pair is a page path and the route it must normalize to.
    #[test]
    fn test_normalize_metadata_route() {
        let cases = [
            (
                "/client/(meme)/more-route/twitter-image",
                "/client/(meme)/more-route/twitter-image-769mad/route",
            ),
            (
                "/client/(meme)/more-route/twitter-image2",
                "/client/(meme)/more-route/twitter-image2-769mad/route",
            ),
            ("/robots.txt", "/robots.txt/route"),
            ("/manifest.webmanifest", "/manifest.webmanifest/route"),
        ];

        for (input, expected) in cases {
            let page = AppPage::parse(input).unwrap();
            let normalized = normalize_metadata_route(page).unwrap();

            assert_eq!(normalized.to_string(), expected, "normalizing {input}");
        }
    }

    /// The hash of this path has fewer than 6 base-36 digits, which must not panic.
    #[test]
    fn test_format_radix_doesnt_panic_with_result_less_than_6_characters() {
        let hash = format_radix(djb2_hash("/lookup/[domain]/(dns)"), 36);
        assert!(hash.len() < 6, "unexpectedly long hash: {hash}");
    }
}