next_core/next_app/metadata/
mod.rs

1use std::{ops::Deref, sync::LazyLock};
2
3use anyhow::Result;
4use once_cell::sync::Lazy;
5use regex::Regex;
6use rustc_hash::FxHashMap;
7use turbo_rcstr::RcStr;
8use turbo_tasks_fs::FileSystemPath;
9
10use crate::next_app::{AppPage, PageSegment, PageType};
11
12pub mod image;
13pub mod route;
14
15pub static STATIC_LOCAL_METADATA: Lazy<FxHashMap<&'static str, &'static [&'static str]>> =
16    Lazy::new(|| {
17        FxHashMap::from_iter([
18            (
19                "icon",
20                &["ico", "jpg", "jpeg", "png", "svg"] as &'static [&'static str],
21            ),
22            ("apple-icon", &["jpg", "jpeg", "png"]),
23            ("opengraph-image", &["jpg", "jpeg", "png", "gif"]),
24            ("twitter-image", &["jpg", "jpeg", "png", "gif"]),
25            ("sitemap", &["xml"]),
26        ])
27    });
28
29pub static STATIC_GLOBAL_METADATA: Lazy<FxHashMap<&'static str, &'static [&'static str]>> =
30    Lazy::new(|| {
31        FxHashMap::from_iter([
32            ("favicon", &["ico"] as &'static [&'static str]),
33            ("manifest", &["webmanifest", "json"]),
34            ("robots", &["txt"]),
35        ])
36    });
37
/// Result of matching a file name against a metadata convention table.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MetadataFileMatch<'a> {
    /// The convention name the file matched (e.g. `icon`), without any numeric suffix.
    pub metadata_type: &'a str,
    /// The numeric suffix of the stem (e.g. `2` for `icon2.png`), if there was one.
    pub number: Option<u32>,
    /// `true` when the file extension is one of the page extensions rather than a static
    /// extension of the convention.
    pub dynamic: bool,
}
43
44fn match_numbered_metadata(stem: &str) -> Option<(&str, &str)> {
45    static NUMBERED_METADATA_RE: LazyLock<Regex> = LazyLock::new(|| {
46        Regex::new("^(icon|apple-icon|opengraph-image|twitter-image)(\\d+)$").unwrap()
47    });
48    let captures = NUMBERED_METADATA_RE.captures(stem)?;
49    // these captures must be defined if `captures` is `Some(...)`.
50    let (stem, number) = (
51        captures.get(1).unwrap().as_str(),
52        captures.get(2).unwrap().as_str(),
53    );
54    Some((stem, number))
55}
56
57fn match_metadata_file<'a>(
58    filename: &'a str,
59    page_extensions: &[RcStr],
60    metadata: &FxHashMap<&str, &[&str]>,
61) -> Option<MetadataFileMatch<'a>> {
62    let (stem, ext) = filename.split_once('.')?;
63
64    let (stem, number) = match match_numbered_metadata(stem) {
65        Some((stem, number)) => {
66            let number: u32 = number.parse().ok()?;
67            (stem, Some(number))
68        }
69        _ => (stem, None),
70    };
71
72    let exts = metadata.get(stem)?;
73
74    // favicon can't be dynamic
75    if stem != "favicon" && page_extensions.iter().any(|e| e == ext) {
76        return Some(MetadataFileMatch {
77            metadata_type: stem,
78            number,
79            dynamic: true,
80        });
81    }
82
83    exts.contains(&ext).then_some(MetadataFileMatch {
84        metadata_type: stem,
85        number,
86        dynamic: false,
87    })
88}
89
90pub(crate) async fn get_content_type(path: FileSystemPath) -> Result<String> {
91    let stem = path.file_stem();
92    let mut ext = path.extension();
93
94    let name = stem.unwrap_or_default();
95    if ext == "jpg" {
96        ext = "jpeg"
97    }
98
99    if name == "favicon" && ext == "ico" {
100        return Ok("image/x-icon".to_string());
101    }
102    if name == "sitemap" {
103        return Ok("application/xml".to_string());
104    }
105    if name == "robots" {
106        return Ok("text/plain".to_string());
107    }
108    if name == "manifest" {
109        return Ok("application/manifest+json".to_string());
110    }
111
112    if ext == "png" || ext == "jpeg" || ext == "ico" || ext == "svg" {
113        return Ok(mime_guess::from_ext(ext)
114            .first_or_octet_stream()
115            .to_string());
116    }
117
118    Ok("text/plain".to_string())
119}
120
121pub fn match_local_metadata_file<'a>(
122    basename: &'a str,
123    page_extensions: &[RcStr],
124) -> Option<MetadataFileMatch<'a>> {
125    match_metadata_file(basename, page_extensions, STATIC_LOCAL_METADATA.deref())
126}
127
/// Result of matching a file name against the global metadata conventions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GlobalMetadataFileMatch<'a> {
    /// The convention name the file matched (e.g. `robots`).
    pub metadata_type: &'a str,
    /// `true` when the file extension is one of the page extensions rather than a static
    /// extension of the convention.
    pub dynamic: bool,
}
132
133pub fn match_global_metadata_file<'a>(
134    basename: &'a str,
135    page_extensions: &[RcStr],
136) -> Option<GlobalMetadataFileMatch<'a>> {
137    match_metadata_file(basename, page_extensions, STATIC_GLOBAL_METADATA.deref()).map(|m| {
138        GlobalMetadataFileMatch {
139            metadata_type: m.metadata_type,
140            dynamic: m.dynamic,
141        }
142    })
143}
144
/// Splits `path` at its last `/` into `(directory, basename)`.
///
/// The directory is `None` when `path` contains no `/`, and `Some("/")` when the only directory
/// component is the root (e.g. `/robots.txt`).
fn split_directory(path: &str) -> (Option<&str>, &str) {
    match path.rsplit_once('/') {
        None => (None, path),
        // A leading slash leaves an empty directory part; report it as the root.
        Some(("", basename)) => (Some("/"), basename),
        Some((dir, basename)) => (Some(dir), basename),
    }
}
156
157fn filename(path: &str) -> &str {
158    split_directory(path).1
159}
160
161pub(crate) fn split_extension(path: &str) -> (&str, Option<&str>) {
162    let filename = filename(path);
163    if let Some((filename_before_extension, ext)) = filename.rsplit_once('.') {
164        if filename_before_extension.is_empty() {
165            return (filename, None);
166        }
167
168        (filename_before_extension, Some(ext))
169    } else {
170        (filename, None)
171    }
172}
173
174fn file_stem(path: &str) -> &str {
175    split_extension(path).0
176}
177
178/// When you only pass the file extension as `[]`, it will only match the static
179/// convention files e.g. `/robots.txt`, `/sitemap.xml`, `/favicon.ico`,
180/// `/manifest.json`.
181///
182/// When you pass the file extension as `['js', 'jsx', 'ts',
183/// 'tsx']`, it will also match the dynamic convention files e.g. /robots.js,
184/// /sitemap.tsx, /favicon.jsx, /manifest.ts.
185///
186/// When `withExtension` is false, it will match the static convention files
187/// without the extension, by default it's true e.g. /robots, /sitemap,
188/// /favicon, /manifest, use to match dynamic API routes like app/robots.ts.
189pub fn is_metadata_route_file(
190    app_dir_relative_path: &str,
191    page_extensions: &[RcStr],
192    with_extension: bool,
193) -> bool {
194    let (dir, filename) = split_directory(app_dir_relative_path);
195
196    if with_extension {
197        if match_local_metadata_file(filename, page_extensions).is_some() {
198            return true;
199        }
200    } else {
201        let stem = file_stem(filename);
202        let stem = match_numbered_metadata(stem)
203            .map(|(stem, _)| stem)
204            .unwrap_or(stem);
205
206        if STATIC_LOCAL_METADATA.contains_key(stem) {
207            return true;
208        }
209    }
210
211    if dir != Some("/") {
212        return false;
213    }
214
215    if with_extension {
216        if match_global_metadata_file(filename, page_extensions).is_some() {
217            return true;
218        }
219    } else {
220        let base_name = file_stem(filename);
221        if STATIC_GLOBAL_METADATA.contains_key(base_name) {
222            return true;
223        }
224    }
225
226    false
227}
228
229/// Remove the 'app' prefix or '/route' suffix, only check the route name since
230/// they're only allowed in root app directory
231///
232/// e.g.
233/// - /app/robots -> /robots
234/// - app/robots -> /robots
235/// - /robots -> /robots
236pub fn is_metadata_route(mut route: &str) -> bool {
237    if let Some(stripped) = route.strip_prefix("/app/") {
238        route = stripped;
239    } else if let Some(stripped) = route.strip_prefix("app/") {
240        route = stripped;
241    }
242
243    if let Some(stripped) = route.strip_suffix("/route") {
244        route = stripped;
245    }
246
247    let mut page = route.to_string();
248    if !page.starts_with('/') {
249        page = format!("/{page}");
250    }
251
252    !page.ends_with("/page") && is_metadata_route_file(&page, &[], false)
253}
254
/// djb_2 hash implementation referenced from [here](http://www.cse.yorku.ca/~oz/hash.html)
///
/// The hash is computed over the UTF-16 code units of `str`, wrapping at 32 bits. This matches a
/// JavaScript implementation that folds `charCodeAt(i)` over the string, including for
/// characters outside the Basic Multilingual Plane, which contribute two surrogate code units
/// rather than one code point.
fn djb2_hash(str: &str) -> u32 {
    str.encode_utf16().fold(5381_u32, |hash, unit| {
        // hash * 33 + unit
        (hash << 5).wrapping_add(hash).wrapping_add(u32::from(unit))
    })
}
261
/// Formats `x` in the given `radix` (lowercase digits) and keeps at most the first 6 characters.
///
/// This is here to mirror next.js behaviour (`toString(36).slice(0, 6)`).
///
/// # Panics
///
/// Panics if `radix` is not in `2..=36`.
fn format_radix(mut x: u32, radix: u32) -> String {
    // Checked up front: radix 0 would divide by zero and radix 1 would never terminate, because
    // dividing by 1 never reduces `x`.
    assert!(
        (2..=36).contains(&radix),
        "radix must be in 2..=36, got {radix}"
    );

    // Digits are produced least-significant first.
    let mut digits = Vec::new();
    loop {
        let digit = std::char::from_digit(x % radix, radix)
            .expect("remainder is always a valid digit for a radix in 2..=36");
        digits.push(digit);
        x /= radix;
        if x == 0 {
            break;
        }
    }

    // We only need the first 6 characters of the hash but sometimes the hash is too short.
    // JavaScript's `slice(0, 6)` silently clamps to the string length; `take(6)` does the same,
    // whereas slicing `[..6]` on a shorter buffer would panic.
    digits.iter().rev().take(6).collect()
}
285
286/// If there's special convention like (...) or @ in the page path,
287/// Give it a unique hash suffix to avoid conflicts
288///
289/// e.g.
290/// /opengraph-image -> /opengraph-image
291/// /(post)/opengraph-image.tsx -> /opengraph-image-[0-9a-z]{6}
292///
293/// Sitemap is an exception, it should not have a suffix.
294/// As the generated urls are for indexer and usually one sitemap contains all the urls of the sub
295/// routes. The sitemap should be unique in each level and not have a suffix.
296///
297/// /sitemap -> /sitemap
298/// /(post)/sitemap -> /sitemap
299fn get_metadata_route_suffix(page: &str) -> Option<String> {
300    // skip sitemap
301    if page.ends_with("/sitemap") || page.ends_with("/sitemap.xml") {
302        return None;
303    }
304
305    // Get the parent pathname of the page
306    let parent_pathname = split_directory(page).0.unwrap_or_default();
307    let segments = parent_pathname.split('/').collect::<Vec<&str>>();
308
309    // if any segment is group or parallel route segment, we should add a suffix.
310    if segments.iter().any(|segment| {
311        segment.starts_with('(') && segment.ends_with(')')
312            || segment.starts_with('@') && *segment != "@children"
313    }) {
314        Some(format_radix(djb2_hash(parent_pathname), 36))
315    } else {
316        None
317    }
318}
319
320/// Map metadata page key to the corresponding route
321///
322/// static file page key:    /app/robots.txt -> /robots.txt -> /robots.txt/route
323/// dynamic route page key:  /app/robots.tsx -> /robots -> /robots.txt/route
324pub fn normalize_metadata_route(mut page: AppPage) -> Result<AppPage> {
325    if !is_metadata_route(&format!("{page}")) {
326        return Ok(page);
327    }
328
329    let mut route = page.to_string();
330    let mut suffix: Option<String> = None;
331    if route == "/robots" {
332        route += ".txt"
333    } else if route == "/manifest" {
334        route += ".webmanifest"
335    } else {
336        suffix = get_metadata_route_suffix(&route);
337    }
338
339    // Support both /<metadata-route.ext> and custom routes
340    // /<metadata-route>/route.ts. If it's a metadata file route, we need to
341    // append /[id]/route to the page.
342    if !route.ends_with("/route") {
343        let (base_name, ext) = split_extension(&route);
344
345        page.0.pop();
346
347        page.push(PageSegment::Static(
348            format!(
349                "{}{}{}",
350                base_name,
351                suffix
352                    .map(|suffix| format!("-{suffix}"))
353                    .unwrap_or_default(),
354                ext.map(|ext| format!(".{ext}")).unwrap_or_default(),
355            )
356            .into(),
357        ))?;
358
359        page.push(PageSegment::PageType(PageType::Route))?;
360    }
361
362    Ok(page)
363}
364
#[cfg(test)]
mod test {
    use super::{djb2_hash, format_radix, normalize_metadata_route};
    use crate::next_app::AppPage;

    /// Each `(input, expected)` pair is a page key and the route it must normalize to.
    #[test]
    fn test_normalize_metadata_route() {
        const CASES: &[(&str, &str)] = &[
            (
                "/client/(meme)/more-route/twitter-image",
                "/client/(meme)/more-route/twitter-image-769mad/route",
            ),
            (
                "/client/(meme)/more-route/twitter-image2",
                "/client/(meme)/more-route/twitter-image2-769mad/route",
            ),
            ("/robots.txt", "/robots.txt/route"),
            ("/manifest.webmanifest", "/manifest.webmanifest/route"),
        ];

        for &(input, expected) in CASES {
            let parsed = AppPage::parse(input).unwrap();
            let normalized = normalize_metadata_route(parsed).unwrap();

            assert_eq!(&normalized.to_string(), expected);
        }
    }

    /// The hash of this path has fewer than 6 base-36 digits, so truncation must not panic.
    #[test]
    fn test_format_radix_doesnt_panic_with_result_less_than_6_characters() {
        let short_hash = format_radix(djb2_hash("/lookup/[domain]/(dns)"), 36);
        assert!(short_hash.len() < 6);
    }
}
398}