turbo_tasks_fs/
read_glob.rs

1use anyhow::{Result, bail};
2use futures::try_join;
3use rustc_hash::FxHashMap;
4use turbo_rcstr::RcStr;
5use turbo_tasks::{Completion, ResolvedVc, TryJoinIterExt, Vc};
6
7use crate::{DirectoryContent, DirectoryEntry, FileSystem, FileSystemPath, glob::Glob};
8
/// Result of reading one directory level for a glob.
///
/// Both maps are keyed by the entry's path relative to the directory the glob
/// read started from (segments joined with `/`, no leading slash).
#[turbo_tasks::value]
#[derive(Default, Debug)]
pub struct ReadGlobResult {
    /// Entries of this directory whose relative path matches the glob, stored
    /// after symlink resolution.
    pub results: FxHashMap<String, DirectoryEntry>,
    /// Results for the sub-directories of this directory in which the glob can
    /// still match, one nested `ReadGlobResult` per sub-directory.
    pub inner: FxHashMap<String, ResolvedVc<ReadGlobResult>>,
}
15
16/// Reads matches of a glob pattern.
17///
18/// DETERMINISM: Result is in random order. Either sort result or do not depend
19/// on the order.
20#[turbo_tasks::function(fs)]
21pub async fn read_glob(directory: FileSystemPath, glob: Vc<Glob>) -> Result<Vc<ReadGlobResult>> {
22    read_glob_internal("", directory, glob).await
23}
24
25#[turbo_tasks::function(fs)]
26async fn read_glob_inner(
27    prefix: RcStr,
28    directory: FileSystemPath,
29    glob: Vc<Glob>,
30) -> Result<Vc<ReadGlobResult>> {
31    read_glob_internal(&prefix, directory, glob).await
32}
33
34// The `prefix` represents the relative directory path where symlinks are not resolve.
35async fn read_glob_internal(
36    prefix: &str,
37    directory: FileSystemPath,
38    glob: Vc<Glob>,
39) -> Result<Vc<ReadGlobResult>> {
40    let dir = directory.read_dir().await?;
41    let mut result = ReadGlobResult::default();
42    let glob_value = glob.await?;
43    match &*dir {
44        DirectoryContent::Entries(entries) => {
45            for (segment, entry) in entries.iter() {
46                // This is redundant with logic inside of `read_dir` but here we track it separately
47                // so we don't follow symlinks.
48                let entry_path: RcStr = if prefix.is_empty() {
49                    segment.clone()
50                } else {
51                    format!("{prefix}/{segment}").into()
52                };
53                let entry = resolve_symlink_safely(entry.clone()).await?;
54                if glob_value.matches(&entry_path) {
55                    result.results.insert(entry_path.to_string(), entry.clone());
56                }
57                if let DirectoryEntry::Directory(path) = entry
58                    && glob_value.can_match_in_directory(&entry_path)
59                {
60                    result.inner.insert(
61                        entry_path.to_string(),
62                        read_glob_inner(entry_path, path.clone(), glob)
63                            .to_resolved()
64                            .await?,
65                    );
66                }
67            }
68        }
69        DirectoryContent::NotFound => {}
70    }
71    Ok(ReadGlobResult::cell(result))
72}
73
74// Resolve a symlink checking for recursion.
75async fn resolve_symlink_safely(entry: DirectoryEntry) -> Result<DirectoryEntry> {
76    let resolved_entry = entry.clone().resolve_symlink().await?;
77    if resolved_entry != entry && matches!(&resolved_entry, DirectoryEntry::Directory(_)) {
78        // We followed a symlink to a directory
79        // To prevent an infinite loop, which in the case of turbo-tasks would simply
80        // exhaust RAM or go into an infinite loop with the GC we need to check for a
81        // recursive symlink, we need to check for recursion.
82
83        // Recursion can only occur if the symlink is a directory and points to an
84        // ancestor of the current path, which can be detected via a simple prefix
85        // match.
86        let source_path = entry.path().unwrap();
87        if source_path.is_inside_or_equal(&resolved_entry.clone().path().unwrap()) {
88            bail!(
89                "'{}' is a symlink causes that causes an infinite loop!",
90                source_path.path.to_string()
91            )
92        }
93    }
94    Ok(resolved_entry)
95}
96
97/// Traverses all directories that match the given `glob`.
98///
99/// This ensures that the calling task will be invalidated
100/// whenever the directories or contents of the directories change,
101///  but unlike read_glob doesn't accumulate data.
102#[turbo_tasks::function(fs)]
103pub async fn track_glob(
104    directory: FileSystemPath,
105    glob: Vc<Glob>,
106    include_dot_files: bool,
107) -> Result<Vc<Completion>> {
108    track_glob_internal("", directory, glob, include_dot_files).await
109}
110
111#[turbo_tasks::function(fs)]
112async fn track_glob_inner(
113    prefix: RcStr,
114    directory: FileSystemPath,
115    glob: Vc<Glob>,
116    include_dot_files: bool,
117) -> Result<Vc<Completion>> {
118    track_glob_internal(&prefix, directory, glob, include_dot_files).await
119}
120
121async fn track_glob_internal(
122    prefix: &str,
123    directory: FileSystemPath,
124    glob: Vc<Glob>,
125    include_dot_files: bool,
126) -> Result<Vc<Completion>> {
127    let dir = directory.read_dir().await?;
128    let glob_value = glob.await?;
129    let fs = directory.fs().to_resolved().await?;
130    let mut reads = Vec::new();
131    let mut completions = Vec::new();
132    let mut types = Vec::new();
133    match &*dir {
134        DirectoryContent::Entries(entries) => {
135            for (segment, entry) in entries.iter() {
136                if !include_dot_files && segment.starts_with('.') {
137                    continue;
138                }
139                // This is redundant with logic inside of `read_dir` but here we track it separately
140                // so we don't follow symlinks.
141                let entry_path = if prefix.is_empty() {
142                    segment.clone()
143                } else {
144                    format!("{prefix}/{segment}").into()
145                };
146
147                match resolve_symlink_safely(entry.clone()).await? {
148                    DirectoryEntry::Directory(path) => {
149                        if glob_value.can_match_in_directory(&entry_path) {
150                            completions.push(track_glob_inner(
151                                entry_path,
152                                path.clone(),
153                                glob,
154                                include_dot_files,
155                            ));
156                        }
157                    }
158                    DirectoryEntry::File(path) => {
159                        if glob_value.matches(&entry_path) {
160                            reads.push(fs.read(path.clone()))
161                        }
162                    }
163                    DirectoryEntry::Symlink(symlink_path) => unreachable!(
164                        "resolve_symlink_safely() should have resolved all symlinks, but found \
165                         unresolved symlink at path: '{}'. Found path: '{}'. Please report this \
166                         as a bug.",
167                        entry_path, symlink_path
168                    ),
169                    DirectoryEntry::Other(path) => {
170                        if glob_value.matches(&entry_path) {
171                            types.push(path.get_type())
172                        }
173                    }
174                    DirectoryEntry::Error => {}
175                }
176            }
177        }
178        DirectoryContent::NotFound => {}
179    }
180    try_join!(
181        reads.iter().try_join(),
182        types.iter().try_join(),
183        completions.iter().try_join()
184    )?;
185    Ok(Completion::new())
186}
187
#[cfg(test)]
pub mod tests {

    use std::{
        fs::{File, create_dir},
        io::prelude::*,
        path::Path,
    };

    use turbo_rcstr::RcStr;
    use turbo_tasks::{Completion, ReadRef, Vc, apply_effects};
    use turbo_tasks_backend::{BackendOptions, TurboTasksBackend, noop_backing_storage};

    use crate::{
        DirectoryEntry, DiskFileSystem, FileContent, FileSystem, FileSystemPath, glob::Glob,
    };

    /// Root cause expected from both glob functions for the fixture built by
    /// `create_symlink_loop`.
    #[cfg(unix)]
    const SYMLINK_LOOP_ERROR: &str =
        "'sub/link' is a symlink causes that causes an infinite loop!";

    /// Creates a new file at `path` holding `contents`; panics if the file
    /// already exists or cannot be written.
    fn create_file(path: impl AsRef<Path>, contents: &[u8]) {
        File::create_new(path).unwrap().write_all(contents).unwrap();
    }

    /// Builds a `DiskFileSystem` named "temp" rooted at `root`. Must be called
    /// from inside a turbo-tasks scope.
    fn disk_fs(root: RcStr) -> Vc<Box<dyn FileSystem>> {
        Vc::upcast::<Box<dyn FileSystem>>(DiskFileSystem::new("temp".into(), root, Vec::new()))
    }

    /// Creates `sub/foo.js` plus `sub/link`, a symlink that points back at
    /// `sub` itself and therefore forms a directory loop.
    #[cfg(unix)]
    fn create_symlink_loop(root: &Path) {
        use std::os::unix::fs::symlink;

        let sub = root.join("sub");
        create_dir(&sub).unwrap();
        create_file(sub.join("foo.js"), b"foo");
        // Put a link in sub that points back at its parent directory.
        symlink(&sub, sub.join("link")).unwrap();
    }

    #[tokio::test]
    async fn read_glob_basic() {
        crate::register();
        let scratch = tempfile::tempdir().unwrap();
        {
            // Layout: one file `foo` in the root and one file `bar` in the
            // subdirectory `sub`.
            let path = scratch.path();
            create_file(path.join("foo"), b"foo");
            create_dir(path.join("sub")).unwrap();
            create_file(path.join("sub/bar"), b"bar");
        }
        let tt = turbo_tasks::TurboTasks::new(TurboTasksBackend::new(
            BackendOptions::default(),
            noop_backing_storage(),
        ));
        let path: RcStr = scratch.path().to_str().unwrap().into();
        tt.run_once(async {
            let fs = disk_fs(path);
            let root = fs.root().await?;

            // `**` matches everything: both root entries plus the nested file.
            let read_dir = root.read_glob(Glob::new("**".into())).await.unwrap();
            assert_eq!(read_dir.results.len(), 2);
            assert_eq!(
                read_dir.results.get("foo"),
                Some(&DirectoryEntry::File(root.join("foo")?))
            );
            assert_eq!(
                read_dir.results.get("sub"),
                Some(&DirectoryEntry::Directory(root.join("sub")?))
            );
            assert_eq!(read_dir.inner.len(), 1);
            let inner = &*read_dir.inner.get("sub").unwrap().await?;
            assert_eq!(inner.results.len(), 1);
            assert_eq!(
                inner.results.get("sub/bar"),
                Some(&DirectoryEntry::File(root.join("sub/bar")?))
            );
            assert_eq!(inner.inner.len(), 0);

            // Now with a more specific pattern: nothing matches in the root,
            // but `sub` is still traversed.
            let read_dir = root.read_glob(Glob::new("**/bar".into())).await.unwrap();
            assert_eq!(read_dir.results.len(), 0);
            assert_eq!(read_dir.inner.len(), 1);
            let inner = &*read_dir.inner.get("sub").unwrap().await?;
            assert_eq!(inner.results.len(), 1);
            assert_eq!(
                inner.results.get("sub/bar"),
                Some(&DirectoryEntry::File(root.join("sub/bar")?))
            );
            assert_eq!(inner.inner.len(), 0);

            anyhow::Ok(())
        })
        .await
        .unwrap();
    }

    #[cfg(unix)]
    #[tokio::test]
    async fn read_glob_symlinks() {
        crate::register();
        let scratch = tempfile::tempdir().unwrap();
        {
            use std::os::unix::fs::symlink;

            // Layout: `sub/foo.js` and a root-level symlink `link.js` pointing
            // at that file.
            let path = scratch.path();
            create_dir(path.join("sub")).unwrap();
            let foo = path.join("sub/foo.js");
            create_file(&foo, b"foo");
            symlink(&foo, path.join("link.js")).unwrap();
        }
        let tt = turbo_tasks::TurboTasks::new(TurboTasksBackend::new(
            BackendOptions::default(),
            noop_backing_storage(),
        ));
        let path: RcStr = scratch.path().to_str().unwrap().into();
        tt.run_once(async {
            let fs = disk_fs(path);
            let root = fs.root().await?;

            // The key is the symlink's own path, the value is the resolved target.
            let read_dir = root.read_glob(Glob::new("*.js".into())).await.unwrap();
            assert_eq!(read_dir.results.len(), 1);
            assert_eq!(
                read_dir.results.get("link.js"),
                Some(&DirectoryEntry::File(root.join("sub/foo.js")?))
            );
            assert_eq!(read_dir.inner.len(), 0);

            anyhow::Ok(())
        })
        .await
        .unwrap();
    }

    /// Operation that deletes the file at `path`.
    #[turbo_tasks::function(operation)]
    pub async fn delete(path: FileSystemPath) -> anyhow::Result<()> {
        path.write(FileContent::NotFound.cell()).await?;
        Ok(())
    }

    /// Operation that replaces the file at `path` with `contents`.
    #[turbo_tasks::function(operation)]
    pub async fn write(path: FileSystemPath, contents: RcStr) -> anyhow::Result<()> {
        path.write(
            FileContent::Content(crate::File::from_bytes(contents.to_string().into_bytes())).cell(),
        )
        .await?;
        Ok(())
    }

    /// Operation that tracks `**` below `path`, skipping dot files.
    #[turbo_tasks::function(operation)]
    pub fn track_star_star_glob(path: FileSystemPath) -> Vc<Completion> {
        path.track_glob(Glob::new("**".into()), false)
    }

    #[cfg(unix)]
    #[tokio::test]
    async fn track_glob_invalidations() {
        use std::os::unix::fs::symlink;
        crate::register();
        let scratch = tempfile::tempdir().unwrap();

        // Layout: `dir/foo`, `dir/sub/bar`, a dot-directory `dir/sub/.vim`
        // holding `.gitignore`, and `dir/link.js`.
        let path = scratch.path();
        let dir_on_disk = path.join("dir");
        create_dir(&dir_on_disk).unwrap();
        create_file(dir_on_disk.join("foo"), b"foo");
        create_dir(dir_on_disk.join("sub")).unwrap();
        create_file(dir_on_disk.join("sub/bar"), b"bar");
        // Add a dot-directory containing a dotfile.
        create_dir(dir_on_disk.join("sub/.vim")).unwrap();
        create_file(dir_on_disk.join("sub/.vim/.gitignore"), b"ignore");
        // Put a link in the dir that points at a file in the root.
        let link_target = path.join("link_target.js");
        create_file(&link_target, b"link_target");
        symlink(&link_target, dir_on_disk.join("link.js")).unwrap();

        let tt = turbo_tasks::TurboTasks::new(TurboTasksBackend::new(
            BackendOptions::default(),
            noop_backing_storage(),
        ));
        let path: RcStr = scratch.path().to_str().unwrap().into();
        tt.run_once(async {
            let fs = disk_fs(path);
            let root = fs.root().await?;
            let dir = root.join("dir")?;
            let read_dir = track_star_star_glob(dir.clone())
                .read_strongly_consistent()
                .await?;

            // Delete a file that we shouldn't be tracking
            let delete_result = delete(root.join("dir/sub/.vim/.gitignore")?);
            delete_result.read_strongly_consistent().await?;
            apply_effects(delete_result).await?;

            let read_dir2 = track_star_star_glob(dir.clone())
                .read_strongly_consistent()
                .await?;
            // Same cell as before: the untracked dotfile change was ignored.
            assert!(ReadRef::ptr_eq(&read_dir, &read_dir2));

            // Delete a file that we should be tracking
            let delete_result = delete(root.join("dir/foo")?);
            delete_result.read_strongly_consistent().await?;
            apply_effects(delete_result).await?;

            let read_dir2 = track_star_star_glob(dir.clone())
                .read_strongly_consistent()
                .await?;

            assert!(!ReadRef::ptr_eq(&read_dir, &read_dir2));

            // Modify a symlink target file
            let write_result = write(root.join("link_target.js")?, "new_contents".into());
            write_result.read_strongly_consistent().await?;
            apply_effects(write_result).await?;
            let read_dir3 = track_star_star_glob(dir.clone())
                .read_strongly_consistent()
                .await?;

            assert!(!ReadRef::ptr_eq(&read_dir3, &read_dir2));

            anyhow::Ok(())
        })
        .await
        .unwrap();
    }

    #[cfg(unix)]
    #[tokio::test]
    async fn track_glob_symlinks_loop() {
        crate::register();
        let scratch = tempfile::tempdir().unwrap();
        create_symlink_loop(scratch.path());
        let tt = turbo_tasks::TurboTasks::new(TurboTasksBackend::new(
            BackendOptions::default(),
            noop_backing_storage(),
        ));
        let path: RcStr = scratch.path().to_str().unwrap().into();
        tt.run_once(async {
            let fs = disk_fs(path);
            let root = fs.root().await?;

            let err = root
                .track_glob(Glob::new("**".into()), false)
                .await
                .expect_err("Should have detected an infinite loop");

            assert_eq!(SYMLINK_LOOP_ERROR, format!("{}", err.root_cause()));

            // Calling track_glob a second time must report the same error.
            let err = root
                .track_glob(Glob::new("**".into()), false)
                .await
                .expect_err("Should have detected an infinite loop");

            assert_eq!(SYMLINK_LOOP_ERROR, format!("{}", err.root_cause()));

            anyhow::Ok(())
        })
        .await
        .unwrap();
    }

    #[cfg(unix)]
    #[tokio::test]
    async fn read_glob_symlinks_loop() {
        crate::register();
        let scratch = tempfile::tempdir().unwrap();
        create_symlink_loop(scratch.path());
        let tt = turbo_tasks::TurboTasks::new(TurboTasksBackend::new(
            BackendOptions::default(),
            noop_backing_storage(),
        ));
        let path: RcStr = scratch.path().to_str().unwrap().into();
        tt.run_once(async {
            let fs = disk_fs(path);
            let root = fs.root().await?;

            let err = root
                .read_glob(Glob::new("**".into()))
                .await
                .expect_err("Should have detected an infinite loop");

            assert_eq!(SYMLINK_LOOP_ERROR, format!("{}", err.root_cause()));

            // Same when calling track glob
            let err = root
                .track_glob(Glob::new("**".into()), false)
                .await
                .expect_err("Should have detected an infinite loop");

            assert_eq!(SYMLINK_LOOP_ERROR, format!("{}", err.root_cause()));

            anyhow::Ok(())
        })
        .await
        .unwrap();
    }
}