turbo_tasks_backend/database/
db_versioning.rs

1use std::{
2    env,
3    ffi::{OsStr, OsString},
4    fs::{DirEntry, read_dir, remove_dir_all, rename},
5    path::{Path, PathBuf},
6    time::Duration,
7};
8
9use anyhow::Result;
10
/// Information gathered by `vergen_gitcl` in the top-level binary crate and passed down. This
/// information must be computed in the top-level crate for cargo incremental compilation to work
/// correctly.
///
/// The struct only borrows its data and is cheap to copy, so it can be passed around by value.
///
/// See `crates/napi/build.rs` for details.
#[derive(Debug, Clone, Copy)]
pub struct GitVersionInfo<'a> {
    /// Output of `git describe --match 'v[0-9]' --dirty`. Used verbatim as the name of the
    /// versioned database directory.
    pub describe: &'a str,
    /// Is the git repository dirty? Always forced to `false` when the `CI` environment variable is
    /// set and non-empty.
    pub dirty: bool,
}
23
/// Specifies how many databases that have a different version than the current one are retained.
/// For example, if `DEFAULT_MAX_OTHER_DB_VERSIONS` is 2, there can be at most 3 databases in the
/// directory: the current one and two older/newer ones. On CI it never keeps any other versions.
27const DEFAULT_MAX_OTHER_DB_VERSIONS: usize = 2;
28
29/// Directories are prefixed with this before being deleted, so that if we fail to fully delete the
30/// directory, we can pick up where we left off last time.
31const DELETION_PREFIX: &str = "__stale_";
32
33/// Given a base path, creates a version directory for the given `version_info`. Automatically
34/// cleans up old/stale databases.
35///
36/// **Environment Variables**
37/// - `TURBO_ENGINE_VERSION`: Forces use of a specific database version.
38/// - `TURBO_ENGINE_IGNORE_DIRTY`: Enable persistent caching in a dirty git repository. Otherwise a
39///   temporary directory is created.
40/// - `TURBO_ENGINE_DISABLE_VERSIONING`: Ignores versioning and always uses the same "unversioned"
41///   database when set.
42pub fn handle_db_versioning(
43    base_path: &Path,
44    version_info: &GitVersionInfo,
45    is_ci: bool,
46) -> Result<PathBuf> {
47    if let Ok(version) = env::var("TURBO_ENGINE_VERSION") {
48        return Ok(base_path.join(version));
49    }
50    let ignore_dirty = env::var("TURBO_ENGINE_IGNORE_DIRTY").ok().is_some();
51    let disabled_versioning = env::var("TURBO_ENGINE_DISABLE_VERSIONING").ok().is_some();
52    let version = if disabled_versioning {
53        println!(
54            "WARNING: Persistent Caching versioning is disabled. Manual removal of the persistent \
55             caching database might be required."
56        );
57        Some("unversioned")
58    } else if !version_info.dirty {
59        Some(version_info.describe)
60    } else if ignore_dirty {
61        println!(
62            "WARNING: The git repository is dirty, but Persistent Caching is still enabled. \
63             Manual removal of the persistent caching database might be required."
64        );
65        Some(version_info.describe)
66    } else {
67        println!(
68            "WARNING: The git repository is dirty: Persistent Caching is disabled. Use \
69             TURBO_ENGINE_IGNORE_DIRTY=1 to ignore dirtiness of the repository."
70        );
71        None
72    };
73    let path;
74    if let Some(version) = version {
75        path = base_path.join(version);
76
77        let max_other_db_versions = if is_ci {
78            0
79        } else {
80            DEFAULT_MAX_OTHER_DB_VERSIONS
81        };
82
83        if let Ok(read_dir) = read_dir(base_path) {
84            let mut old_dbs = Vec::new();
85            for entry in read_dir {
86                let Ok(entry) = entry else { continue };
87
88                // skip our target version (if it exists)
89                let name = entry.file_name();
90                if name == version {
91                    continue;
92                }
93
94                // skip non-directories
95                let Ok(file_type) = entry.file_type() else {
96                    continue;
97                };
98                if !file_type.is_dir() {
99                    continue;
100                }
101
102                // Find and try to finish removing any partially deleted directories
103                if name
104                    .as_encoded_bytes()
105                    .starts_with(AsRef::<OsStr>::as_ref(DELETION_PREFIX).as_encoded_bytes())
106                {
107                    // failures during cleanup of a cache directory are not fatal
108                    let _ = remove_dir_all(entry.path());
109                    continue;
110                }
111
112                old_dbs.push(entry);
113            }
114
115            if old_dbs.len() > max_other_db_versions {
116                old_dbs.sort_by_cached_key(|entry| {
117                    fn get_age(e: &DirEntry) -> Result<Duration> {
118                        let m = e.metadata()?;
119                        // Maybe change this: We care more about the atime/mtime of the files inside
120                        // the directory than the directory itself. atime is also fragile because it
121                        // can be impacted by recursive scanning tools (e.g. ripgrep). It might be
122                        // better for us to always explicitly touch a specific file inside the
123                        // versioned directory when reading the cache, and then use that file's
124                        // mtime.
125                        Ok(m.accessed().or_else(|_| m.modified())?.elapsed()?)
126                    }
127                    get_age(entry).unwrap_or(Duration::MAX)
128                });
129                for entry in old_dbs.into_iter().skip(max_other_db_versions) {
130                    let mut new_name = OsString::from(DELETION_PREFIX);
131                    new_name.push(entry.file_name());
132                    let new_path = base_path.join(new_name);
133                    // rename first, it's an atomic operation
134                    let rename_result = rename(entry.path(), &new_path);
135                    // Only try to delete the files if the rename succeeded, it's not safe to delete
136                    // contents if we didn't manage to first poison the directory by renaming it.
137                    if rename_result.is_ok() {
138                        // It's okay if this fails, as we've already poisoned the directory.
139                        let _ = remove_dir_all(&new_path);
140                    }
141                }
142            }
143        }
144    } else {
145        path = base_path.join("temp");
146        // propagate errors: if this fails we may have stale files left over in the temp directory
147        remove_dir_all(&path)?;
148    }
149
150    Ok(path)
151}
152
#[cfg(test)]
mod tests {
    use std::{fs, thread::sleep};

    use rstest::rstest;
    use tempfile::TempDir;

    use super::*;

    /// Counts the direct children of `dir`, panicking on any I/O error.
    fn count_entries(dir: &Path) -> usize {
        let children: Vec<fs::DirEntry> = fs::read_dir(dir)
            .unwrap()
            .map(|child| child.unwrap())
            .collect();
        children.len()
    }

    /// The current version is always kept, plus at most `max_other_db_versions` other
    /// directories (none at all on CI).
    #[rstest]
    #[case::not_ci(false, DEFAULT_MAX_OTHER_DB_VERSIONS)]
    #[case::ci(true, 0)]
    fn test_max_versions(#[case] is_ci: bool, #[case] max_other_db_versions: usize) {
        const CURRENT_VERSION: &str = "mock-version";

        let scratch = TempDir::new().unwrap();
        let root = scratch.path();
        let current_dir = root.join(CURRENT_VERSION);
        fs::create_dir(&current_dir).unwrap();

        // Give the current version the oldest atime/mtime of all directories: it must survive
        // the cleanup regardless of its age.
        sleep(Duration::from_millis(100));

        let other_dir_count = max_other_db_versions + 3;
        for index in 0..other_dir_count {
            let other_dir = root.join(format!("other-dir-{index}"));
            fs::create_dir(other_dir).unwrap();
        }
        // every other directory plus the current version
        assert_eq!(count_entries(root), other_dir_count + 1);

        let info = GitVersionInfo {
            describe: CURRENT_VERSION,
            dirty: false,
        };
        let resolved = handle_db_versioning(root, &info, is_ci).unwrap();

        assert_eq!(resolved, current_dir);
        assert!(current_dir.exists());
        // the retained other versions plus the current version
        assert_eq!(count_entries(root), max_other_db_versions + 1);
    }

    /// Directories carrying the deletion prefix are leftovers of an interrupted cleanup and must
    /// all be removed, no matter how many there are.
    #[test]
    fn test_cleanup_of_prefixed_items() {
        const STALE_DIR_COUNT: usize = 5;

        let scratch = TempDir::new().unwrap();
        let root = scratch.path();

        for index in 0..STALE_DIR_COUNT {
            let stale_name = format!("{DELETION_PREFIX}other-dir-{index}");
            fs::create_dir(root.join(stale_name)).unwrap();
        }
        assert_eq!(count_entries(root), STALE_DIR_COUNT);

        let info = GitVersionInfo {
            describe: "mock-version",
            dirty: false,
        };
        handle_db_versioning(root, &info, /* is_ci */ false).unwrap();

        assert_eq!(count_entries(root), 0);
    }
}