turbo_rcstr/
lib.rs

1use std::{
2    borrow::{Borrow, Cow},
3    ffi::OsStr,
4    fmt::{Debug, Display},
5    hash::{Hash, Hasher},
6    mem::{ManuallyDrop, forget},
7    num::NonZeroU8,
8    ops::Deref,
9    path::{Path, PathBuf},
10};
11
12use bytes_str::BytesStr;
13use debug_unreachable::debug_unreachable;
14use serde::{Deserialize, Deserializer, Serialize, Serializer};
15use shrink_to_fit::ShrinkToFit;
16use triomphe::Arc;
17use turbo_tasks_hash::{DeterministicHash, DeterministicHasher};
18
19use crate::{
20    dynamic::{deref_from, new_atom},
21    tagged_value::TaggedValue,
22};
23
24mod dynamic;
25mod tagged_value;
26
/// An immutable reference counted [`String`], similar to [`Arc<String>`][std::sync::Arc].
///
/// This is the preferred immutable string type for [`turbo_tasks::function`][func] arguments and
/// inside of [`turbo_tasks::value`][value].
///
/// As turbo-tasks must store copies of function arguments to enable caching, non-reference counted
/// [`String`]s would incur frequent cloning. Reference counting typically decreases memory
/// consumption and CPU time in these cases.
///
/// [func]: https://turbopack-rust-docs.vercel.sh/rustdoc/turbo_tasks/attr.function.html
/// [value]: https://turbopack-rust-docs.vercel.sh/rustdoc/turbo_tasks/attr.value.html
///
/// ## Conversion
///
/// Converting a `String` or `&str` to an `RcStr` can be performed using `.into()`,
/// `RcStr::from(...)`, or the `rcstr!` macro.
///
/// ```
/// # use turbo_rcstr::{RcStr, rcstr};
/// #
/// let s = "foo";
/// let rc_s1: RcStr = s.into();
/// let rc_s2 = RcStr::from(s);
/// let rc_s3 = rcstr!("foo");
/// assert_eq!(rc_s1, rc_s2);
/// assert_eq!(rc_s2, rc_s3);
/// ```
///
/// Generally speaking you should
///  * use `rcstr!` when converting a `const`-compatible `str`
///  * use `RcStr::from` for readability
///  * use `.into()` when context makes it clear.
///
/// Converting from an [`RcStr`] to a `&str` should be done with [`RcStr::as_str`]. Converting to a
/// `String` should be done with [`RcStr::into_owned`].
///
/// ## Representation and Future Optimizations
///
/// This type is intentionally opaque to allow for optimizations to the underlying representation.
/// The current implementation distinguishes a reference counted ("dynamic") representation from
/// an inline representation whose length is kept in the tag byte. Future implementations may
/// change this or add interning.
//
// If you want to change the underlying string type to `Arc<str>`, please ensure that you profile
// performance. The current implementation offers very cheap `String -> RcStr -> String`, meaning we
// only pay for the allocation for `Arc` when we pass `format!("").into()` to a function.
pub struct RcStr {
    // Tagged payload. The low two bits of its tag byte select the representation
    // (`DYNAMIC_TAG` or `INLINE_TAG`); every access in this file goes through `tag()` first.
    unsafe_data: TaggedValue,
}
73
// NOTE(review): the soundness of these impls cannot be established from this file alone.
// Presumably they rely on the payload being either inline bytes or an allocation managed by an
// atomically reference counted `triomphe::Arc` that is never mutated through `RcStr` — confirm
// against the `tagged_value` and `dynamic` modules.
unsafe impl Send for RcStr {}
unsafe impl Sync for RcStr {}
76
// Layout of the tag byte as it is used in this file:
//   - bits 0..=1 (`TAG_MASK`) select the representation (`DYNAMIC_TAG` or `INLINE_TAG`);
//   - bits 4..=7 (`LEN_MASK`, shifted down by `LEN_OFFSET`) hold the byte length of an inline
//     string.

// Representation tag for reference counted strings (see the `DYNAMIC_TAG` arms below).
const DYNAMIC_TAG: u8 = 0b_00;
// Representation tag for strings stored inline in the tagged value.
const INLINE_TAG: u8 = 0b_01; // len in upper nybble
// `INLINE_TAG` as a `NonZeroU8`. Not referenced in this file; presumably consumed by a
// submodule — TODO confirm.
const INLINE_TAG_INIT: NonZeroU8 = NonZeroU8::new(INLINE_TAG).unwrap();
// Mask extracting the representation tag from the tag byte.
const TAG_MASK: u8 = 0b_11;
// Number of bits the inline length is shifted left inside the tag byte.
const LEN_OFFSET: usize = 4;
// Mask extracting the (still shifted) inline length from the tag byte.
const LEN_MASK: u8 = 0xf0;
83
impl RcStr {
    /// Returns the representation tag: the low two bits (`TAG_MASK`) of the tag byte.
    #[inline(always)]
    fn tag(&self) -> u8 {
        self.unsafe_data.tag() & TAG_MASK
    }

    /// Borrows the contents as a `&str`, for either representation.
    #[inline(never)]
    pub fn as_str(&self) -> &str {
        match self.tag() {
            // Reference counted representation: borrow the string stored in the shared entry.
            DYNAMIC_TAG => unsafe { dynamic::deref_from(self.unsafe_data).value.as_str() },
            INLINE_TAG => {
                // The inline length is stored in the upper nibble of the tag byte.
                let len = (self.unsafe_data.tag() & LEN_MASK) >> LEN_OFFSET;
                let src = self.unsafe_data.data();
                // NOTE(review): relies on the first `len` inline bytes being valid UTF-8;
                // presumably guaranteed by how inline values are constructed in `dynamic` —
                // confirm there.
                unsafe { std::str::from_utf8_unchecked(&src[..(len as usize)]) }
            }
            // Only the two tags above are produced by this file; any other value is treated as
            // unreachable.
            _ => unsafe { debug_unreachable!() },
        }
    }

    /// Returns an owned mutable [`String`].
    ///
    /// This implementation is more efficient than [`ToString::to_string`]:
    ///
    /// - If the reference count is 1, the `Arc` can be unwrapped, giving ownership of the
    ///   underlying string without cloning in `O(1)` time.
    /// - This avoids some of the potential overhead of the `Display` trait.
    pub fn into_owned(self) -> String {
        match self.tag() {
            DYNAMIC_TAG => {
                // convert `self` into `arc`
                // `ManuallyDrop` keeps `RcStr::drop` from running, so the reference owned by
                // `self` is handed over to the restored `Arc` instead of being released twice.
                let arc = unsafe { dynamic::restore_arc(ManuallyDrop::new(self).unsafe_data) };
                match Arc::try_unwrap(arc) {
                    // Sole owner: take the string out of the allocation.
                    Ok(v) => v.value,
                    // Still shared: copy the contents; dropping `arc` releases our reference.
                    Err(arc) => arc.value.to_string(),
                }
            }
            // Inline strings own no allocation, so the bytes are copied into a new `String`.
            INLINE_TAG => self.as_str().to_string(),
            _ => unsafe { debug_unreachable!() },
        }
    }

    /// Consumes `self`, applies `f` to the owned [`String`] obtained via
    /// [`RcStr::into_owned`], and wraps the result in a new `RcStr`.
    pub fn map(self, f: impl FnOnce(String) -> String) -> Self {
        RcStr::from(Cow::Owned(f(self.into_owned())))
    }

    /// Creates another `RcStr` handle for the same tagged value.
    ///
    /// For the reference counted representation this leaves the reference count one higher
    /// than before (the test module checks this through `clone`); other representations are
    /// copied as-is.
    #[inline]
    pub(crate) fn from_alias(alias: TaggedValue) -> Self {
        if alias.tag() & TAG_MASK == DYNAMIC_TAG {
            unsafe {
                let arc = dynamic::restore_arc(alias);
                // Forgetting the clone keeps the increment it performed: that extra reference
                // belongs to the `RcStr` returned below.
                forget(arc.clone());
                // Forgetting the restored `Arc` avoids releasing the reference that is still
                // owned by whoever handed us `alias`.
                forget(arc);
            }
        }

        Self { unsafe_data: alias }
    }
}
142
143impl DeterministicHash for RcStr {
144    fn deterministic_hash<H: DeterministicHasher>(&self, state: &mut H) {
145        state.write_usize(self.len());
146        state.write_bytes(self.as_bytes());
147    }
148}
149
150impl Deref for RcStr {
151    type Target = str;
152
153    fn deref(&self) -> &Self::Target {
154        self.as_str()
155    }
156}
157
158impl Borrow<str> for RcStr {
159    fn borrow(&self) -> &str {
160        self.as_str()
161    }
162}
163
164impl From<BytesStr> for RcStr {
165    fn from(s: BytesStr) -> Self {
166        let bytes: Vec<u8> = s.into_bytes().into();
167        RcStr::from(unsafe {
168            // Safety: BytesStr are valid utf-8
169            String::from_utf8_unchecked(bytes)
170        })
171    }
172}
173
174impl From<Arc<String>> for RcStr {
175    fn from(s: Arc<String>) -> Self {
176        match Arc::try_unwrap(s) {
177            Ok(v) => new_atom(Cow::Owned(v)),
178            Err(arc) => new_atom(Cow::Borrowed(&**arc)),
179        }
180    }
181}
182
183impl From<String> for RcStr {
184    fn from(s: String) -> Self {
185        new_atom(Cow::Owned(s))
186    }
187}
188
189impl From<&'_ str> for RcStr {
190    fn from(s: &str) -> Self {
191        new_atom(Cow::Borrowed(s))
192    }
193}
194
195impl From<Cow<'_, str>> for RcStr {
196    fn from(s: Cow<str>) -> Self {
197        new_atom(s)
198    }
199}
200
201/// Mimic `&str`
202impl AsRef<Path> for RcStr {
203    fn as_ref(&self) -> &Path {
204        self.as_str().as_ref()
205    }
206}
207
208/// Mimic `&str`
209impl AsRef<OsStr> for RcStr {
210    fn as_ref(&self) -> &OsStr {
211        self.as_str().as_ref()
212    }
213}
214
215/// Mimic `&str`
216impl AsRef<[u8]> for RcStr {
217    fn as_ref(&self) -> &[u8] {
218        self.as_str().as_ref()
219    }
220}
221
222impl PartialEq<str> for RcStr {
223    fn eq(&self, other: &str) -> bool {
224        self.as_str() == other
225    }
226}
227
228impl PartialEq<&'_ str> for RcStr {
229    fn eq(&self, other: &&str) -> bool {
230        self.as_str() == *other
231    }
232}
233
234impl PartialEq<String> for RcStr {
235    fn eq(&self, other: &String) -> bool {
236        self.as_str() == other.as_str()
237    }
238}
239
240impl Debug for RcStr {
241    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
242        Debug::fmt(&self.as_str(), f)
243    }
244}
245
246impl Display for RcStr {
247    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
248        Display::fmt(&self.as_str(), f)
249    }
250}
251
252impl From<RcStr> for String {
253    fn from(s: RcStr) -> Self {
254        s.into_owned()
255    }
256}
257
258impl From<RcStr> for PathBuf {
259    fn from(s: RcStr) -> Self {
260        String::from(s).into()
261    }
262}
263
264impl Clone for RcStr {
265    #[inline(always)]
266    fn clone(&self) -> Self {
267        Self::from_alias(self.unsafe_data)
268    }
269}
270
271impl Default for RcStr {
272    fn default() -> Self {
273        rcstr!("")
274    }
275}
276
277impl PartialEq for RcStr {
278    fn eq(&self, other: &Self) -> bool {
279        match (self.tag(), other.tag()) {
280            (DYNAMIC_TAG, DYNAMIC_TAG) => {
281                let l = unsafe { deref_from(self.unsafe_data) };
282                let r = unsafe { deref_from(other.unsafe_data) };
283                l.hash == r.hash && l.value == r.value
284            }
285            (INLINE_TAG, INLINE_TAG) => self.unsafe_data == other.unsafe_data,
286            _ => false,
287        }
288    }
289}
290
// Marker impl: the `PartialEq` impl for `RcStr` is used as a full equivalence relation.
impl Eq for RcStr {}
292
293impl PartialOrd for RcStr {
294    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
295        Some(self.cmp(other))
296    }
297}
298
299impl Ord for RcStr {
300    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
301        self.as_str().cmp(other.as_str())
302    }
303}
304
305impl Hash for RcStr {
306    fn hash<H: Hasher>(&self, state: &mut H) {
307        match self.tag() {
308            DYNAMIC_TAG => {
309                let l = unsafe { deref_from(self.unsafe_data) };
310                state.write_u64(l.hash);
311                state.write_u8(0xff);
312            }
313            INLINE_TAG => {
314                self.as_str().hash(state);
315            }
316            _ => unsafe { debug_unreachable!() },
317        }
318    }
319}
320
321impl Serialize for RcStr {
322    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
323        serializer.serialize_str(self.as_str())
324    }
325}
326
327impl<'de> Deserialize<'de> for RcStr {
328    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
329        let s = String::deserialize(deserializer)?;
330        Ok(RcStr::from(s))
331    }
332}
333
334impl Drop for RcStr {
335    fn drop(&mut self) {
336        if self.tag() == DYNAMIC_TAG {
337            unsafe { drop(dynamic::restore_arc(self.unsafe_data)) }
338        }
339    }
340}
341
// Implementation detail of the `rcstr!` macro, exposed only so the macro can call it from other
// crates. Forwards to `dynamic::inline_atom` and is usable in `const` contexts. The macro treats
// a `None` result as "cannot be stored inline" and falls back to a cached allocation.
#[doc(hidden)]
pub const fn inline_atom(s: &str) -> Option<RcStr> {
    dynamic::inline_atom(s)
}
346
/// Create an [`RcStr`] from a string literal (or any other `const`-evaluable `&str` expression).
///
/// The string is stored inline when `inline_atom` accepts it. Otherwise a per-call-site
/// `LazyLock` builds the `RcStr` once and every evaluation returns a clone of it.
///
/// `$s` is expanded twice (once in a `const` item, once in the `LazyLock` initializer), so it
/// must be a constant expression.
#[macro_export]
macro_rules! rcstr {
    ($s:expr) => {{
        // Standard-library paths are written absolutely (`::core`, `::std`): paths inside
        // `macro_rules!` are resolved at the call site, where a local item named `core` or `std`
        // would otherwise shadow them.
        const INLINE: ::core::option::Option<$crate::RcStr> = $crate::inline_atom($s);
        // `INLINE` is a constant, so this branch should be resolvable at compile time and the
        // unused arm removed.
        match INLINE {
            ::core::option::Option::Some(inline) => inline,
            ::core::option::Option::None => {
                #[inline(never)]
                fn get_rcstr() -> $crate::RcStr {
                    static CACHE: ::std::sync::LazyLock<$crate::RcStr> =
                        ::std::sync::LazyLock::new(|| $crate::RcStr::from($s));

                    (*CACHE).clone()
                }
                get_rcstr()
            }
        }
    }};
}
368
/// Intentionally a no-op: the implementation body is empty, so calling `shrink_to_fit` on an
/// `RcStr` changes nothing.
impl ShrinkToFit for RcStr {
    #[inline(always)]
    fn shrink_to_fit(&mut self) {}
}
374
// Fail the build early with a clear message when the `napi` feature is combined with a wasm
// target; the napi bindings below are only compiled for non-wasm targets.
#[cfg(all(feature = "napi", target_family = "wasm"))]
compile_error!("The napi feature cannot be enabled for wasm targets");
377
/// napi bindings for `RcStr`. Every impl defers to the corresponding behavior of `String` /
/// `&str`, so an `RcStr` crosses the napi boundary as a string.
#[cfg(all(feature = "napi", not(target_family = "wasm")))]
mod napi_impl {
    use napi::{
        bindgen_prelude::{FromNapiValue, ToNapiValue, TypeName, ValidateNapiValue},
        sys::{napi_env, napi_value},
    };

    use super::*;

    /// Reports the same type name and value type as `String`.
    impl TypeName for RcStr {
        fn type_name() -> &'static str {
            <String as TypeName>::type_name()
        }

        fn value_type() -> napi::ValueType {
            <String as TypeName>::value_type()
        }
    }

    /// Converts to a napi value through the `&str` conversion.
    impl ToNapiValue for RcStr {
        unsafe fn to_napi_value(env: napi_env, val: Self) -> napi::Result<napi_value> {
            let text = val.as_str();
            unsafe { ToNapiValue::to_napi_value(env, text) }
        }
    }

    /// Reads a `String` from the napi value and converts it into an `RcStr`.
    impl FromNapiValue for RcStr {
        unsafe fn from_napi_value(env: napi_env, napi_val: napi_value) -> napi::Result<Self> {
            let owned = unsafe { String::from_napi_value(env, napi_val) }?;
            Ok(RcStr::from(owned))
        }
    }

    /// Validates exactly like `String` does.
    impl ValidateNapiValue for RcStr {
        unsafe fn validate(env: napi_env, napi_val: napi_value) -> napi::Result<napi_value> {
            unsafe { <String as ValidateNapiValue>::validate(env, napi_val) }
        }
    }
}
417
#[cfg(test)]
mod tests {
    use std::mem::ManuallyDrop;

    use super::*;

    /// Checks that cloning, dropping and `into_owned` keep the reference count of a dynamic
    /// string balanced.
    #[test]
    fn test_refcount() {
        // Reads the current reference count of a dynamic `RcStr`. The restored `Arc` is wrapped
        // in `ManuallyDrop` so that inspecting the count does not release a reference.
        fn refcount(str: &RcStr) -> usize {
            assert!(str.tag() == DYNAMIC_TAG);
            let arc = ManuallyDrop::new(unsafe { dynamic::restore_arc(str.unsafe_data) });
            triomphe::Arc::count(&arc)
        }

        let str = RcStr::from("this is a long string that won't be inlined");

        assert_eq!(refcount(&str), 1);
        assert_eq!(refcount(&str), 1); // refcount should not modify the refcount itself

        let cloned_str = str.clone();
        assert_eq!(refcount(&str), 2);

        drop(cloned_str);
        assert_eq!(refcount(&str), 1);

        // `into_owned` on a clone must release the clone's reference again.
        let _ = str.clone().into_owned();
        assert_eq!(refcount(&str), 1);
    }

    /// Checks that `rcstr!` and `RcStr::from` agree for strings of increasing length.
    #[test]
    fn test_rcstr() {
        // Test enough to exceed the small string optimization
        assert_eq!(rcstr!(""), RcStr::default());
        assert_eq!(rcstr!(""), RcStr::from(""));
        assert_eq!(rcstr!("a"), RcStr::from("a"));
        assert_eq!(rcstr!("ab"), RcStr::from("ab"));
        assert_eq!(rcstr!("abc"), RcStr::from("abc"));
        assert_eq!(rcstr!("abcd"), RcStr::from("abcd"));
        assert_eq!(rcstr!("abcde"), RcStr::from("abcde"));
        assert_eq!(rcstr!("abcdef"), RcStr::from("abcdef"));
        assert_eq!(rcstr!("abcdefg"), RcStr::from("abcdefg"));
        assert_eq!(rcstr!("abcdefgh"), RcStr::from("abcdefgh"));
        assert_eq!(rcstr!("abcdefghi"), RcStr::from("abcdefghi"));
    }

    /// Checks that `inline_atom` can be evaluated in a `const` context.
    #[test]
    fn test_inline_atom() {
        // This is a silly test, just asserts that we can evaluate this in a constant context.
        const STR: RcStr = {
            let inline = inline_atom("hello");
            if inline.is_some() {
                inline.unwrap()
            } else {
                unreachable!();
            }
        };
        assert_eq!(STR, RcStr::from("hello"));
    }
}