turbo_rcstr/
lib.rs

1use std::{
2    borrow::{Borrow, Cow},
3    ffi::OsStr,
4    fmt::{Debug, Display},
5    hash::{Hash, Hasher},
6    mem::{ManuallyDrop, forget},
7    num::NonZeroU8,
8    ops::Deref,
9    path::{Path, PathBuf},
10};
11
12use bytes_str::BytesStr;
13use debug_unreachable::debug_unreachable;
14use serde::{Deserialize, Deserializer, Serialize, Serializer};
15use shrink_to_fit::ShrinkToFit;
16use triomphe::Arc;
17use turbo_tasks_hash::{DeterministicHash, DeterministicHasher};
18
19use crate::{
20    dynamic::{deref_from, hash_bytes, new_atom},
21    tagged_value::TaggedValue,
22};
23
24mod dynamic;
25mod tagged_value;
26
27/// An immutable reference counted [`String`], similar to [`Arc<String>`][std::sync::Arc].
28///
29/// This is the preferred immutable string type for [`turbo_tasks::function`][func] arguments and
30/// inside of [`turbo_tasks::value`][value].
31///
32/// As turbo-tasks must store copies of function arguments to enable caching, non-reference counted
33/// [`String`]s would incur frequent cloning. Reference counting typically decreases memory
34/// consumption and CPU time in these cases.
35///
36/// [func]: https://turbopack-rust-docs.vercel.sh/rustdoc/turbo_tasks/attr.function.html
37/// [value]: https://turbopack-rust-docs.vercel.sh/rustdoc/turbo_tasks/attr.value.html
38///
39/// ## Conversion
40///
41/// Converting a `String` or `&str` to an `RcStr` can be performed using `.into()`,
42/// `RcStr::from(...)`, or the `rcstr!` macro.
43///
44/// ```
45/// # use turbo_rcstr::RcStr;
46/// #
47/// let s = "foo";
48/// let rc_s1: RcStr = s.into();
49/// let rc_s2 = RcStr::from(s);
50/// let rc_s3 = rcstr!("foo");
51/// assert_eq!(rc_s1, rc_s2);
52/// ```
53///
54/// Generally speaking you should
55///  * use `rcstr!` when converting a `const`-compatible `str`
56///  * use `RcStr::from` for readability
57///  * use `.into()` when context makes it clear.
58///
59/// Converting from an [`RcStr`] to a `&str` should be done with [`RcStr::as_str`]. Converting to a
60/// `String` should be done with [`RcStr::into_owned`].
61///
62/// ## Future Optimizations
63///
64/// This type is intentionally opaque to allow for optimizations to the underlying representation.
65/// Future implementations may use inline representations or interning.
66//
67// If you want to change the underlying string type to `Arc<str>`, please ensure that you profile
68// performance. The current implementation offers very cheap `String -> RcStr -> String`, meaning we
69// only pay for the allocation for `Arc` when we pass `format!("").into()` to a function.
70pub struct RcStr {
71    unsafe_data: TaggedValue,
72}
73
74const _: () = {
75    // Enforce that RcStr triggers the non-zero size optimization.
76    assert!(std::mem::size_of::<RcStr>() == std::mem::size_of::<Option<RcStr>>());
77};
78
79unsafe impl Send for RcStr {}
80unsafe impl Sync for RcStr {}
81
82// Marks a payload that is stored in an Arc
83const DYNAMIC_TAG: u8 = 0b_00;
84const PREHASHED_STRING_LOCATION: u8 = 0b_0;
85// Marks a payload that has been leaked since it has a static lifetime
86const STATIC_TAG: u8 = 0b_10;
87// The payload is stored inline
88const INLINE_TAG: u8 = 0b_01; // len in upper nybble
89const INLINE_LOCATION: u8 = 0b_1;
90const INLINE_TAG_INIT: NonZeroU8 = NonZeroU8::new(INLINE_TAG).unwrap();
91const TAG_MASK: u8 = 0b_11;
92const LOCATION_MASK: u8 = 0b_1;
93// For inline tags the length is stored in the upper 4 bits of the tag byte
94const LEN_OFFSET: usize = 4;
95const LEN_MASK: u8 = 0xf0;
96
97impl RcStr {
98    #[inline(always)]
99    fn tag(&self) -> u8 {
100        self.unsafe_data.tag_byte() & TAG_MASK
101    }
102    #[inline(always)]
103    fn location(&self) -> u8 {
104        self.unsafe_data.tag_byte() & LOCATION_MASK
105    }
106
107    #[inline(never)]
108    pub fn as_str(&self) -> &str {
109        match self.location() {
110            PREHASHED_STRING_LOCATION => self.prehashed_string_as_str(),
111            INLINE_LOCATION => self.inline_as_str(),
112            _ => unsafe { debug_unreachable!() },
113        }
114    }
115
116    fn inline_as_str(&self) -> &str {
117        debug_assert!(self.location() == INLINE_LOCATION);
118        let len = (self.unsafe_data.tag_byte() & LEN_MASK) >> LEN_OFFSET;
119        let src = self.unsafe_data.data();
120        unsafe { std::str::from_utf8_unchecked(&src[..(len as usize)]) }
121    }
122
123    // Extract the str reference from a string stored in a PrehashedString
124    fn prehashed_string_as_str(&self) -> &str {
125        debug_assert!(self.location() == PREHASHED_STRING_LOCATION);
126        unsafe { dynamic::deref_from(self.unsafe_data).value.as_str() }
127    }
128
129    /// Returns an owned mutable [`String`].
130    ///
131    /// This implementation is more efficient than [`ToString::to_string`]:
132    ///
133    /// - If the reference count is 1, the `Arc` can be unwrapped, giving ownership of the
134    ///   underlying string without cloning in `O(1)` time.
135    /// - This avoids some of the potential overhead of the `Display` trait.
136    pub fn into_owned(self) -> String {
137        match self.tag() {
138            DYNAMIC_TAG => {
139                // convert `self` into `arc`
140                let arc = unsafe { dynamic::restore_arc(ManuallyDrop::new(self).unsafe_data) };
141                match Arc::try_unwrap(arc) {
142                    Ok(v) => v.value.into_string(),
143                    Err(arc) => arc.value.as_str().to_string(),
144                }
145            }
146            INLINE_TAG => self.inline_as_str().to_string(),
147            STATIC_TAG => self.prehashed_string_as_str().to_string(),
148            _ => unsafe { debug_unreachable!() },
149        }
150    }
151
152    pub fn map(self, f: impl FnOnce(String) -> String) -> Self {
153        RcStr::from(Cow::Owned(f(self.into_owned())))
154    }
155}
156
157impl DeterministicHash for RcStr {
158    fn deterministic_hash<H: DeterministicHasher>(&self, state: &mut H) {
159        state.write_usize(self.len());
160        state.write_bytes(self.as_bytes());
161    }
162}
163
164impl Deref for RcStr {
165    type Target = str;
166
167    fn deref(&self) -> &Self::Target {
168        self.as_str()
169    }
170}
171
172impl Borrow<str> for RcStr {
173    fn borrow(&self) -> &str {
174        self.as_str()
175    }
176}
177
178impl From<BytesStr> for RcStr {
179    fn from(s: BytesStr) -> Self {
180        let bytes: Vec<u8> = s.into_bytes().into();
181        RcStr::from(unsafe {
182            // Safety: BytesStr are valid utf-8
183            String::from_utf8_unchecked(bytes)
184        })
185    }
186}
187
188impl From<Arc<String>> for RcStr {
189    fn from(s: Arc<String>) -> Self {
190        match Arc::try_unwrap(s) {
191            Ok(v) => new_atom(Cow::Owned(v)),
192            Err(arc) => new_atom(Cow::Borrowed(&**arc)),
193        }
194    }
195}
196
197impl From<String> for RcStr {
198    fn from(s: String) -> Self {
199        new_atom(Cow::Owned(s))
200    }
201}
202
203impl From<&'_ str> for RcStr {
204    fn from(s: &str) -> Self {
205        new_atom(Cow::Borrowed(s))
206    }
207}
208
209impl From<Cow<'_, str>> for RcStr {
210    fn from(s: Cow<str>) -> Self {
211        new_atom(s)
212    }
213}
214
215/// Mimic `&str`
216impl AsRef<Path> for RcStr {
217    fn as_ref(&self) -> &Path {
218        self.as_str().as_ref()
219    }
220}
221
222/// Mimic `&str`
223impl AsRef<OsStr> for RcStr {
224    fn as_ref(&self) -> &OsStr {
225        self.as_str().as_ref()
226    }
227}
228
229/// Mimic `&str`
230impl AsRef<[u8]> for RcStr {
231    fn as_ref(&self) -> &[u8] {
232        self.as_str().as_ref()
233    }
234}
235
236impl From<RcStr> for BytesStr {
237    fn from(value: RcStr) -> Self {
238        Self::from_str_slice(value.as_str())
239    }
240}
241
242impl PartialEq<str> for RcStr {
243    fn eq(&self, other: &str) -> bool {
244        self.as_str() == other
245    }
246}
247
248impl PartialEq<&'_ str> for RcStr {
249    fn eq(&self, other: &&str) -> bool {
250        self.as_str() == *other
251    }
252}
253
254impl PartialEq<String> for RcStr {
255    fn eq(&self, other: &String) -> bool {
256        self.as_str() == other.as_str()
257    }
258}
259
260impl Debug for RcStr {
261    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
262        Debug::fmt(&self.as_str(), f)
263    }
264}
265
266impl Display for RcStr {
267    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
268        Display::fmt(&self.as_str(), f)
269    }
270}
271
272impl From<RcStr> for String {
273    fn from(s: RcStr) -> Self {
274        s.into_owned()
275    }
276}
277
278impl From<RcStr> for PathBuf {
279    fn from(s: RcStr) -> Self {
280        String::from(s).into()
281    }
282}
283
284impl Clone for RcStr {
285    #[inline(always)]
286    fn clone(&self) -> Self {
287        let alias = self.unsafe_data;
288        // We only need to increment the ref count for DYNAMIC_TAG values
289        // For STATIC_TAG and INLINE_TAG we can just copy the value.
290        if alias.tag_byte() & TAG_MASK == DYNAMIC_TAG {
291            unsafe {
292                let arc = dynamic::restore_arc(alias);
293                forget(arc.clone());
294                forget(arc);
295            }
296        }
297
298        RcStr { unsafe_data: alias }
299    }
300}
301
302impl Default for RcStr {
303    fn default() -> Self {
304        rcstr!("")
305    }
306}
307
308impl PartialEq for RcStr {
309    fn eq(&self, other: &Self) -> bool {
310        // For inline RcStrs this is sufficient and for out of line values it handles a simple
311        // identity cases
312        if self.unsafe_data == other.unsafe_data {
313            return true;
314        }
315        // They can still be equal if they are both stored on the heap
316        match (self.location(), other.location()) {
317            (PREHASHED_STRING_LOCATION, PREHASHED_STRING_LOCATION) => {
318                let l = unsafe { deref_from(self.unsafe_data) };
319                let r = unsafe { deref_from(other.unsafe_data) };
320                l.hash == r.hash && l.value == r.value
321            }
322            // NOTE: it is never possible for an inline storage string to compare equal to a dynamic
323            // allocated string, the construction routines separate the strings based on length.
324            _ => false,
325        }
326    }
327}
328
329impl Eq for RcStr {}
330
331impl PartialOrd for RcStr {
332    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
333        Some(self.cmp(other))
334    }
335}
336
337impl Ord for RcStr {
338    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
339        self.as_str().cmp(other.as_str())
340    }
341}
342
343impl Hash for RcStr {
344    fn hash<H: Hasher>(&self, state: &mut H) {
345        match self.location() {
346            PREHASHED_STRING_LOCATION => {
347                let l = unsafe { deref_from(self.unsafe_data) };
348                state.write_u64(l.hash);
349                state.write_u8(0xff); // matches the implementation of the `str` Hash impl
350            }
351            INLINE_LOCATION => {
352                self.inline_as_str().hash(state);
353            }
354            _ => unsafe { debug_unreachable!() },
355        }
356    }
357}
358
359impl Serialize for RcStr {
360    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
361        serializer.serialize_str(self.as_str())
362    }
363}
364
365impl<'de> Deserialize<'de> for RcStr {
366    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
367        let s = String::deserialize(deserializer)?;
368        Ok(RcStr::from(s))
369    }
370}
371
372impl Drop for RcStr {
373    fn drop(&mut self) {
374        match self.tag() {
375            DYNAMIC_TAG => unsafe { drop(dynamic::restore_arc(self.unsafe_data)) },
376            STATIC_TAG => {
377                // do nothing, these are never deallocated
378            }
379            INLINE_TAG => {
380                // do nothing, these payloads need no drop logic
381            }
382            _ => unsafe { debug_unreachable!() },
383        }
384    }
385}
386
387// Exports for our macro
388#[doc(hidden)]
389pub const fn inline_atom(s: &str) -> Option<RcStr> {
390    dynamic::inline_atom(s)
391}
392
393#[doc(hidden)]
394#[inline(always)]
395pub fn from_static(s: &'static PrehashedString) -> RcStr {
396    dynamic::new_static_atom(s)
397}
398#[doc(hidden)]
399pub use dynamic::PrehashedString;
400
401#[doc(hidden)]
402pub const fn make_const_prehashed_string(text: &'static str) -> PrehashedString {
403    PrehashedString {
404        value: dynamic::Payload::Ref(text),
405        hash: hash_bytes(text.as_bytes()),
406    }
407}
408
409/// Create an rcstr from a string literal.
410/// allocates the RcStr inline when possible otherwise uses a `LazyLock` to manage the allocation.
411#[macro_export]
412macro_rules! rcstr {
413    ($s:expr) => {{
414        const INLINE: core::option::Option<$crate::RcStr> = $crate::inline_atom($s);
415        // This condition can be compile time evaluated and inlined.
416        if INLINE.is_some() {
417            INLINE.unwrap()
418        } else {
419            fn get_rcstr() -> $crate::RcStr {
420                // Allocate static storage for the PrehashedString
421                static RCSTR_STORAGE: $crate::PrehashedString =
422                    $crate::make_const_prehashed_string($s);
423                // This basically just tags a bit onto the raw pointer and wraps it in an RcStr
424                // should be fast enough to do every time.
425                $crate::from_static(&RCSTR_STORAGE)
426            }
427            get_rcstr()
428        }
429    }};
430}
431
432/// noop
433impl ShrinkToFit for RcStr {
434    #[inline(always)]
435    fn shrink_to_fit(&mut self) {}
436}
437
438#[cfg(all(feature = "napi", target_family = "wasm"))]
439compile_error!("The napi feature cannot be enabled for wasm targets");
440
441#[cfg(all(feature = "napi", not(target_family = "wasm")))]
442mod napi_impl {
443    use napi::{
444        bindgen_prelude::{FromNapiValue, ToNapiValue, TypeName, ValidateNapiValue},
445        sys::{napi_env, napi_value},
446    };
447
448    use super::*;
449
450    impl TypeName for RcStr {
451        fn type_name() -> &'static str {
452            String::type_name()
453        }
454
455        fn value_type() -> napi::ValueType {
456            String::value_type()
457        }
458    }
459
460    impl ToNapiValue for RcStr {
461        unsafe fn to_napi_value(env: napi_env, val: Self) -> napi::Result<napi_value> {
462            unsafe { ToNapiValue::to_napi_value(env, val.as_str()) }
463        }
464    }
465
466    impl FromNapiValue for RcStr {
467        unsafe fn from_napi_value(env: napi_env, napi_val: napi_value) -> napi::Result<Self> {
468            Ok(RcStr::from(unsafe {
469                String::from_napi_value(env, napi_val)
470            }?))
471        }
472    }
473
474    impl ValidateNapiValue for RcStr {
475        unsafe fn validate(env: napi_env, napi_val: napi_value) -> napi::Result<napi_value> {
476            unsafe { String::validate(env, napi_val) }
477        }
478    }
479}
480
481#[cfg(test)]
482mod tests {
483    use std::mem::ManuallyDrop;
484
485    use super::*;
486
487    #[test]
488    fn test_refcount() {
489        fn refcount(str: &RcStr) -> usize {
490            assert!(str.tag() == DYNAMIC_TAG);
491            let arc = ManuallyDrop::new(unsafe { dynamic::restore_arc(str.unsafe_data) });
492            triomphe::Arc::count(&arc)
493        }
494
495        let str = RcStr::from("this is a long string that won't be inlined");
496
497        assert_eq!(refcount(&str), 1);
498        assert_eq!(refcount(&str), 1); // refcount should not modify the refcount itself
499
500        let cloned_str = str.clone();
501        assert_eq!(refcount(&str), 2);
502
503        drop(cloned_str);
504        assert_eq!(refcount(&str), 1);
505
506        let _ = str.clone().into_owned();
507        assert_eq!(refcount(&str), 1);
508    }
509
510    #[test]
511    fn test_rcstr() {
512        // Test enough to exceed the small string optimization
513        assert_eq!(rcstr!(""), RcStr::default());
514        assert_eq!(rcstr!(""), RcStr::from(""));
515        assert_eq!(rcstr!("a"), RcStr::from("a"));
516        assert_eq!(rcstr!("ab"), RcStr::from("ab"));
517        assert_eq!(rcstr!("abc"), RcStr::from("abc"));
518        assert_eq!(rcstr!("abcd"), RcStr::from("abcd"));
519        assert_eq!(rcstr!("abcde"), RcStr::from("abcde"));
520        assert_eq!(rcstr!("abcdef"), RcStr::from("abcdef"));
521        assert_eq!(rcstr!("abcdefg"), RcStr::from("abcdefg"));
522        assert_eq!(rcstr!("abcdefgh"), RcStr::from("abcdefgh"));
523        assert_eq!(rcstr!("abcdefghi"), RcStr::from("abcdefghi"));
524    }
525
526    #[test]
527    fn test_static_atom() {
528        const LONG: &str = "a very long string that lives forever";
529        let leaked = rcstr!(LONG);
530        let not_leaked = RcStr::from(LONG);
531        assert_ne!(leaked.tag(), not_leaked.tag());
532        assert_eq!(leaked, not_leaked);
533    }
534
535    #[test]
536    fn test_inline_atom() {
537        // This is a silly test, just asserts that we can evaluate this in a constant context.
538        const STR: RcStr = {
539            let inline = inline_atom("hello");
540            if inline.is_some() {
541                inline.unwrap()
542            } else {
543                unreachable!();
544            }
545        };
546        assert_eq!(STR, RcStr::from("hello"));
547    }
548}