Skip to main content

turbo_rcstr/
lib.rs

1use std::{
2    borrow::{Borrow, Cow},
3    ffi::OsStr,
4    fmt::{Debug, Display},
5    hash::{Hash, Hasher},
6    mem::{ManuallyDrop, forget},
7    num::NonZeroU8,
8    ops::Deref,
9    path::{Path, PathBuf},
10};
11
12use bincode::{
13    Decode, Encode,
14    de::Decoder,
15    enc::Encoder,
16    error::{DecodeError, EncodeError},
17    impl_borrow_decode,
18};
19use bytes_str::BytesStr;
20use debug_unreachable::debug_unreachable;
21use serde::{Deserialize, Deserializer, Serialize, Serializer};
22use shrink_to_fit::ShrinkToFit;
23use triomphe::Arc;
24use turbo_tasks_hash::{DeterministicHash, DeterministicHasher};
25
26use crate::{
27    dynamic::{deref_from, hash_bytes, new_atom},
28    tagged_value::TaggedValue,
29};
30
31mod dynamic;
32mod tagged_value;
33
34/// An immutable reference counted [`String`], similar to [`Arc<String>`][std::sync::Arc].
35///
36/// This is the preferred immutable string type for [`turbo_tasks::function`][func] arguments and
37/// inside of [`turbo_tasks::value`][value].
38///
39/// As turbo-tasks must store copies of function arguments to enable caching, non-reference counted
40/// [`String`]s would incur frequent cloning. Reference counting typically decreases memory
41/// consumption and CPU time in these cases.
42///
43/// [func]: https://turbopack-rust-docs.vercel.sh/rustdoc/turbo_tasks/attr.function.html
44/// [value]: https://turbopack-rust-docs.vercel.sh/rustdoc/turbo_tasks/attr.value.html
45///
46/// ## Conversion
47///
48/// Converting a `String` or `&str` to an `RcStr` can be performed using `.into()`,
49/// `RcStr::from(...)`, or the `rcstr!` macro.
50///
51/// ```
52/// # use turbo_rcstr::RcStr;
53/// #
54/// let s = "foo";
55/// let rc_s1: RcStr = s.into();
56/// let rc_s2 = RcStr::from(s);
57/// let rc_s3 = rcstr!("foo");
58/// assert_eq!(rc_s1, rc_s2);
59/// ```
60///
61/// Generally speaking you should
62///  * use `rcstr!` when converting a `const`-compatible `str`
63///  * use `RcStr::from` for readability
64///  * use `.into()` when context makes it clear.
65///
66/// Converting from an [`RcStr`] to a `&str` should be done with [`RcStr::as_str`]. Converting to a
67/// `String` should be done with [`RcStr::into_owned`].
68///
69/// ## Future Optimizations
70///
71/// This type is intentionally opaque to allow for optimizations to the underlying representation.
72/// Future implementations may use inline representations or interning.
73//
74// If you want to change the underlying string type to `Arc<str>`, please ensure that you profile
75// performance. The current implementation offers very cheap `String -> RcStr -> String`, meaning we
76// only pay for the allocation for `Arc` when we pass `format!("").into()` to a function.
77pub struct RcStr {
78    unsafe_data: TaggedValue,
79}
80
81const _: () = {
82    // Enforce that RcStr triggers the non-zero size optimization.
83    assert!(std::mem::size_of::<RcStr>() == std::mem::size_of::<Option<RcStr>>());
84};
85
86unsafe impl Send for RcStr {}
87unsafe impl Sync for RcStr {}
88
89// Marks a payload that is stored in an Arc
90const DYNAMIC_TAG: u8 = 0b_00;
91const PREHASHED_STRING_LOCATION: u8 = 0b_0;
92// Marks a payload that has been leaked since it has a static lifetime
93const STATIC_TAG: u8 = 0b_10;
94// The payload is stored inline
95const INLINE_TAG: u8 = 0b_01; // len in upper nybble
96const INLINE_LOCATION: u8 = 0b_1;
97const INLINE_TAG_INIT: NonZeroU8 = NonZeroU8::new(INLINE_TAG).unwrap();
98const TAG_MASK: u8 = 0b_11;
99const LOCATION_MASK: u8 = 0b_1;
100// For inline tags the length is stored in the upper 4 bits of the tag byte
101const LEN_OFFSET: usize = 4;
102const LEN_MASK: u8 = 0xf0;
103
104impl RcStr {
105    #[inline(always)]
106    fn tag(&self) -> u8 {
107        self.unsafe_data.tag_byte() & TAG_MASK
108    }
109    #[inline(always)]
110    fn location(&self) -> u8 {
111        self.unsafe_data.tag_byte() & LOCATION_MASK
112    }
113
114    #[inline(never)]
115    pub fn as_str(&self) -> &str {
116        match self.location() {
117            PREHASHED_STRING_LOCATION => self.prehashed_string_as_str(),
118            INLINE_LOCATION => self.inline_as_str(),
119            _ => unsafe { debug_unreachable!() },
120        }
121    }
122
123    fn inline_as_str(&self) -> &str {
124        debug_assert!(self.location() == INLINE_LOCATION);
125        let len = (self.unsafe_data.tag_byte() & LEN_MASK) >> LEN_OFFSET;
126        let src = self.unsafe_data.data();
127        unsafe { std::str::from_utf8_unchecked(&src[..(len as usize)]) }
128    }
129
130    // Extract the str reference from a string stored in a PrehashedString
131    fn prehashed_string_as_str(&self) -> &str {
132        debug_assert!(self.location() == PREHASHED_STRING_LOCATION);
133        unsafe { dynamic::deref_from(self.unsafe_data).value.as_str() }
134    }
135
136    /// Returns an owned mutable [`String`].
137    ///
138    /// This implementation is more efficient than [`ToString::to_string`]:
139    ///
140    /// - If the reference count is 1, the `Arc` can be unwrapped, giving ownership of the
141    ///   underlying string without cloning in `O(1)` time.
142    /// - This avoids some of the potential overhead of the `Display` trait.
143    pub fn into_owned(self) -> String {
144        match self.tag() {
145            DYNAMIC_TAG => {
146                // convert `self` into `arc`
147                let arc = unsafe { dynamic::restore_arc(ManuallyDrop::new(self).unsafe_data) };
148                match Arc::try_unwrap(arc) {
149                    Ok(v) => v.value.into_string(),
150                    Err(arc) => arc.value.as_str().to_string(),
151                }
152            }
153            INLINE_TAG => self.inline_as_str().to_string(),
154            STATIC_TAG => self.prehashed_string_as_str().to_string(),
155            _ => unsafe { debug_unreachable!() },
156        }
157    }
158
159    pub fn map(self, f: impl FnOnce(String) -> String) -> Self {
160        RcStr::from(Cow::Owned(f(self.into_owned())))
161    }
162}
163
164impl DeterministicHash for RcStr {
165    fn deterministic_hash<H: DeterministicHasher>(&self, state: &mut H) {
166        state.write_usize(self.len());
167        state.write_bytes(self.as_bytes());
168    }
169}
170
171impl Deref for RcStr {
172    type Target = str;
173
174    fn deref(&self) -> &Self::Target {
175        self.as_str()
176    }
177}
178
179impl Borrow<str> for RcStr {
180    fn borrow(&self) -> &str {
181        self.as_str()
182    }
183}
184
185impl AsRef<str> for RcStr {
186    fn as_ref(&self) -> &str {
187        self.as_str()
188    }
189}
190
191impl From<BytesStr> for RcStr {
192    fn from(s: BytesStr) -> Self {
193        let bytes: Vec<u8> = s.into_bytes().into();
194        RcStr::from(unsafe {
195            // Safety: BytesStr are valid utf-8
196            String::from_utf8_unchecked(bytes)
197        })
198    }
199}
200
201impl From<Arc<String>> for RcStr {
202    fn from(s: Arc<String>) -> Self {
203        match Arc::try_unwrap(s) {
204            Ok(v) => new_atom(Cow::Owned(v)),
205            Err(arc) => new_atom(Cow::Borrowed(&**arc)),
206        }
207    }
208}
209
210impl From<String> for RcStr {
211    fn from(s: String) -> Self {
212        new_atom(Cow::Owned(s))
213    }
214}
215
216impl From<&'_ str> for RcStr {
217    fn from(s: &str) -> Self {
218        new_atom(Cow::Borrowed(s))
219    }
220}
221
222impl From<Cow<'_, str>> for RcStr {
223    fn from(s: Cow<str>) -> Self {
224        new_atom(s)
225    }
226}
227
228/// Mimic `&str`
229impl AsRef<Path> for RcStr {
230    fn as_ref(&self) -> &Path {
231        self.as_str().as_ref()
232    }
233}
234
235/// Mimic `&str`
236impl AsRef<OsStr> for RcStr {
237    fn as_ref(&self) -> &OsStr {
238        self.as_str().as_ref()
239    }
240}
241
242/// Mimic `&str`
243impl AsRef<[u8]> for RcStr {
244    fn as_ref(&self) -> &[u8] {
245        self.as_str().as_ref()
246    }
247}
248
249impl From<RcStr> for BytesStr {
250    fn from(value: RcStr) -> Self {
251        Self::from_str_slice(value.as_str())
252    }
253}
254
255impl PartialEq<str> for RcStr {
256    fn eq(&self, other: &str) -> bool {
257        self.as_str() == other
258    }
259}
260
261impl PartialEq<&'_ str> for RcStr {
262    fn eq(&self, other: &&str) -> bool {
263        self.as_str() == *other
264    }
265}
266
267impl PartialEq<String> for RcStr {
268    fn eq(&self, other: &String) -> bool {
269        self.as_str() == other.as_str()
270    }
271}
272
273impl Debug for RcStr {
274    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
275        Debug::fmt(&self.as_str(), f)
276    }
277}
278
279impl Display for RcStr {
280    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
281        Display::fmt(&self.as_str(), f)
282    }
283}
284
285impl From<RcStr> for String {
286    fn from(s: RcStr) -> Self {
287        s.into_owned()
288    }
289}
290
291impl From<RcStr> for PathBuf {
292    fn from(s: RcStr) -> Self {
293        String::from(s).into()
294    }
295}
296
297impl Clone for RcStr {
298    #[inline(always)]
299    fn clone(&self) -> Self {
300        let alias = self.unsafe_data;
301        // We only need to increment the ref count for DYNAMIC_TAG values
302        // For STATIC_TAG and INLINE_TAG we can just copy the value.
303        if alias.tag_byte() & TAG_MASK == DYNAMIC_TAG {
304            unsafe {
305                let arc = dynamic::restore_arc(alias);
306                forget(arc.clone());
307                forget(arc);
308            }
309        }
310
311        RcStr { unsafe_data: alias }
312    }
313}
314
315impl Default for RcStr {
316    fn default() -> Self {
317        rcstr!("")
318    }
319}
320
321impl PartialEq for RcStr {
322    fn eq(&self, other: &Self) -> bool {
323        // For inline RcStrs this is sufficient and for out of line values it handles a simple
324        // identity cases
325        if self.unsafe_data == other.unsafe_data {
326            return true;
327        }
328        // They can still be equal if they are both stored on the heap
329        match (self.location(), other.location()) {
330            (PREHASHED_STRING_LOCATION, PREHASHED_STRING_LOCATION) => {
331                let l = unsafe { deref_from(self.unsafe_data) };
332                let r = unsafe { deref_from(other.unsafe_data) };
333                l.hash == r.hash && l.value == r.value
334            }
335            // NOTE: it is never possible for an inline storage string to compare equal to a dynamic
336            // allocated string, the construction routines separate the strings based on length.
337            _ => false,
338        }
339    }
340}
341
342impl Eq for RcStr {}
343
344impl PartialOrd for RcStr {
345    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
346        Some(self.cmp(other))
347    }
348}
349
350impl Ord for RcStr {
351    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
352        self.as_str().cmp(other.as_str())
353    }
354}
355
356impl Hash for RcStr {
357    fn hash<H: Hasher>(&self, state: &mut H) {
358        match self.location() {
359            PREHASHED_STRING_LOCATION => {
360                let l = unsafe { deref_from(self.unsafe_data) };
361                state.write_u64(l.hash);
362                state.write_u8(0xff); // matches the implementation of the `str` Hash impl
363            }
364            INLINE_LOCATION => {
365                self.inline_as_str().hash(state);
366            }
367            _ => unsafe { debug_unreachable!() },
368        }
369    }
370}
371
372impl Serialize for RcStr {
373    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
374        serializer.serialize_str(self.as_str())
375    }
376}
377
378impl<'de> Deserialize<'de> for RcStr {
379    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
380        let s = String::deserialize(deserializer)?;
381        Ok(RcStr::from(s))
382    }
383}
384
385impl Encode for RcStr {
386    fn encode<E: Encoder>(&self, encoder: &mut E) -> Result<(), EncodeError> {
387        self.as_str().encode(encoder)
388    }
389}
390
391impl<Context> Decode<Context> for RcStr {
392    fn decode<D: Decoder<Context = Context>>(decoder: &mut D) -> Result<Self, DecodeError> {
393        Ok(RcStr::from(String::decode(decoder)?))
394    }
395}
396
397impl_borrow_decode!(RcStr);
398
399impl Drop for RcStr {
400    fn drop(&mut self) {
401        match self.tag() {
402            DYNAMIC_TAG => unsafe { drop(dynamic::restore_arc(self.unsafe_data)) },
403            STATIC_TAG => {
404                // do nothing, these are never deallocated
405            }
406            INLINE_TAG => {
407                // do nothing, these payloads need no drop logic
408            }
409            _ => unsafe { debug_unreachable!() },
410        }
411    }
412}
413
414// Exports for our macro
415#[doc(hidden)]
416pub const fn inline_atom(s: &str) -> Option<RcStr> {
417    dynamic::inline_atom(s)
418}
419
420#[doc(hidden)]
421#[inline(always)]
422pub fn from_static(s: &'static PrehashedString) -> RcStr {
423    dynamic::new_static_atom(s)
424}
425#[doc(hidden)]
426pub use dynamic::PrehashedString;
427
428#[doc(hidden)]
429pub const fn make_const_prehashed_string(text: &'static str) -> PrehashedString {
430    PrehashedString {
431        value: dynamic::Payload::Ref(text),
432        hash: hash_bytes(text.as_bytes()),
433    }
434}
435
436/// Create an rcstr from a string literal.
437/// allocates the RcStr inline when possible otherwise uses a `LazyLock` to manage the allocation.
438#[macro_export]
439macro_rules! rcstr {
440    ($s:expr) => {{
441        const INLINE: core::option::Option<$crate::RcStr> = $crate::inline_atom($s);
442        // This condition can be compile time evaluated and inlined.
443        if INLINE.is_some() {
444            INLINE.unwrap()
445        } else {
446            fn get_rcstr() -> $crate::RcStr {
447                // Allocate static storage for the PrehashedString
448                static RCSTR_STORAGE: $crate::PrehashedString =
449                    $crate::make_const_prehashed_string($s);
450                // This basically just tags a bit onto the raw pointer and wraps it in an RcStr
451                // should be fast enough to do every time.
452                $crate::from_static(&RCSTR_STORAGE)
453            }
454            get_rcstr()
455        }
456    }};
457}
458
459/// noop
460impl ShrinkToFit for RcStr {
461    #[inline(always)]
462    fn shrink_to_fit(&mut self) {}
463}
464
465#[cfg(all(feature = "napi", target_family = "wasm"))]
466compile_error!("The napi feature cannot be enabled for wasm targets");
467
468#[cfg(all(feature = "napi", not(target_family = "wasm")))]
469mod napi_impl {
470    use napi::{
471        bindgen_prelude::{FromNapiValue, ToNapiValue, TypeName, ValidateNapiValue},
472        sys::{napi_env, napi_value},
473    };
474
475    use super::*;
476
477    impl TypeName for RcStr {
478        fn type_name() -> &'static str {
479            String::type_name()
480        }
481
482        fn value_type() -> napi::ValueType {
483            String::value_type()
484        }
485    }
486
487    impl ToNapiValue for RcStr {
488        unsafe fn to_napi_value(env: napi_env, val: Self) -> napi::Result<napi_value> {
489            unsafe { ToNapiValue::to_napi_value(env, val.as_str()) }
490        }
491    }
492
493    impl FromNapiValue for RcStr {
494        unsafe fn from_napi_value(env: napi_env, napi_val: napi_value) -> napi::Result<Self> {
495            Ok(RcStr::from(unsafe {
496                String::from_napi_value(env, napi_val)
497            }?))
498        }
499    }
500
501    impl ValidateNapiValue for RcStr {
502        unsafe fn validate(env: napi_env, napi_val: napi_value) -> napi::Result<napi_value> {
503            unsafe { String::validate(env, napi_val) }
504        }
505    }
506}
507
508#[cfg(test)]
509mod tests {
510    use std::mem::ManuallyDrop;
511
512    use super::*;
513
514    #[test]
515    fn test_refcount() {
516        fn refcount(str: &RcStr) -> usize {
517            assert!(str.tag() == DYNAMIC_TAG);
518            let arc = ManuallyDrop::new(unsafe { dynamic::restore_arc(str.unsafe_data) });
519            triomphe::Arc::count(&arc)
520        }
521
522        let str = RcStr::from("this is a long string that won't be inlined");
523
524        assert_eq!(refcount(&str), 1);
525        assert_eq!(refcount(&str), 1); // refcount should not modify the refcount itself
526
527        let cloned_str = str.clone();
528        assert_eq!(refcount(&str), 2);
529
530        drop(cloned_str);
531        assert_eq!(refcount(&str), 1);
532
533        let _ = str.clone().into_owned();
534        assert_eq!(refcount(&str), 1);
535    }
536
537    #[test]
538    fn test_rcstr() {
539        // Test enough to exceed the small string optimization
540        assert_eq!(rcstr!(""), RcStr::default());
541        assert_eq!(rcstr!(""), RcStr::from(""));
542        assert_eq!(rcstr!("a"), RcStr::from("a"));
543        assert_eq!(rcstr!("ab"), RcStr::from("ab"));
544        assert_eq!(rcstr!("abc"), RcStr::from("abc"));
545        assert_eq!(rcstr!("abcd"), RcStr::from("abcd"));
546        assert_eq!(rcstr!("abcde"), RcStr::from("abcde"));
547        assert_eq!(rcstr!("abcdef"), RcStr::from("abcdef"));
548        assert_eq!(rcstr!("abcdefg"), RcStr::from("abcdefg"));
549        assert_eq!(rcstr!("abcdefgh"), RcStr::from("abcdefgh"));
550        assert_eq!(rcstr!("abcdefghi"), RcStr::from("abcdefghi"));
551    }
552
553    #[test]
554    fn test_static_atom() {
555        const LONG: &str = "a very long string that lives forever";
556        let leaked = rcstr!(LONG);
557        let not_leaked = RcStr::from(LONG);
558        assert_ne!(leaked.tag(), not_leaked.tag());
559        assert_eq!(leaked, not_leaked);
560    }
561
562    #[test]
563    fn test_inline_atom() {
564        // This is a silly test, just asserts that we can evaluate this in a constant context.
565        const STR: RcStr = {
566            let inline = inline_atom("hello");
567            if inline.is_some() {
568                inline.unwrap()
569            } else {
570                unreachable!();
571            }
572        };
573        assert_eq!(STR, RcStr::from("hello"));
574    }
575
576    #[test]
577    fn test_hash_matches_str() {
578        use std::hash::{Hash, Hasher};
579
580        use rustc_hash::FxHasher;
581
582        fn fxhash<T: Hash>(value: T) -> u64 {
583            let mut hasher = FxHasher::default();
584            value.hash(&mut hasher);
585            hasher.finish()
586        }
587
588        // Test various string lengths covering inline and prehashed storage
589        let test_strings = [
590            "",
591            "a",
592            "ab",
593            "abc",
594            "abcdef",  // max inline (6 chars)
595            "abcdefg", // just beyond inline (7 chars)
596            "abcdefgh",
597            "a very long string that exceeds sixteen bytes",
598        ];
599
600        // Test RcStr vs &str
601        for s in test_strings {
602            let rcstr = RcStr::from(s);
603            assert_eq!(
604                fxhash(&rcstr),
605                fxhash(s),
606                "Hash mismatch for string of length {}: {:?}",
607                s.len(),
608                s
609            );
610        }
611
612        // Test (RcStr, RcStr) vs (&str, &str)
613        for s1 in test_strings {
614            for s2 in test_strings {
615                let rcstr1 = RcStr::from(s1);
616                let rcstr2 = RcStr::from(s2);
617                assert_eq!(
618                    fxhash((&rcstr1, &rcstr2)),
619                    fxhash((s1, s2)),
620                    "Tuple hash mismatch for ({:?}, {:?})",
621                    s1,
622                    s2
623                );
624            }
625        }
626    }
627}