Skip to main content

turbo_rcstr/
lib.rs

1// Allow the `rcstr!` proc macro's emitted `::turbo_rcstr::...` paths to
2// resolve when used inside this crate's own source (e.g. tests, doctests).
3extern crate self as turbo_rcstr;
4
5use std::{
6    borrow::{Borrow, Cow},
7    collections::HashMap,
8    ffi::OsStr,
9    fmt::{Debug, Display},
10    hash::{Hash, Hasher},
11    mem::{ManuallyDrop, forget},
12    num::NonZeroU8,
13    ops::Deref,
14    path::{Path, PathBuf},
15    sync::LazyLock,
16};
17
18use bincode::{
19    Decode, Encode,
20    de::{Decoder, read::Reader},
21    enc::Encoder,
22    error::{DecodeError, EncodeError},
23    impl_borrow_decode,
24};
25use bytes_str::BytesStr;
26use debug_unreachable::debug_unreachable;
27use rustc_hash::FxBuildHasher;
28use serde::{Deserialize, Deserializer, Serialize, Serializer};
29use shrink_to_fit::ShrinkToFit;
30use smallvec::SmallVec;
31use triomphe::Arc;
32use turbo_tasks_hash::{DeterministicHash, DeterministicHasher};
33
34use crate::{
35    dynamic::{
36        DynamicPrehashedString, deref_dynamic, deref_static, hash_bytes, new_atom,
37        new_atom_from_prehashed, new_static_atom,
38    },
39    tagged_value::{MAX_INLINE_LEN, TaggedValue},
40};
41
42mod dynamic;
43mod tagged_value;
44
45/// An immutable reference counted [`String`], similar to [`Arc<String>`][std::sync::Arc].
46///
47/// This is the preferred immutable string type for [`turbo_tasks::function`][func] arguments and
48/// inside of [`turbo_tasks::value`][value].
49///
50/// As turbo-tasks must store copies of function arguments to enable caching, non-reference counted
51/// [`String`]s would incur frequent cloning. Reference counting typically decreases memory
52/// consumption and CPU time in these cases.
53///
54/// [func]: https://turbopack-rust-docs.vercel.sh/rustdoc/turbo_tasks/attr.function.html
55/// [value]: https://turbopack-rust-docs.vercel.sh/rustdoc/turbo_tasks/attr.value.html
56///
57/// ## Conversion
58///
59/// Converting a `String` or `&str` to an `RcStr` can be performed using `.into()`,
60/// `RcStr::from(...)`, or the `rcstr!` macro.
61///
62/// ```
63/// # use turbo_rcstr::{RcStr, rcstr};
64/// #
65/// let s = "foo";
66/// let rc_s1: RcStr = s.into();
67/// let rc_s2 = RcStr::from(s);
68/// let rc_s3 = rcstr!("foo");
69/// assert_eq!(rc_s1, rc_s2);
70/// ```
71///
72/// Generally speaking you should
73///  * use `rcstr!` when converting a `const`-compatible `str`
74///  * use `RcStr::from` for readability
75///  * use `.into()` when context makes it clear.
76///
77/// Converting from an [`RcStr`] to a `&str` should be done with [`RcStr::as_str`]. Converting to a
78/// `String` should be done with [`RcStr::into_owned`].
79///
/// ## Representation
///
/// This type is intentionally opaque to allow for optimizations to the underlying representation.
/// The current implementation already stores short strings inline and can point at static
/// `rcstr!` constants; future implementations may change this further (e.g. more interning).
84//
85// If you want to change the underlying string type to `Arc<str>`, please ensure that you profile
86// performance. The current implementation offers very cheap `String -> RcStr -> String`, meaning we
87// only pay for the allocation for `Arc` when we pass `format!("").into()` to a function.
88pub struct RcStr {
89    unsafe_data: TaggedValue,
90}
91
92const _: () = {
93    // Enforce that RcStr triggers the non-zero size optimization.
94    assert!(std::mem::size_of::<RcStr>() == std::mem::size_of::<Option<RcStr>>());
95};
96
97unsafe impl Send for RcStr {}
98unsafe impl Sync for RcStr {}
99
// The storage kind lives in the two low bits of the tag byte.
const TAG_MASK: u8 = 0b_11;
// Marks a payload that has been leaked since it has a static lifetime
const STATIC_TAG: u8 = 0b_00;
// The payload is stored inline; its length sits in the upper nybble of the tag byte
const INLINE_TAG: u8 = 0b_01;
// Marks a payload that is stored in an Arc
const DYNAMIC_TAG: u8 = 0b_10;
// `INLINE_TAG` as a non-zero byte (the tag value is 1, so the `unwrap` cannot fail).
const INLINE_TAG_INIT: NonZeroU8 = NonZeroU8::new(INLINE_TAG).unwrap();
// For inline tags the length is stored in the upper 4 bits of the tag byte
const LEN_OFFSET: usize = 4;
const LEN_MASK: u8 = 0b_1111 << LEN_OFFSET;
111
112impl RcStr {
113    #[inline(always)]
114    fn tag(&self) -> u8 {
115        self.unsafe_data.tag_byte() & TAG_MASK
116    }
117
118    #[inline(never)]
119    pub fn as_str(&self) -> &str {
120        match self.tag() {
121            STATIC_TAG => unsafe { deref_static(self.unsafe_data).value },
122            DYNAMIC_TAG => unsafe { &deref_dynamic(self.unsafe_data).value },
123            INLINE_TAG => self.inline_as_str(),
124            _ => unsafe { debug_unreachable!() },
125        }
126    }
127
128    fn inline_as_str(&self) -> &str {
129        debug_assert!(self.tag() == INLINE_TAG);
130        let len = (self.unsafe_data.tag_byte() & LEN_MASK) >> LEN_OFFSET;
131        let src = self.unsafe_data.data();
132        unsafe { std::str::from_utf8_unchecked(&src[..(len as usize)]) }
133    }
134
135    /// Returns an owned mutable [`String`].
136    ///
137    /// This implementation is more efficient than [`ToString::to_string`]:
138    ///
139    /// - If the reference count is 1, the `Arc` can be unwrapped, giving ownership of the
140    ///   underlying string without cloning in `O(1)` time.
141    /// - This avoids some of the potential overhead of the `Display` trait.
142    pub fn into_owned(self) -> String {
143        match self.tag() {
144            DYNAMIC_TAG => {
145                // convert `self` into `arc`
146                let arc = unsafe { dynamic::restore_arc(ManuallyDrop::new(self).unsafe_data) };
147                match Arc::try_unwrap(arc) {
148                    // `String::from(Box<str>)` reuses the boxed allocation, so this is O(1).
149                    Ok(v) => String::from(v.value),
150                    Err(arc) => arc.value.to_string(),
151                }
152            }
153            INLINE_TAG => self.inline_as_str().to_string(),
154            STATIC_TAG => unsafe { deref_static(self.unsafe_data).value.to_string() },
155            _ => unsafe { debug_unreachable!() },
156        }
157    }
158
159    pub fn map(self, f: impl FnOnce(String) -> String) -> Self {
160        RcStr::from(Cow::Owned(f(self.into_owned())))
161    }
162
163    /// Create an RcStr from a deserialized string, checking the static constant
164    /// table first. If the string matches an `rcstr!` constant, returns a
165    /// zero-cost static copy instead of allocating a new Arc.
166    ///
167    /// Accepts `&str` so that borrow-decode paths can avoid heap allocation
168    /// entirely for inline strings (≤7 bytes) and static table hits.
169    fn from_deserialized(s: &str) -> Self {
170        if !is_atom_inlineable(s) {
171            let hash = hash_bytes(s.as_bytes());
172            // Check the static table
173            if let Some(entries) = STATIC_TABLE.get(&hash)
174                && let Some(static_phs) = entries.iter().find(|phs| phs.value == s)
175            {
176                new_static_atom(static_phs)
177            } else {
178                new_atom_from_prehashed(DynamicPrehashedString {
179                    hash,
180                    value: s.into(),
181                })
182            }
183        } else {
184            inline_atom(s).unwrap()
185        }
186    }
187}
188
189impl DeterministicHash for RcStr {
190    fn deterministic_hash<H: DeterministicHasher>(&self, state: &mut H) {
191        state.write_usize(self.len());
192        state.write_bytes(self.as_bytes());
193    }
194}
195
196impl Deref for RcStr {
197    type Target = str;
198
199    fn deref(&self) -> &Self::Target {
200        self.as_str()
201    }
202}
203
204impl Borrow<str> for RcStr {
205    fn borrow(&self) -> &str {
206        self.as_str()
207    }
208}
209
210impl AsRef<str> for RcStr {
211    fn as_ref(&self) -> &str {
212        self.as_str()
213    }
214}
215
216impl From<BytesStr> for RcStr {
217    fn from(s: BytesStr) -> Self {
218        let bytes: Vec<u8> = s.into_bytes().into();
219        RcStr::from(unsafe {
220            // Safety: BytesStr are valid utf-8
221            String::from_utf8_unchecked(bytes)
222        })
223    }
224}
225
226impl From<Arc<String>> for RcStr {
227    fn from(s: Arc<String>) -> Self {
228        match Arc::try_unwrap(s) {
229            Ok(v) => new_atom(Cow::Owned(v)),
230            Err(arc) => new_atom(Cow::Borrowed(&**arc)),
231        }
232    }
233}
234
235impl From<String> for RcStr {
236    fn from(s: String) -> Self {
237        new_atom(Cow::Owned(s))
238    }
239}
240
241impl From<&'_ str> for RcStr {
242    fn from(s: &str) -> Self {
243        new_atom(Cow::Borrowed(s))
244    }
245}
246
247impl From<Cow<'_, str>> for RcStr {
248    fn from(s: Cow<str>) -> Self {
249        new_atom(s)
250    }
251}
252
253/// Mimic `&str`
254impl AsRef<Path> for RcStr {
255    fn as_ref(&self) -> &Path {
256        self.as_str().as_ref()
257    }
258}
259
260/// Mimic `&str`
261impl AsRef<OsStr> for RcStr {
262    fn as_ref(&self) -> &OsStr {
263        self.as_str().as_ref()
264    }
265}
266
267/// Mimic `&str`
268impl AsRef<[u8]> for RcStr {
269    fn as_ref(&self) -> &[u8] {
270        self.as_str().as_ref()
271    }
272}
273
274impl From<RcStr> for BytesStr {
275    fn from(value: RcStr) -> Self {
276        Self::from_str_slice(value.as_str())
277    }
278}
279
280impl PartialEq<str> for RcStr {
281    fn eq(&self, other: &str) -> bool {
282        self.as_str() == other
283    }
284}
285
286impl PartialEq<&'_ str> for RcStr {
287    fn eq(&self, other: &&str) -> bool {
288        self.as_str() == *other
289    }
290}
291
292impl PartialEq<String> for RcStr {
293    fn eq(&self, other: &String) -> bool {
294        self.as_str() == other.as_str()
295    }
296}
297
298impl Debug for RcStr {
299    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
300        Debug::fmt(&self.as_str(), f)
301    }
302}
303
304impl Display for RcStr {
305    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
306        Display::fmt(&self.as_str(), f)
307    }
308}
309
310impl From<RcStr> for String {
311    fn from(s: RcStr) -> Self {
312        s.into_owned()
313    }
314}
315
316impl From<RcStr> for PathBuf {
317    fn from(s: RcStr) -> Self {
318        String::from(s).into()
319    }
320}
321
322impl Clone for RcStr {
323    #[inline(always)]
324    fn clone(&self) -> Self {
325        // We only need to increment the ref count for DYNAMIC_TAG values.
326        // For STATIC_TAG and INLINE_TAG we can just copy the value.
327        if self.tag() == DYNAMIC_TAG {
328            unsafe {
329                let arc = dynamic::restore_arc(self.unsafe_data);
330                forget(arc.clone());
331                forget(arc);
332            }
333        }
334
335        RcStr {
336            unsafe_data: self.unsafe_data,
337        }
338    }
339}
340
341impl Default for RcStr {
342    fn default() -> Self {
343        rcstr!("")
344    }
345}
346
347impl PartialEq for RcStr {
348    fn eq(&self, other: &Self) -> bool {
349        // For inline RcStrs this is sufficient and for out of line values it handles a simple
350        // identity cases
351        if self.unsafe_data == other.unsafe_data {
352            return true;
353        }
354        // If either side is inline, they can't be equal: an inline string is always shorter than
355        // any heap-allocated one (construction splits on length), and two inline strings would
356        // have been caught by the `unsafe_data == unsafe_data` check above.
357        if self.tag() == INLINE_TAG || other.tag() == INLINE_TAG {
358            return false;
359        }
360
361        // slow path compare precomputed hashes and string refs
362        let (l_hash, l_str) = unsafe { heap_hash_and_str(self) };
363        let (r_hash, r_str) = unsafe { heap_hash_and_str(other) };
364        l_hash == r_hash && l_str == r_str
365    }
366}
367
368/// Caller must ensure `s.tag()` is `STATIC_TAG` or `DYNAMIC_TAG`.
369#[inline]
370unsafe fn heap_hash_and_str(s: &RcStr) -> (u64, &str) {
371    match s.tag() {
372        STATIC_TAG => {
373            let p = unsafe { deref_static(s.unsafe_data) };
374            (p.hash, p.value)
375        }
376        DYNAMIC_TAG => {
377            let p = unsafe { deref_dynamic(s.unsafe_data) };
378            (p.hash, &p.value)
379        }
380        _ => unsafe { debug_unreachable!() },
381    }
382}
383
384impl Eq for RcStr {}
385
386impl PartialOrd for RcStr {
387    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
388        Some(self.cmp(other))
389    }
390}
391
392impl Ord for RcStr {
393    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
394        self.as_str().cmp(other.as_str())
395    }
396}
397
398impl Hash for RcStr {
399    fn hash<H: Hasher>(&self, state: &mut H) {
400        match self.tag() {
401            STATIC_TAG => {
402                state.write_u64(unsafe { deref_static(self.unsafe_data).hash });
403                state.write_u8(0xff); // matches the implementation of the `str` Hash impl
404            }
405            DYNAMIC_TAG => {
406                state.write_u64(unsafe { deref_dynamic(self.unsafe_data).hash });
407                state.write_u8(0xff); // matches the implementation of the `str` Hash impl
408            }
409            INLINE_TAG => {
410                self.inline_as_str().hash(state);
411            }
412            _ => unsafe { debug_unreachable!() },
413        }
414    }
415}
416
417impl Serialize for RcStr {
418    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
419        serializer.serialize_str(self.as_str())
420    }
421}
422
423impl<'de> Deserialize<'de> for RcStr {
424    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
425        struct RcStrVisitor;
426
427        impl serde::de::Visitor<'_> for RcStrVisitor {
428            type Value = RcStr;
429
430            fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
431                f.write_str("a string")
432            }
433
434            fn visit_str<E: serde::de::Error>(self, v: &str) -> Result<RcStr, E> {
435                Ok(RcStr::from_deserialized(v))
436            }
437
438            fn visit_string<E: serde::de::Error>(self, v: String) -> Result<RcStr, E> {
439                Ok(RcStr::from_deserialized(&v))
440            }
441        }
442
443        deserializer.deserialize_str(RcStrVisitor)
444    }
445}
446
447impl Encode for RcStr {
448    fn encode<E: Encoder>(&self, encoder: &mut E) -> Result<(), EncodeError> {
449        self.as_str().encode(encoder)
450    }
451}
452
453impl<Context> Decode<Context> for RcStr {
454    fn decode<D: Decoder<Context = Context>>(decoder: &mut D) -> Result<Self, DecodeError> {
455        // Decode the length prefix
456        let len = u64::decode(decoder)?;
457        let len: usize = len
458            .try_into()
459            .map_err(|_| DecodeError::OutsideUsizeRange(len))?;
460
461        if unty::type_equal::<D::R, turbo_bincode::TurboBincodeReader>() {
462            // We know the reader is a TurboBincodeReader backed by &[u8], so peek_read
463            // returning None means data corruption (not enough bytes), not "unsupported".
464            let bytes = decoder
465                .reader()
466                .peek_read(len)
467                .ok_or(DecodeError::UnexpectedEnd { additional: len })?;
468            let s = core::str::from_utf8(bytes).map_err(|inner| DecodeError::Utf8 { inner })?;
469            let rcstr = RcStr::from_deserialized(s);
470            decoder.reader().consume(len);
471            Ok(rcstr)
472        } else {
473            unreachable!(
474                "RcStr::decode expected TurboBincodeReader, but was called with a {} reader",
475                std::any::type_name::<D::R>(),
476            )
477        }
478    }
479}
480
481impl_borrow_decode!(RcStr);
482
483impl Drop for RcStr {
484    fn drop(&mut self) {
485        match self.tag() {
486            DYNAMIC_TAG => unsafe { drop(dynamic::restore_arc(self.unsafe_data)) },
487            INLINE_TAG | STATIC_TAG => {
488                // no-ops
489            }
490            _ => unsafe { debug_unreachable!() },
491        }
492    }
493}
494
495// Exports for our macro
496#[doc(hidden)]
497pub const fn inline_atom(s: &str) -> Option<RcStr> {
498    dynamic::inline_atom(s)
499}
500
501// Exports for our macro
502#[doc(hidden)]
503pub const fn is_atom_inlineable(s: &str) -> bool {
504    s.len() <= MAX_INLINE_LEN
505}
506
507#[doc(hidden)]
508#[inline(always)]
509pub const fn from_static(s: &'static StaticPrehashedString) -> RcStr {
510    dynamic::new_static_atom(s)
511}
512#[doc(hidden)]
513pub use dynamic::StaticPrehashedString;
514
515#[doc(hidden)]
516pub const fn make_const_prehashed_string(text: &'static str) -> StaticPrehashedString {
517    StaticPrehashedString {
518        value: text,
519        hash: hash_bytes(text.as_bytes()),
520    }
521}
522
523// Re-export inventory so the rcstr! macro can reference it via $crate::inventory
524#[doc(hidden)]
525pub use inventory;
526
527/// Wrapper for collecting `rcstr!` static constants via `inventory`.
528#[doc(hidden)]
529pub struct StaticRcStr(pub &'static StaticPrehashedString);
530
531inventory::collect!(StaticRcStr);
532
533/// Forwarder around [`inventory::submit!`] that lets the `rcstr!` proc macro
534/// emit a single path it can rely on, without depending on whether
535/// `turbo_rcstr::inventory` is reachable as a macro path in the call site
536/// crate. Macros emitted from a proc macro lose access to the proc macro
537/// crate's deps, so the submission has to bounce through this declarative
538/// macro defined where `inventory::submit!` is in scope.
539#[doc(hidden)]
540#[macro_export]
541macro_rules! __rcstr_inventory_submit {
542    ($value:expr) => {
543        $crate::inventory::submit!($value);
544    };
545}
546
547/// Read-only lookup table mapping precomputed hash -> static StaticPrehashedString.
548/// Built once on first access from all `rcstr!` constants collected by `inventory`.
549///
550/// Multiple `rcstr!` calls with the same string content will each submit to
551/// inventory, but we deduplicate by content here so only one entry per unique
552/// string is stored.
553static STATIC_TABLE: LazyLock<
554    HashMap<u64, SmallVec<[&'static StaticPrehashedString; 1]>, FxBuildHasher>,
555> = LazyLock::new(|| {
556    let mut map: HashMap<u64, SmallVec<[&'static StaticPrehashedString; 1]>, FxBuildHasher> =
557        HashMap::with_hasher(FxBuildHasher);
558    for StaticRcStr(phs) in inventory::iter::<StaticRcStr> {
559        if phs.value.len() <= MAX_INLINE_LEN {
560            // This is rare, but possible if our macro cannot determine the length of the string at
561            // macro time we may end up with a wasted StaticPrehashedString submitted to inventory.
562
563            // Just skip it
564            continue;
565        }
566        let entries = map.entry(phs.hash).or_default();
567        // Deduplicate: skip if an entry with the same string content exists
568        // Mostly linkers will merge static strings but this isn't guaranteed so we cannot just rely
569        // on pointer equality.
570        if !entries.iter().any(|e| e.value == phs.value) {
571            entries.push(phs);
572        }
573    }
574    map.shrink_to_fit(); // this map will never change again
575    map
576});
577
578/// Create an rcstr from a string literal.
579/// Allocates the RcStr inline when possible, otherwise uses a static `PrehashedString`.  In
580/// either case this is a compile time constant
581pub use turbo_rcstr_macros::rcstr;
582
583/// noop
584impl ShrinkToFit for RcStr {
585    #[inline(always)]
586    fn shrink_to_fit(&mut self) {}
587}
588
#[cfg(all(feature = "napi", target_family = "wasm"))]
compile_error!("The napi feature cannot be enabled for wasm targets");

/// N-API glue: every implementation delegates to the corresponding `String` / `&str` one.
#[cfg(all(feature = "napi", not(target_family = "wasm")))]
mod napi_impl {
    use napi::{
        bindgen_prelude::{FromNapiValue, ToNapiValue, TypeName, ValidateNapiValue},
        sys::{napi_env, napi_value},
    };

    use super::*;

    impl TypeName for RcStr {
        fn type_name() -> &'static str {
            <String as TypeName>::type_name()
        }

        fn value_type() -> napi::ValueType {
            <String as TypeName>::value_type()
        }
    }

    impl ToNapiValue for RcStr {
        unsafe fn to_napi_value(env: napi_env, val: Self) -> napi::Result<napi_value> {
            let text: &str = val.as_str();
            unsafe { ToNapiValue::to_napi_value(env, text) }
        }
    }

    impl FromNapiValue for RcStr {
        unsafe fn from_napi_value(env: napi_env, napi_val: napi_value) -> napi::Result<Self> {
            let owned = unsafe { String::from_napi_value(env, napi_val) }?;
            Ok(RcStr::from(owned))
        }
    }

    impl ValidateNapiValue for RcStr {
        unsafe fn validate(env: napi_env, napi_val: napi_value) -> napi::Result<napi_value> {
            unsafe { <String as ValidateNapiValue>::validate(env, napi_val) }
        }
    }
}
631
632/// Runtime string interning table.
633///
634/// Deduplicates strings by storing them in an `FxHashSet<RcStr>`. Strings
635/// shorter than the inline threshold are already zero-allocation, so only
636/// longer strings benefit from interning.
637pub struct RcStrInterning {
638    set: rustc_hash::FxHashSet<RcStr>,
639}
640
641impl Default for RcStrInterning {
642    fn default() -> Self {
643        Self::new()
644    }
645}
646
647impl RcStrInterning {
648    /// Create a new empty interning table.
649    pub fn new() -> Self {
650        Self {
651            set: rustc_hash::FxHashSet::default(),
652        }
653    }
654
655    /// Intern a string slice. Returns a cheap-to-clone [`RcStr`].
656    ///
657    /// Strings below the inline threshold are returned directly (they are
658    /// already zero-allocation inline atoms). Longer strings are looked up
659    /// in the interning table and deduplicated.
660    pub fn intern(&mut self, s: &str) -> RcStr {
661        if is_atom_inlineable(s) {
662            // Inline atom — no allocation needed, don't bother with the set.
663            return RcStr::from(s);
664        }
665        if let Some(existing) = self.set.get(s) {
666            return existing.clone();
667        }
668        let rc = RcStr::from(s);
669        self.set.insert(rc.clone());
670        rc
671    }
672
673    /// Intern an owned `String`. When the string is not yet interned, avoids
674    /// an extra copy compared to [`intern`](Self::intern).
675    fn intern_owned(&mut self, s: String) -> RcStr {
676        if is_atom_inlineable(&s) {
677            return RcStr::from(s);
678        }
679        if let Some(existing) = self.set.get(s.as_str()) {
680            return existing.clone();
681        }
682        let rc = RcStr::from(s);
683        self.set.insert(rc.clone());
684        rc
685    }
686
687    /// Intern a `Cow<str>`. When the cow is `Owned`, avoids an extra copy
688    /// if the string is not yet interned.
689    pub fn intern_cow(&mut self, s: std::borrow::Cow<'_, str>) -> RcStr {
690        match s {
691            std::borrow::Cow::Borrowed(s) => self.intern(s),
692            std::borrow::Cow::Owned(s) => self.intern_owned(s),
693        }
694    }
695
696    /// Intern the [`Display`](std::fmt::Display) output of a value.
697    pub fn intern_display(&mut self, v: &impl std::fmt::Display) -> RcStr {
698        self.intern_owned(v.to_string())
699    }
700}
701
#[cfg(test)]
mod tests {
    use std::mem::ManuallyDrop;

    use super::*;

    /// Cloning and dropping a dynamic atom must move the reference count by exactly one.
    #[test]
    fn test_refcount() {
        // Reads the count without consuming a reference (hence the `ManuallyDrop`).
        fn strong_count(s: &RcStr) -> usize {
            assert!(s.tag() == DYNAMIC_TAG);
            let arc = ManuallyDrop::new(unsafe { dynamic::restore_arc(s.unsafe_data) });
            triomphe::Arc::count(&arc)
        }

        let original = RcStr::from("this is a long string that won't be inlined");

        assert_eq!(strong_count(&original), 1);
        // Reading the count must not change it.
        assert_eq!(strong_count(&original), 1);

        let copy = original.clone();
        assert_eq!(strong_count(&original), 2);

        drop(copy);
        assert_eq!(strong_count(&original), 1);

        let _ = original.clone().into_owned();
        assert_eq!(strong_count(&original), 1);
    }

    /// `rcstr!` and `RcStr::from` agree for lengths on both sides of the inline threshold.
    #[test]
    fn test_rcstr() {
        assert_eq!(rcstr!(""), RcStr::default());

        let cases = [
            (rcstr!(""), ""),
            (rcstr!("a"), "a"),
            (rcstr!("ab"), "ab"),
            (rcstr!("abc"), "abc"),
            (rcstr!("abcd"), "abcd"),
            (rcstr!("abcde"), "abcde"),
            (rcstr!("abcdef"), "abcdef"),
            (rcstr!("abcdefg"), "abcdefg"),
            (rcstr!("abcdefgh"), "abcdefgh"),
            (rcstr!("abcdefghi"), "abcdefghi"),
        ];
        for (from_macro, text) in cases {
            assert_eq!(from_macro, RcStr::from(text));
        }
    }

    /// A static atom and a dynamic atom with the same content use different tags but
    /// still compare equal.
    #[test]
    fn test_static_atom() {
        const LONG: &str = "a very long string that lives forever";
        let from_macro = rcstr!(LONG);
        let from_conversion = RcStr::from(LONG);
        assert_ne!(from_macro.tag(), from_conversion.tag());
        assert_eq!(from_macro, from_conversion);
    }

    #[test]
    fn test_inline_atom() {
        // This is a silly test, just asserts that we can evaluate this in a constant context.
        const STR: RcStr = {
            let inline = inline_atom("hello");
            if inline.is_some() {
                inline.unwrap()
            } else {
                unreachable!();
            }
        };
        assert_eq!(STR, RcStr::from("hello"));
    }

    /// Hashing an `RcStr` must give the same result as hashing the equivalent `&str`.
    #[test]
    fn test_hash_matches_str() {
        use std::hash::{Hash, Hasher};

        use rustc_hash::FxHasher;

        fn fxhash<T: Hash>(value: T) -> u64 {
            let mut hasher = FxHasher::default();
            value.hash(&mut hasher);
            hasher.finish()
        }

        // Lengths chosen to cover both inline and prehashed storage.
        let samples = [
            "",
            "a",
            "ab",
            "abc",
            "abcdef",  // 6 bytes
            "abcdefg", // 7 bytes
            "abcdefgh",
            "a very long string that exceeds sixteen bytes",
        ];

        // RcStr vs &str
        for text in samples {
            let atom = RcStr::from(text);
            assert_eq!(
                fxhash(&atom),
                fxhash(text),
                "Hash mismatch for string of length {}: {:?}",
                text.len(),
                text
            );
        }

        // (RcStr, RcStr) vs (&str, &str)
        for left in samples {
            for right in samples {
                let left_atom = RcStr::from(left);
                let right_atom = RcStr::from(right);
                assert_eq!(
                    fxhash((&left_atom, &right_atom)),
                    fxhash((left, right)),
                    "Tuple hash mismatch for ({:?}, {:?})",
                    left,
                    right
                );
            }
        }
    }

    /// Encode/decode round trips keep the content and pick the expected representation.
    #[test]
    fn test_bincode_roundtrip() {
        use turbo_bincode::{turbo_bincode_decode, turbo_bincode_encode};

        fn roundtrip(value: &RcStr) -> RcStr {
            let encoded = turbo_bincode_encode(value).unwrap();
            turbo_bincode_decode(&encoded).unwrap()
        }

        // Inline string
        let short = RcStr::from("hi");
        let decoded = roundtrip(&short);
        assert_eq!(decoded, short);
        assert_eq!(decoded.tag(), INLINE_TAG);

        // Dynamic string (no static match)
        let long = RcStr::from("bincode_roundtrip: no matching rcstr constant");
        let decoded = roundtrip(&long);
        assert_eq!(decoded, long);
        assert_eq!(decoded.tag(), DYNAMIC_TAG);

        // Static dedup via decode
        const STATIC_STR: &str = "bincode_roundtrip: a static constant for testing";
        let _register = rcstr!(STATIC_STR);
        let original = RcStr::from(STATIC_STR); // DYNAMIC since from() doesn't check
        let decoded = roundtrip(&original);
        assert_eq!(decoded.as_str(), STATIC_STR);
        // Decoded via peek_read path should find the static constant
        assert_eq!(decoded.tag(), STATIC_TAG);
    }

    /// Interning returns atoms that share one allocation for equal long strings.
    #[test]
    fn test_interning() {
        fn same_allocation(a: &RcStr, b: &RcStr) -> bool {
            std::ptr::eq(a.as_str().as_ptr(), b.as_str().as_ptr())
        }

        let mut interner = RcStrInterning::new();

        // Short strings are always inline (no interning needed)
        let short_a = interner.intern("hi");
        let short_b = interner.intern("hi");
        assert_eq!(short_a, short_b);

        // Long strings should be deduplicated to the same allocation.
        let long = "this is a long string that exceeds inline threshold";
        let first = interner.intern(long);
        let second = interner.intern(long);
        assert_eq!(first, second);
        assert!(same_allocation(&first, &second));

        // intern_cow with borrowed — same allocation as `first`
        let via_borrowed = interner.intern_cow(std::borrow::Cow::Borrowed(long));
        assert_eq!(via_borrowed, first);
        assert!(same_allocation(&via_borrowed, &first));

        // intern_cow with owned — same allocation as `first` (no new alloc)
        let via_owned = interner.intern_cow(std::borrow::Cow::Owned(long.to_string()));
        assert_eq!(via_owned, first);
        assert!(same_allocation(&via_owned, &first));

        // intern_display — a fresh long string, verify it is interned too
        let long2 = "another long string that exceeds the inline threshold here";
        let shown_a = interner.intern_display(&long2);
        let shown_b = interner.intern_display(&long2);
        assert_eq!(shown_a, shown_b);
        assert!(same_allocation(&shown_a, &shown_b));
    }
}
884}