1extern crate self as turbo_rcstr;
4
5use std::{
6 borrow::{Borrow, Cow},
7 collections::HashMap,
8 ffi::OsStr,
9 fmt::{Debug, Display},
10 hash::{Hash, Hasher},
11 mem::{ManuallyDrop, forget},
12 num::NonZeroU8,
13 ops::Deref,
14 path::{Path, PathBuf},
15 sync::LazyLock,
16};
17
18use bincode::{
19 Decode, Encode,
20 de::{Decoder, read::Reader},
21 enc::Encoder,
22 error::{DecodeError, EncodeError},
23 impl_borrow_decode,
24};
25use bytes_str::BytesStr;
26use debug_unreachable::debug_unreachable;
27use rustc_hash::FxBuildHasher;
28use serde::{Deserialize, Deserializer, Serialize, Serializer};
29use shrink_to_fit::ShrinkToFit;
30use smallvec::SmallVec;
31use triomphe::Arc;
32use turbo_tasks_hash::{DeterministicHash, DeterministicHasher};
33
34use crate::{
35 dynamic::{
36 DynamicPrehashedString, deref_dynamic, deref_static, hash_bytes, new_atom,
37 new_atom_from_prehashed, new_static_atom,
38 },
39 tagged_value::{MAX_INLINE_LEN, TaggedValue},
40};
41
42mod dynamic;
43mod tagged_value;
44
/// A string handle stored in a single [`TaggedValue`].
///
/// The two low bits of the tag byte (see `TAG_MASK`) select how the value is
/// interpreted: `STATIC_TAG` values are read through `deref_static`,
/// `DYNAMIC_TAG` values through `deref_dynamic` / `dynamic::restore_arc`, and
/// `INLINE_TAG` values keep their bytes inside the tagged value itself.
pub struct RcStr {
    // Raw tagged storage. It must only be interpreted after checking `tag()`.
    unsafe_data: TaggedValue,
}
91
// Compile-time layout check: wrapping an `RcStr` in `Option` must not make it
// any larger than a bare `RcStr`.
const _: () = {
    assert!(std::mem::size_of::<RcStr>() == std::mem::size_of::<Option<RcStr>>());
};

// NOTE(review): these manual impls presumably rely on the dynamic payload being
// an immutable, atomically reference-counted allocation and on static/inline
// payloads being plain data — confirm against `dynamic` and `tagged_value`.
unsafe impl Send for RcStr {}
unsafe impl Sync for RcStr {}
99
/// Tag of values that are accessed through `deref_dynamic` / `dynamic::restore_arc`.
const DYNAMIC_TAG: u8 = 0b_10;
/// Tag of values that are accessed through `deref_static`.
const STATIC_TAG: u8 = 0b_00;
/// Tag of values whose bytes are stored inside the tagged value itself; the
/// string length is kept in the upper four bits of the same byte.
const INLINE_TAG: u8 = 0b_01;
/// `INLINE_TAG` as a `NonZeroU8`.
const INLINE_TAG_INIT: NonZeroU8 = NonZeroU8::new(INLINE_TAG).unwrap();
/// Mask that isolates the two tag bits of the tag byte.
const TAG_MASK: u8 = 0b_11;
/// Right shift that brings the inline length down from the upper four bits.
const LEN_OFFSET: usize = 4;
/// Mask that isolates the inline length bits of the tag byte.
const LEN_MASK: u8 = 0xf0;
111
112impl RcStr {
113 #[inline(always)]
114 fn tag(&self) -> u8 {
115 self.unsafe_data.tag_byte() & TAG_MASK
116 }
117
118 #[inline(never)]
119 pub fn as_str(&self) -> &str {
120 match self.tag() {
121 STATIC_TAG => unsafe { deref_static(self.unsafe_data).value },
122 DYNAMIC_TAG => unsafe { &deref_dynamic(self.unsafe_data).value },
123 INLINE_TAG => self.inline_as_str(),
124 _ => unsafe { debug_unreachable!() },
125 }
126 }
127
128 fn inline_as_str(&self) -> &str {
129 debug_assert!(self.tag() == INLINE_TAG);
130 let len = (self.unsafe_data.tag_byte() & LEN_MASK) >> LEN_OFFSET;
131 let src = self.unsafe_data.data();
132 unsafe { std::str::from_utf8_unchecked(&src[..(len as usize)]) }
133 }
134
135 pub fn into_owned(self) -> String {
143 match self.tag() {
144 DYNAMIC_TAG => {
145 let arc = unsafe { dynamic::restore_arc(ManuallyDrop::new(self).unsafe_data) };
147 match Arc::try_unwrap(arc) {
148 Ok(v) => String::from(v.value),
150 Err(arc) => arc.value.to_string(),
151 }
152 }
153 INLINE_TAG => self.inline_as_str().to_string(),
154 STATIC_TAG => unsafe { deref_static(self.unsafe_data).value.to_string() },
155 _ => unsafe { debug_unreachable!() },
156 }
157 }
158
159 pub fn map(self, f: impl FnOnce(String) -> String) -> Self {
160 RcStr::from(Cow::Owned(f(self.into_owned())))
161 }
162
163 fn from_deserialized(s: &str) -> Self {
170 if !is_atom_inlineable(s) {
171 let hash = hash_bytes(s.as_bytes());
172 if let Some(entries) = STATIC_TABLE.get(&hash)
174 && let Some(static_phs) = entries.iter().find(|phs| phs.value == s)
175 {
176 new_static_atom(static_phs)
177 } else {
178 new_atom_from_prehashed(DynamicPrehashedString {
179 hash,
180 value: s.into(),
181 })
182 }
183 } else {
184 inline_atom(s).unwrap()
185 }
186 }
187}
188
189impl DeterministicHash for RcStr {
190 fn deterministic_hash<H: DeterministicHasher>(&self, state: &mut H) {
191 state.write_usize(self.len());
192 state.write_bytes(self.as_bytes());
193 }
194}
195
196impl Deref for RcStr {
197 type Target = str;
198
199 fn deref(&self) -> &Self::Target {
200 self.as_str()
201 }
202}
203
204impl Borrow<str> for RcStr {
205 fn borrow(&self) -> &str {
206 self.as_str()
207 }
208}
209
210impl AsRef<str> for RcStr {
211 fn as_ref(&self) -> &str {
212 self.as_str()
213 }
214}
215
216impl From<BytesStr> for RcStr {
217 fn from(s: BytesStr) -> Self {
218 let bytes: Vec<u8> = s.into_bytes().into();
219 RcStr::from(unsafe {
220 String::from_utf8_unchecked(bytes)
222 })
223 }
224}
225
226impl From<Arc<String>> for RcStr {
227 fn from(s: Arc<String>) -> Self {
228 match Arc::try_unwrap(s) {
229 Ok(v) => new_atom(Cow::Owned(v)),
230 Err(arc) => new_atom(Cow::Borrowed(&**arc)),
231 }
232 }
233}
234
235impl From<String> for RcStr {
236 fn from(s: String) -> Self {
237 new_atom(Cow::Owned(s))
238 }
239}
240
241impl From<&'_ str> for RcStr {
242 fn from(s: &str) -> Self {
243 new_atom(Cow::Borrowed(s))
244 }
245}
246
247impl From<Cow<'_, str>> for RcStr {
248 fn from(s: Cow<str>) -> Self {
249 new_atom(s)
250 }
251}
252
253impl AsRef<Path> for RcStr {
255 fn as_ref(&self) -> &Path {
256 self.as_str().as_ref()
257 }
258}
259
260impl AsRef<OsStr> for RcStr {
262 fn as_ref(&self) -> &OsStr {
263 self.as_str().as_ref()
264 }
265}
266
267impl AsRef<[u8]> for RcStr {
269 fn as_ref(&self) -> &[u8] {
270 self.as_str().as_ref()
271 }
272}
273
274impl From<RcStr> for BytesStr {
275 fn from(value: RcStr) -> Self {
276 Self::from_str_slice(value.as_str())
277 }
278}
279
280impl PartialEq<str> for RcStr {
281 fn eq(&self, other: &str) -> bool {
282 self.as_str() == other
283 }
284}
285
286impl PartialEq<&'_ str> for RcStr {
287 fn eq(&self, other: &&str) -> bool {
288 self.as_str() == *other
289 }
290}
291
292impl PartialEq<String> for RcStr {
293 fn eq(&self, other: &String) -> bool {
294 self.as_str() == other.as_str()
295 }
296}
297
298impl Debug for RcStr {
299 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
300 Debug::fmt(&self.as_str(), f)
301 }
302}
303
304impl Display for RcStr {
305 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
306 Display::fmt(&self.as_str(), f)
307 }
308}
309
310impl From<RcStr> for String {
311 fn from(s: RcStr) -> Self {
312 s.into_owned()
313 }
314}
315
316impl From<RcStr> for PathBuf {
317 fn from(s: RcStr) -> Self {
318 String::from(s).into()
319 }
320}
321
322impl Clone for RcStr {
323 #[inline(always)]
324 fn clone(&self) -> Self {
325 if self.tag() == DYNAMIC_TAG {
328 unsafe {
329 let arc = dynamic::restore_arc(self.unsafe_data);
330 forget(arc.clone());
331 forget(arc);
332 }
333 }
334
335 RcStr {
336 unsafe_data: self.unsafe_data,
337 }
338 }
339}
340
impl Default for RcStr {
    /// Returns the empty string, built through the `rcstr!` macro.
    fn default() -> Self {
        rcstr!("")
    }
}
346
347impl PartialEq for RcStr {
348 fn eq(&self, other: &Self) -> bool {
349 if self.unsafe_data == other.unsafe_data {
352 return true;
353 }
354 if self.tag() == INLINE_TAG || other.tag() == INLINE_TAG {
358 return false;
359 }
360
361 let (l_hash, l_str) = unsafe { heap_hash_and_str(self) };
363 let (r_hash, r_str) = unsafe { heap_hash_and_str(other) };
364 l_hash == r_hash && l_str == r_str
365 }
366}
367
368#[inline]
370unsafe fn heap_hash_and_str(s: &RcStr) -> (u64, &str) {
371 match s.tag() {
372 STATIC_TAG => {
373 let p = unsafe { deref_static(s.unsafe_data) };
374 (p.hash, p.value)
375 }
376 DYNAMIC_TAG => {
377 let p = unsafe { deref_dynamic(s.unsafe_data) };
378 (p.hash, &p.value)
379 }
380 _ => unsafe { debug_unreachable!() },
381 }
382}
383
// Marker impl: `RcStr` equality is declared a full equivalence relation.
impl Eq for RcStr {}
385
386impl PartialOrd for RcStr {
387 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
388 Some(self.cmp(other))
389 }
390}
391
392impl Ord for RcStr {
393 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
394 self.as_str().cmp(other.as_str())
395 }
396}
397
398impl Hash for RcStr {
399 fn hash<H: Hasher>(&self, state: &mut H) {
400 match self.tag() {
401 STATIC_TAG => {
402 state.write_u64(unsafe { deref_static(self.unsafe_data).hash });
403 state.write_u8(0xff); }
405 DYNAMIC_TAG => {
406 state.write_u64(unsafe { deref_dynamic(self.unsafe_data).hash });
407 state.write_u8(0xff); }
409 INLINE_TAG => {
410 self.inline_as_str().hash(state);
411 }
412 _ => unsafe { debug_unreachable!() },
413 }
414 }
415}
416
417impl Serialize for RcStr {
418 fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
419 serializer.serialize_str(self.as_str())
420 }
421}
422
423impl<'de> Deserialize<'de> for RcStr {
424 fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
425 struct RcStrVisitor;
426
427 impl serde::de::Visitor<'_> for RcStrVisitor {
428 type Value = RcStr;
429
430 fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
431 f.write_str("a string")
432 }
433
434 fn visit_str<E: serde::de::Error>(self, v: &str) -> Result<RcStr, E> {
435 Ok(RcStr::from_deserialized(v))
436 }
437
438 fn visit_string<E: serde::de::Error>(self, v: String) -> Result<RcStr, E> {
439 Ok(RcStr::from_deserialized(&v))
440 }
441 }
442
443 deserializer.deserialize_str(RcStrVisitor)
444 }
445}
446
447impl Encode for RcStr {
448 fn encode<E: Encoder>(&self, encoder: &mut E) -> Result<(), EncodeError> {
449 self.as_str().encode(encoder)
450 }
451}
452
453impl<Context> Decode<Context> for RcStr {
454 fn decode<D: Decoder<Context = Context>>(decoder: &mut D) -> Result<Self, DecodeError> {
455 let len = u64::decode(decoder)?;
457 let len: usize = len
458 .try_into()
459 .map_err(|_| DecodeError::OutsideUsizeRange(len))?;
460
461 if unty::type_equal::<D::R, turbo_bincode::TurboBincodeReader>() {
462 let bytes = decoder
465 .reader()
466 .peek_read(len)
467 .ok_or(DecodeError::UnexpectedEnd { additional: len })?;
468 let s = core::str::from_utf8(bytes).map_err(|inner| DecodeError::Utf8 { inner })?;
469 let rcstr = RcStr::from_deserialized(s);
470 decoder.reader().consume(len);
471 Ok(rcstr)
472 } else {
473 unreachable!(
474 "RcStr::decode expected TurboBincodeReader, but was called with a {} reader",
475 std::any::type_name::<D::R>(),
476 )
477 }
478 }
479}
480
// Invokes bincode's `impl_borrow_decode!` helper macro for `RcStr`.
// NOTE(review): presumably this forwards `BorrowDecode` to the `Decode` impl — confirm.
impl_borrow_decode!(RcStr);
482
483impl Drop for RcStr {
484 fn drop(&mut self) {
485 match self.tag() {
486 DYNAMIC_TAG => unsafe { drop(dynamic::restore_arc(self.unsafe_data)) },
487 INLINE_TAG | STATIC_TAG => {
488 }
490 _ => unsafe { debug_unreachable!() },
491 }
492 }
493}
494
/// Const-callable wrapper around `dynamic::inline_atom`.
///
/// Returns `None` when no inline value is produced for `s`.
/// NOTE(review): presumably that happens exactly when `s` is longer than
/// `MAX_INLINE_LEN` — confirm in `dynamic`.
#[doc(hidden)]
pub const fn inline_atom(s: &str) -> Option<RcStr> {
    dynamic::inline_atom(s)
}
500
501#[doc(hidden)]
503pub const fn is_atom_inlineable(s: &str) -> bool {
504 s.len() <= MAX_INLINE_LEN
505}
506
507#[doc(hidden)]
508#[inline(always)]
509pub const fn from_static(s: &'static StaticPrehashedString) -> RcStr {
510 dynamic::new_static_atom(s)
511}
512#[doc(hidden)]
513pub use dynamic::StaticPrehashedString;
514
515#[doc(hidden)]
516pub const fn make_const_prehashed_string(text: &'static str) -> StaticPrehashedString {
517 StaticPrehashedString {
518 value: text,
519 hash: hash_bytes(text.as_bytes()),
520 }
521}
522
523#[doc(hidden)]
525pub use inventory;
526
/// Inventory entry wrapping a reference to a static prehashed string.
///
/// Every submitted entry is visited when `STATIC_TABLE` is first built.
#[doc(hidden)]
pub struct StaticRcStr(pub &'static StaticPrehashedString);

// Declares `StaticRcStr` as a type that `inventory` collects.
inventory::collect!(StaticRcStr);
532
/// Forwards `$value` to `inventory::submit!` through this crate's re-export of
/// `inventory`.
///
/// NOTE(review): presumably this exists so that code expanding `rcstr!` does
/// not need its own dependency on `inventory` — confirm against the macro crate.
#[doc(hidden)]
#[macro_export]
macro_rules! __rcstr_inventory_submit {
    ($value:expr) => {
        $crate::inventory::submit!($value);
    };
}
546
547static STATIC_TABLE: LazyLock<
554 HashMap<u64, SmallVec<[&'static StaticPrehashedString; 1]>, FxBuildHasher>,
555> = LazyLock::new(|| {
556 let mut map: HashMap<u64, SmallVec<[&'static StaticPrehashedString; 1]>, FxBuildHasher> =
557 HashMap::with_hasher(FxBuildHasher);
558 for StaticRcStr(phs) in inventory::iter::<StaticRcStr> {
559 if phs.value.len() <= MAX_INLINE_LEN {
560 continue;
565 }
566 let entries = map.entry(phs.hash).or_default();
567 if !entries.iter().any(|e| e.value == phs.value) {
571 entries.push(phs);
572 }
573 }
574 map.shrink_to_fit(); map
576});
577
578pub use turbo_rcstr_macros::rcstr;
582
impl ShrinkToFit for RcStr {
    /// Deliberately a no-op: the body is empty and the value is left untouched.
    #[inline(always)]
    fn shrink_to_fit(&mut self) {}
}
588
// Fail the build early when the `napi` feature is requested for a wasm target.
#[cfg(all(feature = "napi", target_family = "wasm"))]
compile_error!("The napi feature cannot be enabled for wasm targets");
591
/// N-API bindings: every trait here delegates to the existing string impls,
/// with `String` used for type information, validation and incoming values
/// and `&str` used for outgoing values.
#[cfg(all(feature = "napi", not(target_family = "wasm")))]
mod napi_impl {
    use napi::{
        bindgen_prelude::{FromNapiValue, ToNapiValue, TypeName, ValidateNapiValue},
        sys::{napi_env, napi_value},
    };

    use super::*;

    impl TypeName for RcStr {
        /// Reports the same type name as `String`.
        fn type_name() -> &'static str {
            <String as TypeName>::type_name()
        }

        /// Reports the same value type as `String`.
        fn value_type() -> napi::ValueType {
            <String as TypeName>::value_type()
        }
    }

    impl ToNapiValue for RcStr {
        /// Converts the borrowed contents through the `&str` implementation.
        unsafe fn to_napi_value(env: napi_env, val: Self) -> napi::Result<napi_value> {
            let text = val.as_str();
            unsafe { <&str as ToNapiValue>::to_napi_value(env, text) }
        }
    }

    impl FromNapiValue for RcStr {
        /// Reads the value as a `String` and converts it into an `RcStr`.
        unsafe fn from_napi_value(env: napi_env, napi_val: napi_value) -> napi::Result<Self> {
            let owned = unsafe { String::from_napi_value(env, napi_val) }?;
            Ok(RcStr::from(owned))
        }
    }

    impl ValidateNapiValue for RcStr {
        /// Applies the same validation as `String`.
        unsafe fn validate(env: napi_env, napi_val: napi_value) -> napi::Result<napi_value> {
            unsafe { <String as ValidateNapiValue>::validate(env, napi_val) }
        }
    }
}
631
/// A local pool of `RcStr` values that returns clones of previously seen
/// strings instead of creating new ones.
///
/// Strings short enough to be stored inline are never added to the pool.
pub struct RcStrInterning {
    // Pooled strings, looked up by their `str` contents.
    set: rustc_hash::FxHashSet<RcStr>,
}
640
641impl Default for RcStrInterning {
642 fn default() -> Self {
643 Self::new()
644 }
645}
646
647impl RcStrInterning {
648 pub fn new() -> Self {
650 Self {
651 set: rustc_hash::FxHashSet::default(),
652 }
653 }
654
655 pub fn intern(&mut self, s: &str) -> RcStr {
661 if is_atom_inlineable(s) {
662 return RcStr::from(s);
664 }
665 if let Some(existing) = self.set.get(s) {
666 return existing.clone();
667 }
668 let rc = RcStr::from(s);
669 self.set.insert(rc.clone());
670 rc
671 }
672
673 fn intern_owned(&mut self, s: String) -> RcStr {
676 if is_atom_inlineable(&s) {
677 return RcStr::from(s);
678 }
679 if let Some(existing) = self.set.get(s.as_str()) {
680 return existing.clone();
681 }
682 let rc = RcStr::from(s);
683 self.set.insert(rc.clone());
684 rc
685 }
686
687 pub fn intern_cow(&mut self, s: std::borrow::Cow<'_, str>) -> RcStr {
690 match s {
691 std::borrow::Cow::Borrowed(s) => self.intern(s),
692 std::borrow::Cow::Owned(s) => self.intern_owned(s),
693 }
694 }
695
696 pub fn intern_display(&mut self, v: &impl std::fmt::Display) -> RcStr {
698 self.intern_owned(v.to_string())
699 }
700}
701
#[cfg(test)]
mod tests {
    use std::mem::ManuallyDrop;

    use super::*;

    /// Cloning and dropping a dynamic string must move the reference count up
    /// and down by exactly one.
    #[test]
    fn test_refcount() {
        // Reads the count without changing it: the restored `Arc` is wrapped
        // in `ManuallyDrop`, so it never releases its reference.
        fn refcount(str: &RcStr) -> usize {
            assert!(str.tag() == DYNAMIC_TAG);
            let arc = ManuallyDrop::new(unsafe { dynamic::restore_arc(str.unsafe_data) });
            triomphe::Arc::count(&arc)
        }

        let str = RcStr::from("this is a long string that won't be inlined");

        assert_eq!(refcount(&str), 1);
        // Asking twice confirms that `refcount` itself does not alter the count.
        assert_eq!(refcount(&str), 1);
        let cloned_str = str.clone();
        assert_eq!(refcount(&str), 2);

        drop(cloned_str);
        assert_eq!(refcount(&str), 1);

        // `into_owned` on a clone must release the clone's reference.
        let _ = str.clone().into_owned();
        assert_eq!(refcount(&str), 1);
    }

    /// `rcstr!` and `RcStr::from` must agree for strings of length 0 through 9.
    #[test]
    fn test_rcstr() {
        assert_eq!(rcstr!(""), RcStr::default());
        assert_eq!(rcstr!(""), RcStr::from(""));
        assert_eq!(rcstr!("a"), RcStr::from("a"));
        assert_eq!(rcstr!("ab"), RcStr::from("ab"));
        assert_eq!(rcstr!("abc"), RcStr::from("abc"));
        assert_eq!(rcstr!("abcd"), RcStr::from("abcd"));
        assert_eq!(rcstr!("abcde"), RcStr::from("abcde"));
        assert_eq!(rcstr!("abcdef"), RcStr::from("abcdef"));
        assert_eq!(rcstr!("abcdefg"), RcStr::from("abcdefg"));
        assert_eq!(rcstr!("abcdefgh"), RcStr::from("abcdefgh"));
        assert_eq!(rcstr!("abcdefghi"), RcStr::from("abcdefghi"));
    }

    /// A long string from `rcstr!` and the same string from `RcStr::from` use
    /// different representations but still compare equal.
    #[test]
    fn test_static_atom() {
        const LONG: &str = "a very long string that lives forever";
        let leaked = rcstr!(LONG);
        let not_leaked = RcStr::from(LONG);
        assert_ne!(leaked.tag(), not_leaked.tag());
        assert_eq!(leaked, not_leaked);
    }

    /// `inline_atom` must be usable in a const context.
    #[test]
    fn test_inline_atom() {
        const STR: RcStr = {
            // Kept as `is_some` + `unwrap` because this runs in a const context.
            let inline = inline_atom("hello");
            if inline.is_some() {
                inline.unwrap()
            } else {
                unreachable!();
            }
        };
        assert_eq!(STR, RcStr::from("hello"));
    }

    /// Under `FxHasher`, an `RcStr` must hash exactly like the `str` it holds,
    /// both on its own and as part of a tuple.
    #[test]
    fn test_hash_matches_str() {
        use std::hash::{Hash, Hasher};

        use rustc_hash::FxHasher;

        fn fxhash<T: Hash>(value: T) -> u64 {
            let mut hasher = FxHasher::default();
            value.hash(&mut hasher);
            hasher.finish()
        }

        // A range of short lengths plus one clearly long string.
        let test_strings = [
            "",
            "a",
            "ab",
            "abc",
            "abcdef",
            "abcdefg",
            "abcdefgh",
            "a very long string that exceeds sixteen bytes",
        ];

        // Single values.
        for s in test_strings {
            let rcstr = RcStr::from(s);
            assert_eq!(
                fxhash(&rcstr),
                fxhash(s),
                "Hash mismatch for string of length {}: {:?}",
                s.len(),
                s
            );
        }

        // Every pair, hashed as a tuple.
        for s1 in test_strings {
            for s2 in test_strings {
                let rcstr1 = RcStr::from(s1);
                let rcstr2 = RcStr::from(s2);
                assert_eq!(
                    fxhash((&rcstr1, &rcstr2)),
                    fxhash((s1, s2)),
                    "Tuple hash mismatch for ({:?}, {:?})",
                    s1,
                    s2
                );
            }
        }
    }

    /// Encoding then decoding must preserve the contents and produce the
    /// expected representation in each case.
    #[test]
    fn test_bincode_roundtrip() {
        use turbo_bincode::{turbo_bincode_decode, turbo_bincode_encode};

        // A short string decodes to an inline value.
        let short = RcStr::from("hi");
        let encoded = turbo_bincode_encode(&short).unwrap();
        let decoded: RcStr = turbo_bincode_decode(&encoded).unwrap();
        assert_eq!(decoded, short);
        assert_eq!(decoded.tag(), INLINE_TAG);

        // A long string with no matching `rcstr!` constant decodes to a dynamic value.
        let long = RcStr::from("bincode_roundtrip: no matching rcstr constant");
        let encoded = turbo_bincode_encode(&long).unwrap();
        let decoded: RcStr = turbo_bincode_decode(&encoded).unwrap();
        assert_eq!(decoded, long);
        assert_eq!(decoded.tag(), DYNAMIC_TAG);

        // A long string that matches an `rcstr!` constant decodes to a static
        // value, even though the encoded original was built with `RcStr::from`.
        const STATIC_STR: &str = "bincode_roundtrip: a static constant for testing";
        let _register = rcstr!(STATIC_STR);
        let original = RcStr::from(STATIC_STR);
        let encoded = turbo_bincode_encode(&original).unwrap();
        let decoded: RcStr = turbo_bincode_decode(&encoded).unwrap();
        assert_eq!(decoded.as_str(), STATIC_STR);
        assert_eq!(decoded.tag(), STATIC_TAG);
    }

    /// Interning the same long string through any entry point must return
    /// handles that share one allocation.
    #[test]
    fn test_interning() {
        let mut interner = RcStrInterning::new();

        // Short strings: only equality is checked.
        let a = interner.intern("hi");
        let b = interner.intern("hi");
        assert_eq!(a, b);

        // Long strings: the second call must return the pooled allocation.
        let long = "this is a long string that exceeds inline threshold";
        let c = interner.intern(long);
        let d = interner.intern(long);
        assert_eq!(c, d);
        assert!(std::ptr::eq(c.as_str().as_ptr(), d.as_str().as_ptr()));

        // A borrowed `Cow` hits the same pooled entry.
        let e = interner.intern_cow(std::borrow::Cow::Borrowed(long));
        assert_eq!(e, c);
        assert!(std::ptr::eq(e.as_str().as_ptr(), c.as_str().as_ptr()));

        // An owned `Cow` hits the same pooled entry.
        let f = interner.intern_cow(std::borrow::Cow::Owned(long.to_string()));
        assert_eq!(f, c);
        assert!(std::ptr::eq(f.as_str().as_ptr(), c.as_str().as_ptr()));

        // Values rendered through `Display` are pooled as well.
        let long2 = "another long string that exceeds the inline threshold here";
        let g = interner.intern_display(&long2);
        let h = interner.intern_display(&long2);
        assert_eq!(g, h);
        assert!(std::ptr::eq(g.as_str().as_ptr(), h.as_str().as_ptr()));
    }
}