@@ -2850,3 +2850,221 @@ pub fn used_keywords(edition: impl Copy + FnOnce() -> Edition) -> Vec<Symbol> {
28502850 } )
28512851 . collect ( )
28522852}
2853+
2854+ /// njn: update
2855+ /// njn: could move this to byte_symbol module
2856+ /// An interned string.
2857+ ///
2858+ /// Internally, a `Symbol` is implemented as an index, and all operations
2859+ /// (including hashing, equality, and ordering) operate on that index. The use
2860+ /// of `rustc_index::newtype_index!` means that `Option<Symbol>` only takes up 4 bytes,
2861+ /// because `rustc_index::newtype_index!` reserves the last 256 values for tagging purposes.
2862+ ///
2863+ /// Note that `Symbol` cannot directly be a `rustc_index::newtype_index!` because it
2864+ /// implements `fmt::Debug`, `Encodable`, and `Decodable` in special ways.
2865+ #[ derive( Clone , Copy , PartialEq , Eq , PartialOrd , Ord , Hash ) ]
2866+ pub struct ByteSymbol ( ByteSymbolIndex ) ;
2867+
2868+ rustc_index:: newtype_index! {
2869+ #[ orderable]
2870+ struct ByteSymbolIndex { }
2871+ }
2872+
2873+ impl ByteSymbol {
2874+ pub const fn new ( n : u32 ) -> Self {
2875+ ByteSymbol ( ByteSymbolIndex :: from_u32 ( n) )
2876+ }
2877+
2878+ /// Maps a string to its interned representation.
2879+ #[ rustc_diagnostic_item = "ByteSymbolIntern" ]
2880+ // njn: rename `string` variables as `byte_str`?
2881+ pub fn intern ( string : & [ u8 ] ) -> Self {
2882+ with_session_globals ( |session_globals| session_globals. byte_symbol_interner . intern ( string) )
2883+ }
2884+
2885+ /// Access the underlying string. This is a slowish operation because it
2886+ /// requires locking the symbol interner.
2887+ ///
2888+ /// Note that the lifetime of the return value is a lie. It's not the same
2889+ /// as `&self`, but actually tied to the lifetime of the underlying
2890+ /// interner. Interners are long-lived, and there are very few of them, and
2891+ /// this function is typically used for short-lived things, so in practice
2892+ /// it works out ok.
2893+ /// njn: rename?
2894+ pub fn as_byte_str ( & self ) -> & [ u8 ] {
2895+ with_session_globals ( |session_globals| unsafe {
2896+ std:: mem:: transmute :: < & [ u8 ] , & [ u8 ] > ( session_globals. byte_symbol_interner . get ( * self ) )
2897+ } )
2898+ }
2899+
2900+ pub fn as_u32 ( self ) -> u32 {
2901+ self . 0 . as_u32 ( )
2902+ }
2903+
2904+ // pub fn is_empty(self) -> bool {
2905+ // self == sym::empty
2906+ // }
2907+ }
2908+
2909+ // njn: needed?
2910+ impl fmt:: Debug for ByteSymbol {
2911+ fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
2912+ fmt:: Debug :: fmt ( self . as_byte_str ( ) , f)
2913+ }
2914+ }
2915+
2916+ // impl fmt::Display for Symbol {
2917+ // fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2918+ // fmt::Display::fmt(self.as_str(), f)
2919+ // }
2920+ // }
2921+
2922+ // impl<CTX> HashStable<CTX> for Symbol {
2923+ // #[inline]
2924+ // fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) {
2925+ // self.as_str().hash_stable(hcx, hasher);
2926+ // }
2927+ // }
2928+
2929+ // impl<CTX> ToStableHashKey<CTX> for Symbol {
2930+ // type KeyType = String;
2931+ // #[inline]
2932+ // fn to_stable_hash_key(&self, _: &CTX) -> String {
2933+ // self.as_str().to_string()
2934+ // }
2935+ // }
2936+
2937+ // impl StableCompare for Symbol {
2938+ // const CAN_USE_UNSTABLE_SORT: bool = true;
2939+
2940+ // fn stable_cmp(&self, other: &Self) -> std::cmp::Ordering {
2941+ // self.as_str().cmp(other.as_str())
2942+ // }
2943+ // }
2944+
2945+ #[ derive( Default ) ]
2946+ pub ( crate ) struct ByteInterner ( Lock < ByteInternerInner > ) ;
2947+
2948+ // njn: update comment
2949+ // The `&'static str`s in this type actually point into the arena.
2950+ //
2951+ // This type is private to prevent accidentally constructing more than one
2952+ // `Interner` on the same thread, which makes it easy to mix up `Symbol`s
2953+ // between `Interner`s.
2954+ // njn: parameterize?
2955+ #[ derive( Default ) ]
2956+ struct ByteInternerInner {
2957+ arena : DroplessArena ,
2958+ strings : FxIndexSet < & ' static [ u8 ] > , // njn: rename?
2959+ }
2960+
2961+ impl ByteInterner {
2962+ // fn new(init: &[&'static str], extra: &[&'static str]) -> Self {
2963+ // let strings = FxIndexSet::from_iter(init.iter().copied().chain(extra.iter().copied()));
2964+ // assert_eq!(
2965+ // strings.len(),
2966+ // init.len() + extra.len(),
2967+ // "`init` or `extra` contain duplicate symbols",
2968+ // );
2969+ // Interner(Lock::new(ByteInternerInner { arena: Default::default(), strings }))
2970+ // }
2971+
2972+ // fn prefill(init: &[&'static str], extra: &[&'static str]) -> Self {
2973+ // let strings = FxIndexSet::from_iter(init.iter().copied().chain(extra.iter().copied()));
2974+ // assert_eq!(
2975+ // strings.len(),
2976+ // init.len() + extra.len(),
2977+ // "`init` or `extra` contain duplicate symbols",
2978+ // );
2979+ // Interner(Lock::new(InternerInner { arena: Default::default(), strings }))
2980+ // }
2981+
2982+ #[ inline]
2983+ fn intern ( & self , string : & [ u8 ] ) -> ByteSymbol {
2984+ let mut inner = self . 0 . lock ( ) ;
2985+ if let Some ( idx) = inner. strings . get_index_of ( string) {
2986+ return ByteSymbol :: new ( idx as u32 ) ;
2987+ }
2988+
2989+ let string: & [ u8 ] = inner. arena . alloc_slice ( string) ;
2990+
2991+ // SAFETY: we can extend the arena allocation to `'static` because we
2992+ // only access these while the arena is still alive.
2993+ let string: & ' static [ u8 ] = unsafe { & * ( string as * const [ u8 ] ) } ;
2994+
2995+ // This second hash table lookup can be avoided by using `RawEntryMut`,
2996+ // but this code path isn't hot enough for it to be worth it. See
2997+ // #91445 for details.
2998+ let ( idx, is_new) = inner. strings . insert_full ( string) ;
2999+ debug_assert ! ( is_new) ; // due to the get_index_of check above
3000+
3001+ ByteSymbol :: new ( idx as u32 )
3002+ }
3003+
3004+ /// Get the symbol as a string.
3005+ ///
3006+ /// [`ByteSymbol::as_byte_str()`] should be used in preference to this function.
3007+ fn get ( & self , symbol : ByteSymbol ) -> & [ u8 ] {
3008+ self . 0 . lock ( ) . strings . get_index ( symbol. 0 . as_usize ( ) ) . unwrap ( )
3009+ }
3010+ }
3011+
3012+ impl Symbol {
3013+ // fn is_special(self) -> bool {
3014+ // self <= kw::Underscore
3015+ // }
3016+
3017+ // fn is_used_keyword_always(self) -> bool {
3018+ // self >= kw::As && self <= kw::While
3019+ // }
3020+
3021+ // fn is_unused_keyword_always(self) -> bool {
3022+ // self >= kw::Abstract && self <= kw::Yield
3023+ // }
3024+
3025+ // fn is_used_keyword_conditional(self, edition: impl FnOnce() -> Edition) -> bool {
3026+ // (self >= kw::Async && self <= kw::Dyn) && edition() >= Edition::Edition2018
3027+ // }
3028+
3029+ // fn is_unused_keyword_conditional(self, edition: impl Copy + FnOnce() -> Edition) -> bool {
3030+ // self == kw::Gen && edition().at_least_rust_2024()
3031+ // || self == kw::Try && edition().at_least_rust_2018()
3032+ // }
3033+
3034+ // pub fn is_reserved(self, edition: impl Copy + FnOnce() -> Edition) -> bool {
3035+ // self.is_special()
3036+ // || self.is_used_keyword_always()
3037+ // || self.is_unused_keyword_always()
3038+ // || self.is_used_keyword_conditional(edition)
3039+ // || self.is_unused_keyword_conditional(edition)
3040+ // }
3041+
3042+ // pub fn is_weak(self) -> bool {
3043+ // self >= kw::Auto && self <= kw::Yeet
3044+ // }
3045+
3046+ // /// A keyword or reserved identifier that can be used as a path segment.
3047+ // pub fn is_path_segment_keyword(self) -> bool {
3048+ // self == kw::Super
3049+ // || self == kw::SelfLower
3050+ // || self == kw::SelfUpper
3051+ // || self == kw::Crate
3052+ // || self == kw::PathRoot
3053+ // || self == kw::DollarCrate
3054+ // }
3055+
3056+ // /// Returns `true` if the symbol is `true` or `false`.
3057+ // pub fn is_bool_lit(self) -> bool {
3058+ // self == kw::True || self == kw::False
3059+ // }
3060+
3061+ // /// Returns `true` if this symbol can be a raw identifier.
3062+ // pub fn can_be_raw(self) -> bool {
3063+ // self != sym::empty && self != kw::Underscore && !self.is_path_segment_keyword()
3064+ // }
3065+
3066+ // /// Was this symbol predefined in the compiler's `symbols!` macro
3067+ // pub fn is_predefined(self) -> bool {
3068+ // self.as_u32() < PREDEFINED_SYMBOLS_COUNT
3069+ // }
3070+ }
0 commit comments