@@ -235,15 +235,54 @@ static inline PyObject *get_interned_dict(PyInterpreterState *interp)
235
235
return _Py_INTERP_CACHED_OBJECT (interp , interned_strings );
236
236
}
237
237
238
+ #define INTERNED_STRINGS _PyRuntime.cached_objects.interned_strings
239
+
238
240
Py_ssize_t
239
241
_PyUnicode_InternedSize (void )
240
242
{
241
- return PyObject_Length (get_interned_dict (_PyInterpreterState_GET ()));
243
+ PyObject * dict = get_interned_dict (_PyInterpreterState_GET ());
244
+ return _Py_hashtable_len (INTERNED_STRINGS ) + PyDict_GET_SIZE (dict );
245
+ }
246
+
247
+ static Py_hash_t unicode_hash (PyObject * );
248
+ static int unicode_compare_eq (PyObject * , PyObject * );
249
+
250
+ static Py_uhash_t
251
+ hashtable_unicode_hash (const void * key )
252
+ {
253
+ return unicode_hash ((PyObject * )key );
254
+ }
255
+
256
+ static int
257
+ hashtable_unicode_compare (const void * key1 , const void * key2 )
258
+ {
259
+ PyObject * obj1 = (PyObject * )key1 ;
260
+ PyObject * obj2 = (PyObject * )key2 ;
261
+ if (obj1 != NULL && obj2 != NULL ) {
262
+ return unicode_compare_eq (obj1 , obj2 );
263
+ }
264
+ else {
265
+ return obj1 == obj2 ;
266
+ }
242
267
}
243
268
244
269
static int
245
270
init_interned_dict (PyInterpreterState * interp )
246
271
{
272
+ if (_Py_IsMainInterpreter (interp )) {
273
+ assert (INTERNED_STRINGS == NULL );
274
+ _Py_hashtable_allocator_t hashtable_alloc = {PyMem_RawMalloc , PyMem_RawFree };
275
+ INTERNED_STRINGS = _Py_hashtable_new_full (
276
+ hashtable_unicode_hash ,
277
+ hashtable_unicode_compare ,
278
+ NULL ,
279
+ NULL ,
280
+ & hashtable_alloc
281
+ );
282
+ if (INTERNED_STRINGS == NULL ) {
283
+ return -1 ;
284
+ }
285
+ }
247
286
assert (get_interned_dict (interp ) == NULL );
248
287
PyObject * interned = interned = PyDict_New ();
249
288
if (interned == NULL ) {
@@ -262,6 +301,10 @@ clear_interned_dict(PyInterpreterState *interp)
262
301
Py_DECREF (interned );
263
302
_Py_INTERP_CACHED_OBJECT (interp , interned_strings ) = NULL ;
264
303
}
304
+ if (_Py_IsMainInterpreter (interp ) && INTERNED_STRINGS != NULL ) {
305
+ _Py_hashtable_destroy (INTERNED_STRINGS );
306
+ INTERNED_STRINGS = NULL ;
307
+ }
265
308
}
266
309
267
310
#define _Py_RETURN_UNICODE_EMPTY () \
@@ -1222,6 +1265,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
1222
1265
_PyUnicode_STATE (unicode ).kind = kind ;
1223
1266
_PyUnicode_STATE (unicode ).compact = 1 ;
1224
1267
_PyUnicode_STATE (unicode ).ascii = is_ascii ;
1268
+ _PyUnicode_STATE (unicode ).statically_allocated = 0 ;
1225
1269
if (is_ascii ) {
1226
1270
((char * )data )[size ] = 0 ;
1227
1271
}
@@ -1552,7 +1596,9 @@ unicode_dealloc(PyObject *unicode)
1552
1596
* we accidentally decref an immortal string out of existence. Since
1553
1597
* the string is an immortal object, just re-set the reference count.
1554
1598
*/
1555
- if (PyUnicode_CHECK_INTERNED (unicode )) {
1599
+ if (PyUnicode_CHECK_INTERNED (unicode )
1600
+ || _PyUnicode_STATE (unicode ).statically_allocated )
1601
+ {
1556
1602
_Py_SetImmortal (unicode );
1557
1603
return ;
1558
1604
}
@@ -14502,6 +14548,7 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode)
14502
14548
_PyUnicode_STATE (self ).kind = kind ;
14503
14549
_PyUnicode_STATE (self ).compact = 0 ;
14504
14550
_PyUnicode_STATE (self ).ascii = _PyUnicode_STATE (unicode ).ascii ;
14551
+ _PyUnicode_STATE (self ).statically_allocated = 0 ;
14505
14552
_PyUnicode_UTF8_LENGTH (self ) = 0 ;
14506
14553
_PyUnicode_UTF8 (self ) = NULL ;
14507
14554
_PyUnicode_DATA_ANY (self ) = NULL ;
@@ -14725,6 +14772,23 @@ _PyUnicode_InternInPlace(PyInterpreterState *interp, PyObject **p)
14725
14772
return ;
14726
14773
}
14727
14774
14775
+ /* Look in the global cache first. */
14776
+ PyObject * r = (PyObject * )_Py_hashtable_get (INTERNED_STRINGS , s );
14777
+ if (r != NULL && r != s ) {
14778
+ Py_SETREF (* p , Py_NewRef (r ));
14779
+ return ;
14780
+ }
14781
+
14782
+ /* Handle statically allocated strings. */
14783
+ if (_PyUnicode_STATE (s ).statically_allocated ) {
14784
+ assert (_Py_IsImmortal (s ));
14785
+ if (_Py_hashtable_set (INTERNED_STRINGS , s , s ) == 0 ) {
14786
+ _PyUnicode_STATE (* p ).interned = SSTATE_INTERNED_IMMORTAL_STATIC ;
14787
+ }
14788
+ return ;
14789
+ }
14790
+
14791
+ /* Look in the per-interpreter cache. */
14728
14792
PyObject * interned = get_interned_dict (interp );
14729
14793
assert (interned != NULL );
14730
14794
@@ -14740,9 +14804,11 @@ _PyUnicode_InternInPlace(PyInterpreterState *interp, PyObject **p)
14740
14804
}
14741
14805
14742
14806
if (_Py_IsImmortal (s )) {
14807
+ // XXX Restrict this to the main interpreter?
14743
14808
_PyUnicode_STATE (* p ).interned = SSTATE_INTERNED_IMMORTAL_STATIC ;
14744
- return ;
14809
+ return ;
14745
14810
}
14811
+
14746
14812
#ifdef Py_REF_DEBUG
14747
14813
/* The reference count value excluding the 2 references from the
14748
14814
interned dictionary should be excluded from the RefTotal. The
0 commit comments