Skip to content

gh-106320: Move private _PyHash API to the internal C API #107026

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 80 additions & 5 deletions Include/internal/pycore_pyhash.h
Original file line number Diff line number Diff line change
@@ -1,10 +1,86 @@
#ifndef Py_INTERNAL_HASH_H
#define Py_INTERNAL_HASH_H
#ifndef Py_INTERNAL_PYHASH_H
#define Py_INTERNAL_PYHASH_H

#ifndef Py_BUILD_CORE
# error "this header requires Py_BUILD_CORE define"
#endif

/* Helpers for hash functions.
 *
 * Per the header comment of Python/pyhash.c, all the _Py_Hash*() utility
 * functions return -1 to signify an error.
 *
 * The helpers that the comments below name as being used by a shared
 * extension module are declared with PyAPI_FUNC; the others are plain
 * `extern` declarations. */
// NOTE(review): presumably hashes the given C double; the role of the
// PyObject* argument is not visible from this header -- see the notes for
// _Py_HashDouble in Python/pyhash.c.
extern Py_hash_t _Py_HashDouble(PyObject *, double);
// The _decimal shared extension uses _Py_HashPointer()
PyAPI_FUNC(Py_hash_t) _Py_HashPointer(const void*);
// Similar to _Py_HashPointer(), but doesn't replace -1 with -2
extern Py_hash_t _Py_HashPointerRaw(const void*);
// The _datetime shared extension uses _Py_HashBytes()
PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t);

/* Prime multiplier used in string and various other hashes. */
#define _PyHASH_MULTIPLIER 1000003UL /* 0xf4243 */

/* Parameters used for the numeric hash implementation. See notes for
_Py_HashDouble in Python/pyhash.c. Numeric hashes are based on
reduction modulo the prime 2**_PyHASH_BITS - 1. */

/* Exponent of the modulus: 61 when pointers are at least 8 bytes wide,
   31 otherwise. */
#if SIZEOF_VOID_P >= 8
# define _PyHASH_BITS 61
#else
# define _PyHASH_BITS 31
#endif

/* The modulus itself, 2**_PyHASH_BITS - 1, computed as a size_t. */
#define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1)
/* NOTE(review): presumably the hash value used for infinities -- confirm
   against Python/pyhash.c. */
#define _PyHASH_INF 314159
/* Same value as _PyHASH_MULTIPLIER.
   NOTE(review): presumably the factor applied to the imaginary part when
   hashing complex numbers -- confirm against the users of this macro. */
#define _PyHASH_IMAG _PyHASH_MULTIPLIER

/* Hash secret
*
* memory layout on 64 bit systems
* cccccccc cccccccc cccccccc uc -- unsigned char[24]
* pppppppp ssssssss ........ fnv -- two Py_hash_t
* k0k0k0k0 k1k1k1k1 ........ siphash -- two uint64_t
* ........ ........ ssssssss djbx33a -- 16 bytes padding + one Py_hash_t
* ........ ........ eeeeeeee pyexpat XML hash salt
*
* memory layout on 32 bit systems
* cccccccc cccccccc cccccccc uc
* ppppssss ........ ........ fnv -- two Py_hash_t
* k0k0k0k0 k1k1k1k1 ........ siphash -- two uint64_t (*)
* ........ ........ ssss.... djbx33a -- 16 bytes padding + one Py_hash_t
* ........ ........ eeee.... pyexpat XML hash salt
*
* (*) The siphash member may not be available on 32 bit platforms without
* an unsigned int64 data type.
*/
/* Storage type for the hash secret.
 *
 * Each member is a different view of the same bytes (see the memory layout
 * table in the "Hash secret" comment above). Do not reorder or resize
 * members: the padding arrays fix the offsets that the views rely on. */
typedef union {
/* Raw byte view; ensures the union is at least 24 bytes. */
unsigned char uc[24];
/* Two Py_hash_t for FNV: a prefix and a suffix, stored at the start. */
struct {
Py_hash_t prefix;
Py_hash_t suffix;
} fnv;
/* Two uint64 keys for SipHash24, occupying the first 16 bytes. */
struct {
uint64_t k0;
uint64_t k1;
} siphash;
/* A different (!) Py_hash_t for the small string optimization: the
   16 bytes of padding place `suffix` after the fnv/siphash data, so it
   does not overlap them. */
struct {
unsigned char padding[16];
Py_hash_t suffix;
} djbx33a;
/* pyexpat XML hash salt; same offset (after 16 bytes of padding) as
   djbx33a.suffix, as shown in the layout table. */
struct {
unsigned char padding[16];
Py_hash_t hashsalt;
} expat;
} _Py_HashSecret_t;

// The hash secret itself. Declared with PyAPI_DATA because the
// _elementtree shared extension uses _Py_HashSecret.expat.
PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret;

// Only declared in debug builds (Py_DEBUG).
// NOTE(review): presumably non-zero once _Py_HashSecret has been filled
// in -- confirm against the definition.
#ifdef Py_DEBUG
extern int _Py_HashSecret_Initialized;
#endif


struct pyhash_runtime_state {
struct {
Expand Down Expand Up @@ -34,7 +110,6 @@ struct pyhash_runtime_state {
}


uint64_t _Py_KeyedHash(uint64_t, const char *, Py_ssize_t);

extern uint64_t _Py_KeyedHash(uint64_t key, const void *src, Py_ssize_t src_sz);

#endif // Py_INTERNAL_HASH_H
#endif // !Py_INTERNAL_PYHASH_H
84 changes: 3 additions & 81 deletions Include/pyhash.h
Original file line number Diff line number Diff line change
@@ -1,87 +1,10 @@
#ifndef Py_HASH_H

#define Py_HASH_H
#ifdef __cplusplus
extern "C" {
#endif

/* Helpers for hash functions.
 *
 * Excluded from the limited API by the Py_LIMITED_API guard below. Per the
 * header comment of Python/pyhash.c, all the _Py_Hash*() utility functions
 * return -1 to signify an error. */
#ifndef Py_LIMITED_API
// NOTE(review): presumably hashes the given C double; the role of the
// PyObject* argument is not visible from this header -- see the notes for
// _Py_HashDouble in Python/pyhash.c.
PyAPI_FUNC(Py_hash_t) _Py_HashDouble(PyObject *, double);
PyAPI_FUNC(Py_hash_t) _Py_HashPointer(const void*);
// Similar to _Py_HashPointer(), but doesn't replace -1 with -2
PyAPI_FUNC(Py_hash_t) _Py_HashPointerRaw(const void*);
PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t);
#endif

/* Prime multiplier used in string and various other hashes. */
#define _PyHASH_MULTIPLIER 1000003UL /* 0xf4243 */

/* Parameters used for the numeric hash implementation. See notes for
_Py_HashDouble in Python/pyhash.c. Numeric hashes are based on
reduction modulo the prime 2**_PyHASH_BITS - 1. */

/* Exponent of the modulus: 61 when pointers are at least 8 bytes wide,
   31 otherwise. */
#if SIZEOF_VOID_P >= 8
# define _PyHASH_BITS 61
#else
# define _PyHASH_BITS 31
#endif

/* The modulus itself, 2**_PyHASH_BITS - 1, computed as a size_t. */
#define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1)
/* NOTE(review): presumably the hash value used for infinities -- confirm
   against Python/pyhash.c. */
#define _PyHASH_INF 314159
/* Same value as _PyHASH_MULTIPLIER.
   NOTE(review): presumably the factor applied to the imaginary part when
   hashing complex numbers -- confirm against the users of this macro. */
#define _PyHASH_IMAG _PyHASH_MULTIPLIER


/* hash secret
*
* memory layout on 64 bit systems
* cccccccc cccccccc cccccccc uc -- unsigned char[24]
* pppppppp ssssssss ........ fnv -- two Py_hash_t
* k0k0k0k0 k1k1k1k1 ........ siphash -- two uint64_t
* ........ ........ ssssssss djbx33a -- 16 bytes padding + one Py_hash_t
* ........ ........ eeeeeeee pyexpat XML hash salt
*
* memory layout on 32 bit systems
* cccccccc cccccccc cccccccc uc
* ppppssss ........ ........ fnv -- two Py_hash_t
* k0k0k0k0 k1k1k1k1 ........ siphash -- two uint64_t (*)
* ........ ........ ssss.... djbx33a -- 16 bytes padding + one Py_hash_t
* ........ ........ eeee.... pyexpat XML hash salt
*
* (*) The siphash member may not be available on 32 bit platforms without
* an unsigned int64 data type.
*/
#ifndef Py_LIMITED_API
/* Storage type for the hash secret.
 *
 * Each member is a different view of the same bytes (see the memory layout
 * table in the "hash secret" comment above). Do not reorder or resize
 * members: the padding arrays fix the offsets that the views rely on. */
typedef union {
/* Raw byte view; ensures the union is at least 24 bytes. */
unsigned char uc[24];
/* Two Py_hash_t for FNV: a prefix and a suffix, stored at the start. */
struct {
Py_hash_t prefix;
Py_hash_t suffix;
} fnv;
/* Two uint64 keys for SipHash24, occupying the first 16 bytes. */
struct {
uint64_t k0;
uint64_t k1;
} siphash;
/* A different (!) Py_hash_t for the small string optimization: the
   16 bytes of padding place `suffix` after the fnv/siphash data, so it
   does not overlap them. */
struct {
unsigned char padding[16];
Py_hash_t suffix;
} djbx33a;
/* pyexpat XML hash salt; same offset (after 16 bytes of padding) as
   djbx33a.suffix, as shown in the layout table. */
struct {
unsigned char padding[16];
Py_hash_t hashsalt;
} expat;
} _Py_HashSecret_t;
/* The hash secret itself, exported as data. */
PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret;

/* Only declared in debug builds (Py_DEBUG).
   NOTE(review): presumably non-zero once _Py_HashSecret has been filled
   in -- confirm against the definition. */
#ifdef Py_DEBUG
PyAPI_DATA(int) _Py_HashSecret_Initialized;
#endif


/* hash function definition */
typedef struct {
Py_hash_t (*const hash)(const void *, Py_ssize_t);
Expand All @@ -94,7 +17,7 @@ PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void);
#endif


/* cutoff for small string DJBX33A optimization in range [1, cutoff).
/* Cutoff for small string DJBX33A optimization in range [1, cutoff).
*
* About 50% of the strings in a typical Python application are smaller than
* 6 to 7 chars. However DJBX33A is vulnerable to hash collision attacks.
Expand All @@ -112,7 +35,7 @@ PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void);
#endif /* Py_HASH_CUTOFF */


/* hash algorithm selection
/* Hash algorithm selection
*
* The values for Py_HASH_* are hard-coded in the
* configure script.
Expand Down Expand Up @@ -140,5 +63,4 @@ PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void);
#ifdef __cplusplus
}
#endif

#endif /* !Py_HASH_H */
#endif // !Py_HASH_H
1 change: 1 addition & 0 deletions Modules/_elementtree.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include "Python.h"
#include "pycore_import.h" // _PyImport_GetModuleAttrString()
#include "pycore_pyhash.h" // _Py_HashSecret
#include "structmember.h" // PyMemberDef
#include "expat.h"
#include "pyexpat.h"
Expand Down
3 changes: 2 additions & 1 deletion Modules/_hashopenssl.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@

#include "Python.h"
#include "pycore_hashtable.h"
#include "hashlib.h"
#include "pycore_pyhash.h" // _Py_HashBytes()
#include "pycore_strhex.h" // _Py_strhex()
#include "hashlib.h"

/* EVP is the preferred interface to hashing in OpenSSL */
#include <openssl/evp.h>
Expand Down
5 changes: 5 additions & 0 deletions Modules/_xxtestfuzz/fuzzer.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,12 @@

See the source code for LLVMFuzzerTestOneInput for details. */

#ifndef Py_BUILD_CORE
# define Py_BUILD_CORE 1
#endif

#include <Python.h>
#include "pycore_pyhash.h" // _Py_HashBytes()
#include <stdlib.h>
#include <inttypes.h>

Expand Down
1 change: 1 addition & 0 deletions Modules/pyexpat.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#include "Python.h"
#include "pycore_import.h" // _PyImport_SetModule()
#include "pycore_pyhash.h" // _Py_HashSecret
#include <ctype.h>

#include "structmember.h" // PyMemberDef
Expand Down
1 change: 1 addition & 0 deletions Python/hashtable.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@

#include "Python.h"
#include "pycore_hashtable.h"
#include "pycore_pyhash.h" // _Py_HashPointerRaw()

#define HASHTABLE_MIN_SIZE 16
#define HASHTABLE_HIGH 0.50
Expand Down
1 change: 1 addition & 0 deletions Python/pyhash.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
All the utility functions (_Py_Hash*()) return "-1" to signify an error.
*/
#include "Python.h"
#include "pycore_pyhash.h" // _Py_HashSecret_t

#ifdef __APPLE__
# include <libkern/OSByteOrder.h>
Expand Down