threefoldtech · maxux · Dec 20, 2021 · Dec 20, 2021 · Dec 20, 2021 · Dec 20, 2021
diff --git a/libzdb/api.c b/libzdb/api.c
@@ -416,12 +416,12 @@ zdb_api_t *zdb_api_del(namespace_t *ns, void *key, size_t ksize) {
     return zdb_api_reply_success();
 }
 
-index_root_t *zdb_index_init_lazy(zdb_settings_t *settings, char *indexdir, void *namespace) {
-    return index_init_lazy(settings, indexdir, namespace);
+index_root_t *zdb_index_init_lazy(zdb_settings_t *settings, char *indexdir) {
+    return index_init_lazy(settings, indexdir);
 }
 
-index_root_t *zdb_index_init(zdb_settings_t *settings, char *indexdir, void *namespace, index_branch_t **branches) {
-    return index_init(settings, indexdir, namespace, branches);
+index_root_t *zdb_index_init(zdb_settings_t *settings, char *indexdir) {
+    return index_init(settings, indexdir);
 }
 
 uint64_t zdb_index_availity_check(index_root_t *root) {

diff --git a/libzdb/api.h b/libzdb/api.h
@@ -55,8 +55,8 @@
     int zdb_index_open_readwrite(index_root_t *root, fileid_t fileid);
     void zdb_index_close(index_root_t *zdbindex);
 
-    index_root_t *zdb_index_init_lazy(zdb_settings_t *settings, char *indexdir, void *namespace);
-    index_root_t *zdb_index_init(zdb_settings_t *settings, char *indexdir, void *namespace, index_branch_t **branches);
+    index_root_t *zdb_index_init_lazy(zdb_settings_t *settings, char *indexdir);
+    index_root_t *zdb_index_init(zdb_settings_t *settings, char *indexdir);
     uint64_t zdb_index_availity_check(index_root_t *root);
 
     // index header validity

diff --git a/libzdb/index.c b/libzdb/index.c
@@ -39,7 +39,6 @@ void index_entry_dump(index_entry_t *entry) {
 #ifdef RELEASE
     (void) entry;
 #else
-    zdb_debug("[+] index: entry dump: namespace  : %p\n", entry->namespace);
     zdb_debug("[+] index: entry dump: id length  : %" PRIu8  "\n", entry->idlength);
     zdb_debug("[+] index: entry dump: idx offset : %" PRIu32 "\n", entry->idxoffset);
     zdb_debug("[+] index: entry dump: idx fileid : %" PRIu32 "\n", entry->indexid);
@@ -417,30 +416,28 @@ uint32_t index_next_objectid(index_root_t *root) {
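-// perform the basic "hashing" (crc based) used to point to the expected branch
-// we only keep partial amount of the result to not fill the memory too fast
+// perform the basic "hashing" (crc based) used as lookup key in the index hash
+// the crc32 result is returned as-is, no bucket mask is applied anymore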
 uint32_t index_key_hash(unsigned char *id, uint8_t idlength) {
-    return zdb_crc32((const uint8_t *) id, idlength) & buckets_mask;
+    return zdb_crc32((const uint8_t *) id, idlength);
 }
 
 // main look-up function, used to get an entry from the memory index
 index_entry_t *index_entry_get(index_root_t *root, unsigned char *id, uint8_t idlength) {
-    uint32_t branchkey = index_key_hash(id, idlength);
-    index_branch_t *branch = index_branch_get(root->branches, branchkey);
-    index_entry_t *entry;
+    uint32_t hashkey = index_key_hash(id, idlength);
+    index_entry_t *list;
 
-    // branch not exists
-    if(!branch)
+    // no list registered for this hash key: the id is not in the index
+    if(!(list = index_hash_lookup(root->hash, hashkey)))
         return NULL;
 
-    for(entry = branch->list; entry; entry = entry->next) {
+    // several ids can share one hash key: compare length, then bytes
+    for(index_entry_t *entry = list; entry; entry = entry->next) {
         if(entry->idlength != idlength)
             continue;
 
-        if(entry->namespace != root->namespace)
-            continue;
-
         if(memcmp(entry->id, id, idlength) == 0)
             return entry;
     }
 
+    // list walked completely without a match: entry not found
     return NULL;
 }
 
@@ -509,23 +506,14 @@ int index_entry_delete_memory(index_root_t *root, index_entry_t *entry) {
     root->stats.size -= sizeof(index_entry_t) + entry->idlength;
 
     // running in a mode without index, let's just skip this
-    if(root->branches == NULL)
+    if(root->hash == NULL)
         return 0;
 
-    uint32_t branchkey = index_key_hash(entry->id, entry->idlength);
-    index_branch_t *branch = index_branch_get(root->branches, branchkey);
-    index_entry_t *previous = index_branch_get_previous(branch, entry);
-
     zdb_debug("[+] index: delete memory: removing entry from memory\n");
 
-    if(previous == entry) {
-        zdb_danger("[-] index: entry delete memory: something wrong happens");
-        zdb_danger("[-] index: entry delete memory: branches seems buggy");
+    uint32_t hashkey = index_key_hash(entry->id, entry->idlength);
+    if(!index_hash_remove(root->hash, hashkey, entry))
         return 1;
-    }
-
-    // removing entry from global branch
-    index_branch_remove(branch, entry, previous);
 
     // cleaning memory object
     free(entry);
@@ -710,63 +698,14 @@ size_t index_offset_objectid(uint32_t objectid) {
     return offset;
 }
 
-// iterate over all entries in a single branch
-// and remove if this entry is related to requested namespace
-static inline size_t index_clean_namespace_branch(index_branch_t *branch, void *namespace) {
-    index_entry_t *entry = branch->list;
-    index_entry_t *previous = NULL;
-    size_t deleted  = 0;
-
-    while(entry) {
-        if(entry->namespace != namespace) {
-            // keeping this key, looking forward
-            previous = entry;
-            entry = entry->next;
-            continue;
-        }
-
-        #ifndef RELEASE
-        zdb_log("[+] index: namespace cleaner: free: ");
-        zdb_hexdump(entry->id, entry->idlength);
-        printf("\n"); // FIXME
-        #endif
-
-        // okay, we need to remove this key
-        index_entry_t *next = entry->next;
-        index_entry_t *removed = index_branch_remove(branch, entry, previous);
-
-        free(removed);
-        deleted += 1;
-
-        entry = next;
-    }
-
-    return deleted;
-}
-
 // remove specific namespace from the index
 //
 // we use a global index for everything, when removing a
 // namespace, we walk over all the keys and remove keys matching
 // to this namespace
-int index_clean_namespace(index_root_t *root, void *namespace) {
-    index_branch_t **branches = root->branches;
-    size_t deleted = 0;
-
-    if(!branches)
-        return 0;
-
-    zdb_debug("[+] index: starting namespace cleaner\n");
-
-    for(uint32_t b = 0; b < buckets_branches; b++) {
-        if(!branches[b])
-            continue;
-
-        deleted += index_clean_namespace_branch(branches[b], namespace);
-    }
-
-    zdb_debug("[+] index: namespace cleaner: %lu keys removed\n", deleted);
-
+int index_clean_namespace(index_root_t *root) {
+    index_hash_free(root->hash);
+    root->hash = NULL;
     return 0;
 }
 

diff --git a/libzdb/index.h b/libzdb/index.h
@@ -78,16 +78,6 @@
         // linked list pointer
         struct index_entry_t *next;
 
-        // pointer to source namespace
-        // index should not be aware of his namespace
-        // but since we use a single big index, we need to
-        // be able to make namespace distinction
-        // note: another approch could be separate branch-list per namespace
-        // note 2: we keep a void pointer, we will only compare address and not
-        //         the object itself, this make some opacity later if we change
-        //         and reduce issue with circular inclusion
-        void *namespace;
-
         uint8_t idlength;    // length of the id, here uint8_t limits to 256 bytes
         uint32_t offset;     // offset on the corresponding datafile
         uint32_t idxoffset;  // offset on the index file (index file id is the same as data file)
@@ -103,27 +93,33 @@
 
     } index_entry_t;
 
-    // WARNING: this should be on index_branch.h
-    //          but we can't due to circular dependencies
-    //          in order to fix this, we should put all structs in a dedicated file
     //
-    // the current implementation of the index use rudimental index memory system
-    // it's basicly just linked-list of entries
-    // to improve performance without changing this basic implementation,
-    // which is really slow, of course, we use a "branch" system which simply
-    // splits all the arrays based on an id
+    // the in-memory index hash uses a multi-level indirection
+    // array, based on the crc32 of the entry id
     //
-    // the id is specified on the implementation file, with the length, etc.
+    // more information can be found in the index-branch files
     //
-    // - id 0000: [...........]
-    // - id 0001: [...................]
-    // - id 0002: [...]
-    typedef struct index_branch_t {
-        size_t length;       // length of this branch (count of entries)
-        index_entry_t *list; // entry point of the linked list
-        index_entry_t *last; // pointer to the last item, quicker to append
 
-    } index_branch_t;
+    typedef struct index_hash_t {  // one node of the in-memory index hash
+        char type;  // NOTE(review): presumably tells which union member is in use - confirm
+        union {
+            struct index_hash_t **sub;  // presumably child nodes of the next level - TODO confirm
+            index_entry_t *list;        // entries list, walked through entry->next on lookup
+        };
+
+    } index_hash_t;
+
+    typedef struct index_hash_stats_t {
+        size_t subs;
+        size_t subsubs;
+        size_t entries;
+        size_t max_entries;
+        size_t lists;
+        size_t entries_size;
+        size_t ids_size;
+
+    } index_hash_stats_t;
+
 
     // index status flags
-    // keep some heatly status of the index
+    // keep some health status of the index
@@ -189,10 +185,8 @@
         int updated;        // does current index changed since opened
         int secure;         // enable some safety (see secure zdb_settings_t)
 
-        void *namespace;    // see index_entry_t, same reason
-
         index_seqid_t *seqid;      // sequential fileid mapping
-        index_branch_t **branches; // list of branches (explained later)
+        index_hash_t *hash;        // index keys hashmap
         index_status_t status;     // index health
         index_stats_t stats;       // index statistics
         index_dirty_t dirty;       // bitmap of dirty index files
@@ -281,7 +275,7 @@
     int index_entry_delete_memory(index_root_t *root, index_entry_t *entry);
     int index_entry_is_deleted(index_entry_t *entry);
 
-    int index_clean_namespace(index_root_t *root, void *namespace);
+    int index_clean_namespace(index_root_t *root);
 
     extern index_entry_t *index_reusable_entry;