Skip to content

Improve performance of RRTYPE and CLASS parser #81

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,6 @@ target_include_directories(

target_sources(zone PRIVATE
src/zone.c
src/types.c
src/log.c
src/parser.c
src/fallback/parser.c
Expand Down
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,15 @@ similar performance boost for parsing zone data.
Running `zone-bench` on my system (Intel Core i7-1065G7) against an older
`.com` zone file of 12482791271 bytes under Linux (Fedora 37).

GCC 12.2.1, release mode:
clang version 15.0.7, release mode:
```
$ time ./zone-bench parse ../../zones/com.zone
Selected target haswell
Parsed 341535548 records

real 0m18.721s
user 0m17.503s
sys 0m1.181s
real 0m16.344s
user 0m15.125s
sys 0m1.165s
```

There are bound to be bugs and quite possibly smarter ways of implementing
Expand Down
46 changes: 9 additions & 37 deletions include/zone.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,9 +183,6 @@ extern "C" {
#define ZONE_DLV (32769u)
/** @} */

typedef int32_t zone_code_t;
typedef int32_t zone_return_t;

typedef struct zone_string zone_string_t;
struct zone_string {
size_t length;
Expand All @@ -194,7 +191,10 @@ struct zone_string {

typedef struct zone_symbol zone_symbol_t;
struct zone_symbol {
zone_string_t key;
struct {
char data[24]; // zero padded for convenient vectorized comparison
size_t length;
} key;
uint32_t value;
};

Expand All @@ -204,34 +204,6 @@ struct zone_table {
const zone_symbol_t *symbols; // sorted for use with bsearch
};

// @private
//
// bsearch is quite slow compared to a hash table, but a hash table is either
// quite big or there is a significant chance of collisions. a minimal perfect
// hash table can be used instead, but there is a good chance of mispredicted
// branches.
//
// the fast table provides a hybrid solution. the current incarnation uses the
// first (upper case) character to make a first selection. the last character
// is permuted and used as key for the smaller table. in practice, it should
// effectively function as a one-level radix trie without branching.
//
// the permutation used is the following.
// 1. use the last character as one always exists, token length is available,
// is very likely alphanumeric and likely does not reoccur too often for
// records starting with the same alphabetic character. this will provide
// a unique key for e.g. MB, MD, MF MG, MR, MX and e.g. NSEC, NSEC3.
// 2. multiply the character by a given number to get a reasonably good
// distribution.
// 3. increment the character by the length of the identifier to ensure
// unique keys for identifiers that begin and end with the same
// characters. e.g. A and AAAA.
typedef struct zone_fast_table zone_fast_table_t;
struct zone_fast_table {
uint8_t keys[16];
const zone_symbol_t *symbols[16];
};

/**
* @brief Type of value defined by field
*
Expand Down Expand Up @@ -326,6 +298,7 @@ typedef enum {
#define ZONE_CAA_TAG (1u << 12)
/** @} */

// FIXME: drop rdata_info, just use field_info
typedef struct zone_rdata_info zone_rdata_info_t;
struct zone_rdata_info {
zone_string_t name;
Expand All @@ -351,8 +324,7 @@ typedef struct zone_rdata_info zone_field_info_t;

typedef struct zone_type_info zone_type_info_t;
struct zone_type_info {
zone_string_t name;
uint16_t code;
zone_symbol_t name;
uint32_t options;
struct {
size_t length;
Expand Down Expand Up @@ -508,7 +480,7 @@ struct zone_name {
// invoked for each record (host order). header (owner, type, class and ttl)
// fields are passed individually for convenience. rdata fields can be visited
// individually by means of the iterator
typedef zone_return_t(*zone_add_t)(
typedef int32_t(*zone_add_t)(
zone_parser_t *,
const zone_name_t *, // owner (length + octets)
uint16_t, // type
Expand Down Expand Up @@ -615,7 +587,7 @@ struct zone_parser {
/**
* @brief Parse zone file
*/
ZONE_EXPORT zone_return_t
ZONE_EXPORT int32_t
zone_parse(
zone_parser_t *parser,
const zone_options_t *options,
Expand All @@ -627,7 +599,7 @@ zone_nonnull((1,2,3,4));
/**
* @brief Parse zone from string
*/
ZONE_EXPORT zone_return_t
ZONE_EXPORT int32_t
zone_parse_string(
zone_parser_t *parser,
const zone_options_t *options,
Expand Down
161 changes: 161 additions & 0 deletions scripts/hash.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
/*
* hash.c -- Calculate perfect hash for TYPEs and CLASSes
*
* Copyright (c) 2023, NLnet Labs. All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <inttypes.h>

// One entry to be hashed: a TYPE or CLASS mnemonic together with its code.
typedef struct tuple tuple_t;
struct tuple {
// Mnemonic as written in zone files. Initialized from a string literal, so
// the bytes after the text are zero; the hash only reads the first 8 bytes.
char name[16];
// Numeric code associated with the mnemonic.
uint16_t code;
// true for a record TYPE, false for a CLASS.
bool type;
};

// Every mnemonic that must receive a unique 8-bit hash key. Classes and types
// share one table (and therefore one key space); the third member tells them
// apart (false = CLASS, true = TYPE).
static const tuple_t types_and_classes[] = {
// classes (third member false)
{ "IN", 1, false },
{ "CS", 2, false },
{ "CH", 3, false },
{ "HS", 4, false },
// types (third member true), ordered by code
{ "A", 1, true },
{ "NS", 2, true },
{ "MD", 3, true },
{ "MF", 4, true },
{ "CNAME", 5, true },
{ "SOA", 6, true },
{ "MB", 7, true },
{ "MG", 8, true },
{ "MR", 9, true },
{ "NULL", 10, true },
{ "WKS", 11, true },
{ "PTR", 12, true },
{ "HINFO", 13, true },
{ "MINFO", 14, true },
{ "MX", 15, true },
{ "TXT", 16, true },
{ "RP", 17, true },
{ "AFSDB", 18, true },
{ "X25", 19, true },
{ "ISDN", 20, true },
{ "RT", 21, true },
{ "NSAP", 22, true },
{ "NSAP-PTR", 23, true },
{ "SIG", 24, true },
{ "KEY", 25, true },
{ "PX", 26, true },
{ "GPOS", 27, true },
{ "AAAA", 28, true },
{ "LOC", 29, true },
{ "NXT", 30, true },
{ "SRV", 33, true },
{ "NAPTR", 35, true },
{ "KX", 36, true },
{ "CERT", 37, true },
{ "A6", 38, true },
{ "DNAME", 39, true },
{ "APL", 42, true },
{ "DS", 43, true },
{ "SSHFP", 44, true },
{ "IPSECKEY", 45, true },
{ "RRSIG", 46, true },
{ "NSEC", 47, true },
{ "DNSKEY", 48, true },
{ "DHCID", 49, true },
{ "NSEC3", 50, true },
{ "NSEC3PARAM", 51, true },
{ "TLSA", 52, true },
{ "SMIMEA", 53, true },
{ "HIP", 55, true },
{ "CDS", 59, true },
{ "CDNSKEY", 60, true },
{ "OPENPGPKEY", 61, true },
{ "CSYNC", 62, true },
{ "ZONEMD", 63, true },
{ "SVCB", 64, true },
{ "HTTPS", 65, true },
{ "SPF", 99, true },
{ "NID", 104, true },
{ "L32", 105, true },
{ "L64", 106, true },
{ "LP", 107, true },
{ "EUI48", 108, true },
{ "EUI64", 109, true },
{ "URI", 256, true },
{ "CAA", 257, true },
{ "AVC", 258, true },
{ "DLV", 32769, true }
};

// First multiplier tried by the search in main() (original hash from
// hash.cpp). File-local: nothing outside this translation unit uses it.
static const uint64_t original_magic = 3523216699ull;

// Map a 64-bit name prefix to an 8-bit table index.
//
// The upper and lower halves of `value` are XOR-folded into 32 bits, the
// result is multiplied by `magic` (in 64-bit arithmetic) and the lowest byte
// of the upper half of that product is returned.
static uint8_t hash(uint64_t magic, uint64_t value)
{
  const uint32_t folded = (uint32_t)(value >> 32) ^ (uint32_t)value;
  const uint64_t product = (uint64_t)folded * magic;
  return (uint8_t)(product >> 32);
}

// Emit the C source for the 256-entry lookup table that belongs to `magic`.
//
// Each name in types_and_classes is hashed (first 8 bytes only) and its code
// is stored in the slot selected by the hash. Slots are then printed eight
// per line as T(code) for types and unused slots, or C(code) for classes.
static void print_table(uint64_t magic)
{
  enum { SLOTS = 256, PER_ROW = 8 };
  struct { uint16_t code; bool type; } slots[SLOTS];
  const size_t count = sizeof(types_and_classes) / sizeof(types_and_classes[0]);

  memset(slots, 0, sizeof(slots));
  for (size_t idx = 0; idx < count; idx++) {
    uint64_t prefix;
    memcpy(&prefix, types_and_classes[idx].name, 8);
    const uint8_t slot = hash(magic, prefix);
    slots[slot].code = types_and_classes[idx].code;
    slots[slot].type = types_and_classes[idx].type;
  }

  printf("static const symbol_t *hash_to_symbol[256] = {\n");
  for (size_t slot = 0; slot < SLOTS; slot++) {
    const uint16_t code = slots[slot].code;
    // only an occupied slot that holds a class is printed with C(...)
    const char macro = (code != 0 && !slots[slot].type) ? 'C' : 'T';
    printf("%c(%u), ", macro, code);
    if (slot % PER_ROW == PER_ROW - 1)
      printf("\n");
  }
  printf("};\n");
}

// Search for a multiplier that gives every name in types_and_classes a
// distinct 8-bit hash key.
//
// Candidates are tried in increasing order starting at original_magic. For
// the first collision-free candidate the key of every name is printed,
// followed by the generated lookup table, and the program exits with 0.
// If no candidate below UINT64_MAX works, "no magic value" is printed and
// the program exits with 1.
int main(int argc, char *argv[])
{
  // no command line options are supported
  (void)argc;
  (void)argv;

  const size_t n = sizeof(types_and_classes)/sizeof(types_and_classes[0]);
  for (uint64_t magic = original_magic; magic < UINT64_MAX; magic++) {
    bool used[256] = { false };
    size_t i;

    // stop at the first name whose key is already taken
    for (i=0; i < n; i++) {
      uint64_t value;
      memcpy(&value, types_and_classes[i].name, sizeof(value));
      const uint8_t key = hash(magic, value);
      if (used[key])
        break;
      used[key] = true;
    }

    if (i != n)
      continue; // collision, try the next candidate

    printf("i: %zu, magic: %" PRIu64 "\n", i, magic);
    for (i=0; i < n; i++) {
      uint64_t value;
      memcpy(&value, types_and_classes[i].name, sizeof(value));
      const uint8_t key = hash(magic, value);
      // the table mixes classes and types, label each entry accordingly
      const char *kind = types_and_classes[i].type ? "TYPE" : "CLASS";
      printf("%s_%s: %" PRIu8 " (%" PRIu16 ")\n",
             kind, types_and_classes[i].name, key, types_and_classes[i].code);
    }
    print_table(magic);
    return 0;
  }

  printf("no magic value\n");
  return 1;
}
57 changes: 0 additions & 57 deletions scripts/keys.py

This file was deleted.

4 changes: 2 additions & 2 deletions src/fallback/ip4.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ static zone_really_inline int32_t parse_ip4(
n = n * 10 + (uint8_t)d;
} else {
if (!(p - ps) || p - ps > 3 || n < m[(p - ps)] || n > 255 || o - os > 3)
SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type));
SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type));
ps = p + 1;
*o++ = (uint8_t)n;
if (*p != '.')
Expand All @@ -44,7 +44,7 @@ static zone_really_inline int32_t parse_ip4(
}

if (is_contiguous((uint8_t)*p) || o - os != 4)
SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type));
SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type));

parser->rdata->length += 4;
return ZONE_IP4;
Expand Down
Loading