Skip to content

Commit 84c1e01

Browse files
committed
Improve RRTYPE and CLASS parser
Fixes #6.
1 parent 6f51d35 commit 84c1e01

40 files changed

+2353
-1995
lines changed

CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,6 @@ target_include_directories(
135135

136136
target_sources(zone PRIVATE
137137
src/zone.c
138-
src/types.c
139138
src/log.c
140139
src/parser.c
141140
src/fallback/parser.c

include/zone.h

Lines changed: 9 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -183,9 +183,6 @@ extern "C" {
183183
#define ZONE_DLV (32769u)
184184
/** @} */
185185

186-
typedef int32_t zone_code_t;
187-
typedef int32_t zone_return_t;
188-
189186
typedef struct zone_string zone_string_t;
190187
struct zone_string {
191188
size_t length;
@@ -194,7 +191,10 @@ struct zone_string {
194191

195192
typedef struct zone_symbol zone_symbol_t;
196193
struct zone_symbol {
197-
zone_string_t key;
194+
struct {
195+
char data[24]; // zero padded for convenient vectorized comparison
196+
size_t length;
197+
} key;
198198
uint32_t value;
199199
};
200200

@@ -204,34 +204,6 @@ struct zone_table {
204204
const zone_symbol_t *symbols; // sorted for use with bsearch
205205
};
206206

207-
// @private
208-
//
209-
// bsearch is quite slow compared to a hash table, but a hash table is either
210-
// quite big or there is a significant chance of collisions. a minimal perfect
211-
// hash table can be used instead, but there is a good chance of mispredicted
212-
// branches.
213-
//
214-
// the fast table provides a hybrid solution. the current incarnation uses the
215-
// first (upper case) character to make a first selection. the last character
216-
// is permuted and used as key for the smaller table. in practice, it should
217-
// effectively function as a one-level radix trie without branching.
218-
//
219-
// the permutation used is the following.
220-
// 1. use the last character as one always exists, token length is available,
221-
// is very likely alphanumeric and likely does not reoccur too often for
222-
// records starting with the same alphabetic character. this will provide
223-
// a unique key for e.g. MB, MD, MF, MG, MR, MX and e.g. NSEC, NSEC3.
224-
// 2. multiply the character by a given number to get a reasonably good
225-
// distribution.
226-
// 3. increment the character by the length of the identifier to ensure
227-
// unique keys for identifiers that begin and end with the same
228-
// characters. e.g. A and AAAA.
229-
typedef struct zone_fast_table zone_fast_table_t;
230-
struct zone_fast_table {
231-
uint8_t keys[16];
232-
const zone_symbol_t *symbols[16];
233-
};
234-
235207
/**
236208
* @brief Type of value defined by field
237209
*
@@ -326,6 +298,7 @@ typedef enum {
326298
#define ZONE_CAA_TAG (1u << 12)
327299
/** @} */
328300

301+
// FIXME: drop rdata_info, just use field_info
329302
typedef struct zone_rdata_info zone_rdata_info_t;
330303
struct zone_rdata_info {
331304
zone_string_t name;
@@ -351,8 +324,7 @@ typedef struct zone_rdata_info zone_field_info_t;
351324

352325
typedef struct zone_type_info zone_type_info_t;
353326
struct zone_type_info {
354-
zone_string_t name;
355-
uint16_t code;
327+
zone_symbol_t name;
356328
uint32_t options;
357329
struct {
358330
size_t length;
@@ -508,7 +480,7 @@ struct zone_name {
508480
// invoked for each record (host order). header (owner, type, class and ttl)
509481
// fields are passed individually for convenience. rdata fields can be visited
510482
// individually by means of the iterator
511-
typedef zone_return_t(*zone_add_t)(
483+
typedef int32_t(*zone_add_t)(
512484
zone_parser_t *,
513485
const zone_name_t *, // owner (length + octets)
514486
uint16_t, // type
@@ -615,7 +587,7 @@ struct zone_parser {
615587
/**
616588
* @brief Parse zone file
617589
*/
618-
ZONE_EXPORT zone_return_t
590+
ZONE_EXPORT int32_t
619591
zone_parse(
620592
zone_parser_t *parser,
621593
const zone_options_t *options,
@@ -627,7 +599,7 @@ zone_nonnull((1,2,3,4));
627599
/**
628600
* @brief Parse zone from string
629601
*/
630-
ZONE_EXPORT zone_return_t
602+
ZONE_EXPORT int32_t
631603
zone_parse_string(
632604
zone_parser_t *parser,
633605
const zone_options_t *options,

scripts/hash.c

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
/*
2+
* hash.c -- Calculate perfect hash for TYPEs and CLASSes
3+
*
4+
* Copyright (c) 2023, NLnet Labs. All rights reserved.
5+
*
6+
* SPDX-License-Identifier: BSD-3-Clause
7+
*/
8+
#include <stdio.h>
9+
#include <stdbool.h>
10+
#include <stdint.h>
11+
#include <string.h>
12+
#include <inttypes.h>
13+
14+
typedef struct tuple tuple_t;
15+
struct tuple {
16+
char name[16];
17+
uint16_t code;
18+
bool type;
19+
};
20+
21+
static const tuple_t types_and_classes[] = {
22+
// classes
23+
{ "IN", 1, false },
24+
{ "CS", 2, false },
25+
{ "CH", 3, false },
26+
{ "HS", 4, false },
27+
// types
28+
{ "A", 1, true },
29+
{ "NS", 2, true },
30+
{ "MD", 3, true },
31+
{ "MF", 4, true },
32+
{ "CNAME", 5, true },
33+
{ "SOA", 6, true },
34+
{ "MB", 7, true },
35+
{ "MG", 8, true },
36+
{ "MR", 9, true },
37+
{ "NULL", 10, true },
38+
{ "WKS", 11, true },
39+
{ "PTR", 12, true },
40+
{ "HINFO", 13, true },
41+
{ "MINFO", 14, true },
42+
{ "MX", 15, true },
43+
{ "TXT", 16, true },
44+
{ "RP", 17, true },
45+
{ "AFSDB", 18, true },
46+
{ "X25", 19, true },
47+
{ "ISDN", 20, true },
48+
{ "RT", 21, true },
49+
{ "NSAP", 22, true },
50+
{ "NSAP-PTR", 23, true },
51+
{ "SIG", 24, true },
52+
{ "KEY", 25, true },
53+
{ "PX", 26, true },
54+
{ "GPOS", 27, true },
55+
{ "AAAA", 28, true },
56+
{ "LOC", 29, true },
57+
{ "NXT", 30, true },
58+
{ "SRV", 33, true },
59+
{ "NAPTR", 35, true },
60+
{ "KX", 36, true },
61+
{ "CERT", 37, true },
62+
{ "A6", 38, true },
63+
{ "DNAME", 39, true },
64+
{ "APL", 42, true },
65+
{ "DS", 43, true },
66+
{ "SSHFP", 44, true },
67+
{ "IPSECKEY", 45, true },
68+
{ "RRSIG", 46, true },
69+
{ "NSEC", 47, true },
70+
{ "DNSKEY", 48, true },
71+
{ "DHCID", 49, true },
72+
{ "NSEC3", 50, true },
73+
{ "NSEC3PARAM", 51, true },
74+
{ "TLSA", 52, true },
75+
{ "SMIMEA", 53, true },
76+
{ "HIP", 55, true },
77+
{ "CDS", 59, true },
78+
{ "CDNSKEY", 60, true },
79+
{ "OPENPGPKEY", 61, true },
80+
{ "CSYNC", 62, true },
81+
{ "ZONEMD", 63, true },
82+
{ "SVCB", 64, true },
83+
{ "HTTPS", 65, true },
84+
{ "SPF", 99, true },
85+
{ "NID", 104, true },
86+
{ "L32", 105, true },
87+
{ "L64", 106, true },
88+
{ "LP", 107, true },
89+
{ "EUI48", 108, true },
90+
{ "EUI64", 109, true },
91+
{ "URI", 256, true },
92+
{ "CAA", 257, true },
93+
{ "AVC", 258, true },
94+
{ "DLV", 32769, true }
95+
};
96+
97+
const uint64_t original_magic = 3523216699ull; // original hash from hash.cpp
98+
99+
static uint8_t hash(uint64_t magic, uint64_t value)
100+
{
101+
uint32_t value32 = ((value >> 32) ^ value);
102+
return (value32 * magic) >> 32;
103+
}
104+
105+
static void print_table(uint64_t magic)
106+
{
107+
struct { uint16_t code; bool type; } keys[256];
108+
memset(keys, 0, sizeof(keys));
109+
const size_t n = sizeof(types_and_classes)/sizeof(types_and_classes[0]);
110+
for (size_t i=0; i < n; i++) {
111+
uint64_t value;
112+
memcpy(&value, types_and_classes[i].name, 8);
113+
uint8_t key = hash(magic, value);
114+
keys[key].code = types_and_classes[i].code;
115+
keys[key].type = types_and_classes[i].type;
116+
}
117+
118+
printf("static const symbol_t *hash_to_symbol[256] = {\n");
119+
for (size_t i=0; i < 256; ) {
120+
for (size_t j=i+8; i < j; i++) {
121+
uint16_t code = keys[i].code;
122+
char macro = !code || keys[i].type ? 'T' : 'C';
123+
printf("%c(%u), ", macro, code);
124+
}
125+
printf("\n");
126+
}
127+
printf("};\n");
128+
}
129+
130+
int main(int argc, char *argv[])
131+
{
132+
const size_t n = sizeof(types_and_classes)/sizeof(types_and_classes[0]);
133+
for (uint64_t magic = original_magic; magic < UINT64_MAX; magic++) {
134+
size_t i;
135+
uint16_t keys[256] = { 0 };
136+
for (i=0; i < n; i++) {
137+
uint64_t value;
138+
memcpy(&value, types_and_classes[i].name, 8);
139+
140+
uint8_t key = hash(magic, value);
141+
if (keys[key])
142+
break;
143+
keys[key] = 1;
144+
}
145+
146+
if (i == n) {
147+
printf("i: %zu, magic: %" PRIu64 "\n", i, magic);
148+
for (i=0; i < n; i++) {
149+
uint64_t value;
150+
memcpy(&value, types_and_classes[i].name, 8);
151+
uint8_t key = hash(magic, value);
152+
printf("TYPE_%s: %" PRIu8 " (%" PRIu16 ")\n", types_and_classes[i].name, key, types_and_classes[i].code);
153+
}
154+
print_table(magic);
155+
return 0;
156+
}
157+
}
158+
159+
printf("no magic value\n");
160+
return 1;
161+
}

scripts/keys.py

Lines changed: 0 additions & 57 deletions
This file was deleted.

src/fallback/ip4.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ static zone_really_inline int32_t parse_ip4(
3434
n = n * 10 + (uint8_t)d;
3535
} else {
3636
if (!(p - ps) || p - ps > 3 || n < m[(p - ps)] || n > 255 || o - os > 3)
37-
SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type));
37+
SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type));
3838
ps = p + 1;
3939
*o++ = (uint8_t)n;
4040
if (*p != '.')
@@ -44,7 +44,7 @@ static zone_really_inline int32_t parse_ip4(
4444
}
4545

4646
if (is_contiguous((uint8_t)*p) || o - os != 4)
47-
SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type));
47+
SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type));
4848

4949
parser->rdata->length += 4;
5050
return ZONE_IP4;

src/fallback/name.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,20 +39,20 @@ static zone_really_inline int32_t scan_name(
3939
d[1] = (uint8_t)s[2] - '0';
4040
d[2] = (uint8_t)s[3] - '0';
4141
if (d[1] > m || d[2] > m)
42-
SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type));
42+
SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type));
4343
b[0] = d[0] * 100 + d[1] * 10 + d[0];
4444
b += 1; s += 4;
4545
}
4646
} else if (c == '.') {
4747
if ((b - 1) - l > 63 || (b - 1) - l == 0)
48-
SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type));
48+
SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type));
4949
l[0] = (uint8_t)((b - 1) - l);
5050
l = b;
5151
l[0] = 0;
5252
b += 1; s += 1;
5353
} else if (delimiters[c] != token->code) {
5454
if ((b - 1) - l > 63)
55-
SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type));
55+
SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type));
5656
l[0] = (uint8_t)((b - 1) - l);
5757
break;
5858
} else {
@@ -62,7 +62,7 @@ static zone_really_inline int32_t scan_name(
6262
}
6363

6464
if (delimiters[(uint8_t)*s] == token->code)
65-
SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type));
65+
SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type));
6666

6767
*length = (size_t)(b - octets);
6868
return l[0] == 0 ? 0 : ZONE_NAME;
@@ -124,7 +124,7 @@ static zone_really_inline int32_t parse_name(
124124

125125
relative:
126126
if (n > 255 - parser->file->origin.length)
127-
SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type));
127+
SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type));
128128
memcpy(o+n, parser->file->origin.octets, parser->file->origin.length);
129129
parser->rdata->length += n + parser->file->origin.length;
130130
return ZONE_NAME;

0 commit comments

Comments
 (0)