Skip to content

Add length and alternate name parser #96

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Sep 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,17 @@ similar performance boost for parsing zone data.

## Results
Running `zone-bench` on my system (Intel Core i7-1065G7) against an older
`.com` zone file of 12482791271 bytes under Linux (Fedora 37).
`.com` zone file of 12482791271 bytes under Linux (Fedora 38).

clang version 15.0.7, release mode:
clang version 16.0.6, release mode:
```
$ time ./zone-bench parse ../../zones/com.zone
Selected target haswell
Parsed 341535548 records

real 0m16.344s
user 0m15.125s
sys 0m1.165s
real 0m14.812s
user 0m13.704s
sys 0m1.088s
```

There are bound to be bugs and quite possibly smarter ways of implementing
Expand Down
2 changes: 1 addition & 1 deletion conanfile.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ cmocka/1.1.5
CMakeDeps

[options]
cmocka/*:shared=True
cmocka*:shared=False
59 changes: 29 additions & 30 deletions include/zone.h
Original file line number Diff line number Diff line change
Expand Up @@ -298,25 +298,21 @@ typedef enum {
#define ZONE_CAA_TAG (1u << 12)
/** @} */

// FIXME: drop rdata_info, just use field_info
typedef struct zone_rdata_info zone_rdata_info_t;
struct zone_rdata_info {
typedef struct zone_field_info zone_field_info_t;
struct zone_field_info {
zone_string_t name;
uint32_t type;
uint32_t qualifiers;
zone_table_t symbols;
};

typedef struct zone_rdata_info zone_field_info_t;

/**
* @defgroup options Type options
* @brief Options for record types
*
* @{
*/
// type options
// ZONE_IN goes here too!
// ZONE_IN (1) can be used too
#define ZONE_ANY (1<<2)
#define ZONE_EXPERIMENTAL (1<<3)
#define ZONE_OBSOLETE (1<<4)
Expand All @@ -328,7 +324,7 @@ struct zone_type_info {
uint32_t options;
struct {
size_t length;
const zone_rdata_info_t *fields;
const zone_field_info_t *fields;
} rdata;
};

Expand All @@ -340,20 +336,23 @@ struct zone_type_info {
// worst-case read (e.g. 64 consecutive line feeds). in practice a single
// block will never contain 64 tokens, therefore, to optimize throughput,
// allocate twice the size so consecutive index operations can be done
#define ZONE_TAPE_SIZE (100 * (ZONE_BLOCK_SIZE + ZONE_BLOCK_SIZE))
#define ZONE_TAPE_SIZE ((100 * ZONE_BLOCK_SIZE) + ZONE_BLOCK_SIZE)

#define ZONE_RDATA_SIZE (65535)

#define ZONE_RDATA_LIMIT (65535)
#define ZONE_NAME_SIZE (255)
#define ZONE_PADDING_SIZE (ZONE_BLOCK_SIZE)

typedef struct zone_name_block zone_name_block_t;
struct zone_name_block {
typedef struct zone_name_buffer zone_name_buffer_t;
struct zone_name_buffer {
size_t length; /**< Length of domain name stored in block */
uint8_t octets[ 255 + ZONE_BLOCK_SIZE ];
uint8_t octets[ ZONE_NAME_SIZE + ZONE_PADDING_SIZE ];
};

typedef struct zone_rdata_block zone_rdata_block_t;
struct zone_rdata_block {
typedef struct zone_rdata_buffer zone_rdata_buffer_t;
struct zone_rdata_buffer {
size_t length; /**< Length of RDATA stored in block */
uint8_t octets[ 65535 + 4096 /* nsec padding */ ];
uint8_t octets[ ZONE_RDATA_SIZE + 4096 /* nsec padding */ ];
};

// @private
Expand All @@ -371,7 +370,7 @@ struct zone_rdata_block {
typedef struct zone_file zone_file_t;
struct zone_file {
zone_file_t *includer;
zone_name_block_t origin, owner;
zone_name_buffer_t origin, owner;
uint16_t last_type;
uint32_t last_ttl, default_ttl;
uint16_t last_class;
Expand All @@ -397,9 +396,10 @@ struct zone_file {
uint64_t is_escaped;
uint64_t follows_contiguous;
} state;
// vector of tokens generated by the indexer. guaranteed to be large
// vector(s) of tokens generated by the indexer. guaranteed to be large
// enough to hold every token for a single read + terminators
struct { const char **head, **tail, *tape[ZONE_TAPE_SIZE + 2]; } fields;
struct { const char **head, **tail, *tape[ZONE_TAPE_SIZE + 1]; } delimiters;
struct { uint16_t *head, *tail, tape[ZONE_TAPE_SIZE + 1]; } lines;
};

Expand Down Expand Up @@ -534,29 +534,28 @@ typedef struct {
* rdata buffer to use next. Rotation of name buffers is controlled by the
* parser.
*/
typedef struct zone_cache zone_cache_t;
struct zone_cache {
typedef struct zone_buffers zone_buffers_t;
struct zone_buffers {
size_t size; /**< Number of name and rdata storage blocks available */
zone_name_block_t *owner;
zone_rdata_block_t *rdata;
zone_name_buffer_t *owner;
zone_rdata_buffer_t *rdata;
};

struct zone_parser {
zone_options_t options;
void *user_data;
volatile void *environment; // FIXME: not sure about this yet
struct {
size_t size;
struct {
size_t serial;
zone_name_block_t *blocks;
zone_name_buffer_t *blocks;
} owner;
struct {
zone_rdata_block_t *blocks;
zone_rdata_buffer_t *blocks;
} rdata;
} cache;
zone_name_block_t *owner;
zone_rdata_block_t *rdata;
} buffers;
zone_name_buffer_t *owner;
zone_rdata_buffer_t *rdata;
zone_file_t *file, first;
};

Expand Down Expand Up @@ -592,7 +591,7 @@ ZONE_EXPORT int32_t
zone_parse(
zone_parser_t *parser,
const zone_options_t *options,
zone_cache_t *cache,
zone_buffers_t *buffers,
const char *path,
void *user_data)
zone_nonnull((1,2,3,4));
Expand All @@ -604,7 +603,7 @@ ZONE_EXPORT int32_t
zone_parse_string(
zone_parser_t *parser,
const zone_options_t *options,
zone_cache_t *cache,
zone_buffers_t *buffers,
const char *string,
size_t length,
void *user_data)
Expand Down
11 changes: 6 additions & 5 deletions src/bench.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ static const target_t targets[] = {
extern int32_t zone_open(
zone_parser_t *,
const zone_options_t *,
zone_cache_t *,
zone_buffers_t *,
const char *,
void *user_data);

Expand Down Expand Up @@ -129,6 +129,7 @@ static const target_t *select_target(const char *name)
if (targets[i].instruction_set & supported)
target = &targets[i];
}
assert(target != NULL);
} else {
for (size_t i=0; !target && i < n; i++) {
if (strcasecmp(name, targets[i].name) == 0)
Expand Down Expand Up @@ -208,16 +209,16 @@ int main(int argc, char *argv[])

zone_parser_t parser = { 0 };
zone_options_t options = { 0 };
zone_name_block_t owner;
zone_rdata_block_t rdata;
zone_cache_t cache = { 1, &owner, &rdata };
zone_name_buffer_t owner;
zone_rdata_buffer_t rdata;
zone_buffers_t buffers = { 1, &owner, &rdata };

options.accept.add = &bench_accept;
options.origin = ".";
options.default_ttl = 3600;
options.default_class = ZONE_IN;

if (zone_open(&parser, &options, &cache, argv[argc-1], NULL) < 0)
if (zone_open(&parser, &options, &buffers, argv[argc-1], NULL) < 0)
exit(EXIT_FAILURE);
if (bench(&parser, target) < 0)
exit(EXIT_FAILURE);
Expand Down
2 changes: 1 addition & 1 deletion src/fallback/base16.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ static zone_really_inline int32_t parse_base16(

uint8_t x0 = 0x80, x1 = 0x80;
uint8_t *w = &parser->rdata->octets[parser->rdata->length];
const uint8_t *ws = w, *we = &parser->rdata->octets[ZONE_RDATA_LIMIT];
const uint8_t *ws = w, *we = &parser->rdata->octets[ZONE_RDATA_SIZE];
const char *p;

do {
Expand Down
126 changes: 43 additions & 83 deletions src/fallback/name.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,91 +12,49 @@
zone_nonnull_all
static zone_really_inline int32_t scan_name(
zone_parser_t *parser,
const zone_type_info_t *type,
const zone_field_info_t *field,
const uint8_t delimiters[256],
const token_t *token,
uint8_t octets[255 + ZONE_BLOCK_SIZE],
size_t *length)
size_t *lengthp)
{
uint8_t *l = octets, *b = octets + 1;
const uint8_t *bs = octets + 255;
const char *s = token->data;
uint8_t *l = octets, *w = octets + 1;
const uint8_t *we = octets + 255;
const char *t = token->data, *te = t + token->length;

l[0] = 0;
(void)parser;

if (s[0] == '.') {
if (delimiters[(uint8_t)s[1]] == token->code)
SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type));
*length = 1;
return 0;
}
l[0] = 0;

while (b < bs) {
const uint8_t c = (uint8_t)s[0];
if (c == '\\') {
uint8_t d[3];
d[0] = (uint8_t)s[1] - '0';
if (*t == '.')
return (*lengthp = token->length) == 1 ? 0 : -1;

if (d[0] > 2) {
b[0] = (uint8_t)s[1];
b += 1; s += 2;
} else {
uint8_t m = d[0] < 2 ? 9 : 5;
d[1] = (uint8_t)s[2] - '0';
d[2] = (uint8_t)s[3] - '0';
if (d[1] > m || d[2] > m)
SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type));
b[0] = d[0] * 100 + d[1] * 10 + d[0];
b += 1; s += 4;
}
} else if (c == '.') {
if ((b - 1) - l > 63 || (b - 1) - l == 0)
SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type));
l[0] = (uint8_t)((b - 1) - l);
l = b;
while ((t < te) & (w < we)) {
*w = (uint8_t)*t;
if (*t == '\\') {
uint32_t n;
if (!(n = unescape(t, w)))
return -1;
w += 1; t += n;
} else if (*t == '.') {
if ((w - 1) - l > 63 || (w - 1) - l == 0)
return -1;
l[0] = (uint8_t)((w - 1) - l);
l = w;
l[0] = 0;
b += 1; s += 1;
} else if (delimiters[c] != token->code) {
if ((b - 1) - l > 63)
SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type));
l[0] = (uint8_t)((b - 1) - l);
break;
w += 1; t += 1;
} else {
b[0] = c;
b += 1; s += 1;
w += 1; t += 1;
}
}

if (delimiters[(uint8_t)*s] == token->code)
SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type));
if ((w - 1) - l > 63)
return -1;
*l = (uint8_t)((w - 1) - l);

*length = (size_t)(b - octets);
return l[0] == 0 ? 0 : ZONE_NAME;
}
if (t != te || w >= we)
return -1;

zone_nonnull_all
static zone_really_inline int32_t scan_contiguous_name(
zone_parser_t *parser,
const zone_type_info_t *type,
const zone_field_info_t *field,
const token_t *token,
uint8_t octets[255 + ZONE_BLOCK_SIZE],
size_t *length)
{
return scan_name(parser, type, field, contiguous, token, octets, length);
}

zone_nonnull_all
static zone_really_inline int32_t scan_quoted_name(
zone_parser_t *parser,
const zone_type_info_t *type,
const zone_field_info_t *field,
const token_t *token,
uint8_t octets[255 + ZONE_BLOCK_SIZE],
size_t *length)
{
return scan_name(parser, type, field, quoted, token, octets, length);
*lengthp = (size_t)(w - octets);
return *l != 0;
}

zone_nonnull_all
Expand All @@ -112,32 +70,34 @@ static zone_really_inline int32_t parse_name(

if (zone_likely(token->code == CONTIGUOUS)) {
// a freestanding "@" denotes the current origin
if (token->data[0] == '@' && !is_contiguous((uint8_t)token->data[1]))
if (token->data[0] == '@' && token->length > 1)
goto relative;
r = scan_contiguous_name(parser, type, field, token, o, &n);
r = scan_name(parser, token, o, &n);
if (r == 0)
goto absolute;
if (r < 0)
return r;
return (void)(parser->rdata->length += n), ZONE_NAME;
if (r > 0)
goto relative;
} else if (token->code == QUOTED) {
r = scan_quoted_name(parser, type, field, token, o, &n);
if (token->length == 0)
goto invalid;
r = scan_name(parser, token, o, &n);
if (r == 0)
goto absolute;
if (r < 0)
return r;
return (void)(parser->rdata->length += n), ZONE_NAME;
if (r > 0)
goto relative;
} else {
return have_string(parser, type, field, token);
}

invalid:
SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type));

relative:
if (n > 255 - parser->file->origin.length)
SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type));
memcpy(o+n, parser->file->origin.octets, parser->file->origin.length);
parser->rdata->length += n + parser->file->origin.length;
return ZONE_NAME;
absolute:
parser->rdata->length += n;
return ZONE_NAME;
}

#endif // NAME_H
Loading