diff --git a/README.md b/README.md index 2aa9ff3..628aeca 100644 --- a/README.md +++ b/README.md @@ -23,17 +23,17 @@ similar performance boost for parsing zone data. ## Results Running `zone-bench` on my system (Intel Core i7-1065G7) against an older -`.com` zone file of 12482791271 bytes under Linux (Fedora 37). +`.com` zone file of 12482791271 bytes under Linux (Fedora 38). -clang version 15.0.7, release mode: +clang version 16.0.6, release mode: ``` $ time ./zone-bench parse ../../zones/com.zone Selected target haswell Parsed 341535548 records -real 0m16.344s -user 0m15.125s -sys 0m1.165s +real 0m14.812s +user 0m13.704s +sys 0m1.088s ``` There are bound to be bugs and quite possibly smarter ways of implementing diff --git a/conanfile.txt b/conanfile.txt index b964051..e6ae77d 100644 --- a/conanfile.txt +++ b/conanfile.txt @@ -5,4 +5,4 @@ cmocka/1.1.5 CMakeDeps [options] -cmocka/*:shared=True +cmocka*:shared=False diff --git a/include/zone.h b/include/zone.h index c258b95..ee74fd8 100644 --- a/include/zone.h +++ b/include/zone.h @@ -298,25 +298,21 @@ typedef enum { #define ZONE_CAA_TAG (1u << 12) /** @} */ -// FIXME: drop rdata_info, just use field_info -typedef struct zone_rdata_info zone_rdata_info_t; -struct zone_rdata_info { +typedef struct zone_field_info zone_field_info_t; +struct zone_field_info { zone_string_t name; uint32_t type; uint32_t qualifiers; zone_table_t symbols; }; -typedef struct zone_rdata_info zone_field_info_t; - /** * @defgroup options Type options * @brief Options for record types * * @{ */ -// type options -// ZONE_IN goes here too! +// ZONE_IN (1) can be used too #define ZONE_ANY (1<<2) #define ZONE_EXPERIMENTAL (1<<3) #define ZONE_OBSOLETE (1<<4) @@ -328,7 +324,7 @@ struct zone_type_info { uint32_t options; struct { size_t length; - const zone_rdata_info_t *fields; + const zone_field_info_t *fields; } rdata; }; @@ -340,20 +336,23 @@ struct zone_type_info { // worst-case read (e.g. 64 consecutive line feeds). 
in practice a single // block will never contain 64 tokens, therefore, to optimize throughput, // allocate twice the size so consecutive index operations can be done -#define ZONE_TAPE_SIZE (100 * (ZONE_BLOCK_SIZE + ZONE_BLOCK_SIZE)) +#define ZONE_TAPE_SIZE ((100 * ZONE_BLOCK_SIZE) + ZONE_BLOCK_SIZE) + +#define ZONE_RDATA_SIZE (65535) -#define ZONE_RDATA_LIMIT (65535) +#define ZONE_NAME_SIZE (255) +#define ZONE_PADDING_SIZE (ZONE_BLOCK_SIZE) -typedef struct zone_name_block zone_name_block_t; -struct zone_name_block { +typedef struct zone_name_buffer zone_name_buffer_t; +struct zone_name_buffer { size_t length; /**< Length of domain name stored in block */ - uint8_t octets[ 255 + ZONE_BLOCK_SIZE ]; + uint8_t octets[ ZONE_NAME_SIZE + ZONE_PADDING_SIZE ]; }; -typedef struct zone_rdata_block zone_rdata_block_t; -struct zone_rdata_block { +typedef struct zone_rdata_buffer zone_rdata_buffer_t; +struct zone_rdata_buffer { size_t length; /**< Length of RDATA stored in block */ - uint8_t octets[ 65535 + 4096 /* nsec padding */ ]; + uint8_t octets[ ZONE_RDATA_SIZE + 4096 /* nsec padding */ ]; }; // @private @@ -371,7 +370,7 @@ struct zone_rdata_block { typedef struct zone_file zone_file_t; struct zone_file { zone_file_t *includer; - zone_name_block_t origin, owner; + zone_name_buffer_t origin, owner; uint16_t last_type; uint32_t last_ttl, default_ttl; uint16_t last_class; @@ -397,9 +396,10 @@ struct zone_file { uint64_t is_escaped; uint64_t follows_contiguous; } state; - // vector of tokens generated by the indexer. guaranteed to be large + // vector(s) of tokens generated by the indexer. guaranteed to be large // enough to hold every token for a single read + terminators struct { const char **head, **tail, *tape[ZONE_TAPE_SIZE + 2]; } fields; + struct { const char **head, **tail, *tape[ZONE_TAPE_SIZE + 1]; } delimiters; struct { uint16_t *head, *tail, tape[ZONE_TAPE_SIZE + 1]; } lines; }; @@ -534,29 +534,28 @@ typedef struct { * rdata buffer to use next. 
Rotation of name buffers is controlled by the * parser. */ -typedef struct zone_cache zone_cache_t; -struct zone_cache { +typedef struct zone_buffers zone_buffers_t; +struct zone_buffers { size_t size; /**< Number of name and rdata storage blocks available */ - zone_name_block_t *owner; - zone_rdata_block_t *rdata; + zone_name_buffer_t *owner; + zone_rdata_buffer_t *rdata; }; struct zone_parser { zone_options_t options; void *user_data; - volatile void *environment; // FIXME: not sure about this yet struct { size_t size; struct { size_t serial; - zone_name_block_t *blocks; + zone_name_buffer_t *blocks; } owner; struct { - zone_rdata_block_t *blocks; + zone_rdata_buffer_t *blocks; } rdata; - } cache; - zone_name_block_t *owner; - zone_rdata_block_t *rdata; + } buffers; + zone_name_buffer_t *owner; + zone_rdata_buffer_t *rdata; zone_file_t *file, first; }; @@ -592,7 +591,7 @@ ZONE_EXPORT int32_t zone_parse( zone_parser_t *parser, const zone_options_t *options, - zone_cache_t *cache, + zone_buffers_t *buffers, const char *path, void *user_data) zone_nonnull((1,2,3,4)); @@ -604,7 +603,7 @@ ZONE_EXPORT int32_t zone_parse_string( zone_parser_t *parser, const zone_options_t *options, - zone_cache_t *cache, + zone_buffers_t *buffers, const char *string, size_t length, void *user_data) diff --git a/src/bench.c b/src/bench.c index fe518fa..ea4e9e6 100644 --- a/src/bench.c +++ b/src/bench.c @@ -61,7 +61,7 @@ static const target_t targets[] = { extern int32_t zone_open( zone_parser_t *, const zone_options_t *, - zone_cache_t *, + zone_buffers_t *, const char *, void *user_data); @@ -129,6 +129,7 @@ static const target_t *select_target(const char *name) if (targets[i].instruction_set & supported) target = &targets[i]; } + assert(target != NULL); } else { for (size_t i=0; !target && i < n; i++) { if (strcasecmp(name, targets[i].name) == 0) @@ -208,16 +209,16 @@ int main(int argc, char *argv[]) zone_parser_t parser = { 0 }; zone_options_t options = { 0 }; - zone_name_block_t 
owner; - zone_rdata_block_t rdata; - zone_cache_t cache = { 1, &owner, &rdata }; + zone_name_buffer_t owner; + zone_rdata_buffer_t rdata; + zone_buffers_t buffers = { 1, &owner, &rdata }; options.accept.add = &bench_accept; options.origin = "."; options.default_ttl = 3600; options.default_class = ZONE_IN; - if (zone_open(&parser, &options, &cache, argv[argc-1], NULL) < 0) + if (zone_open(&parser, &options, &buffers, argv[argc-1], NULL) < 0) exit(EXIT_FAILURE); if (bench(&parser, target) < 0) exit(EXIT_FAILURE); diff --git a/src/fallback/base16.h b/src/fallback/base16.h index 4381904..bc7e15c 100644 --- a/src/fallback/base16.h +++ b/src/fallback/base16.h @@ -77,7 +77,7 @@ static zone_really_inline int32_t parse_base16( uint8_t x0 = 0x80, x1 = 0x80; uint8_t *w = &parser->rdata->octets[parser->rdata->length]; - const uint8_t *ws = w, *we = &parser->rdata->octets[ZONE_RDATA_LIMIT]; + const uint8_t *ws = w, *we = &parser->rdata->octets[ZONE_RDATA_SIZE]; const char *p; do { diff --git a/src/fallback/name.h b/src/fallback/name.h index 66632f7..3d5c383 100644 --- a/src/fallback/name.h +++ b/src/fallback/name.h @@ -12,91 +12,49 @@ zone_nonnull_all static zone_really_inline int32_t scan_name( zone_parser_t *parser, - const zone_type_info_t *type, - const zone_field_info_t *field, - const uint8_t delimiters[256], const token_t *token, uint8_t octets[255 + ZONE_BLOCK_SIZE], - size_t *length) + size_t *lengthp) { - uint8_t *l = octets, *b = octets + 1; - const uint8_t *bs = octets + 255; - const char *s = token->data; + uint8_t *l = octets, *w = octets + 1; + const uint8_t *we = octets + 255; + const char *t = token->data, *te = t + token->length; - l[0] = 0; + (void)parser; - if (s[0] == '.') { - if (delimiters[(uint8_t)s[1]] == token->code) - SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type)); - *length = 1; - return 0; - } + l[0] = 0; - while (b < bs) { - const uint8_t c = (uint8_t)s[0]; - if (c == '\\') { - uint8_t d[3]; - d[0] = (uint8_t)s[1] - '0'; + if 
(*t == '.') + return (*lengthp = token->length) == 1 ? 0 : -1; - if (d[0] > 2) { - b[0] = (uint8_t)s[1]; - b += 1; s += 2; - } else { - uint8_t m = d[0] < 2 ? 9 : 5; - d[1] = (uint8_t)s[2] - '0'; - d[2] = (uint8_t)s[3] - '0'; - if (d[1] > m || d[2] > m) - SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type)); - b[0] = d[0] * 100 + d[1] * 10 + d[0]; - b += 1; s += 4; - } - } else if (c == '.') { - if ((b - 1) - l > 63 || (b - 1) - l == 0) - SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type)); - l[0] = (uint8_t)((b - 1) - l); - l = b; + while ((t < te) & (w < we)) { + *w = (uint8_t)*t; + if (*t == '\\') { + uint32_t n; + if (!(n = unescape(t, w))) + return -1; + w += 1; t += n; + } else if (*t == '.') { + if ((w - 1) - l > 63 || (w - 1) - l == 0) + return -1; + l[0] = (uint8_t)((w - 1) - l); + l = w; l[0] = 0; - b += 1; s += 1; - } else if (delimiters[c] != token->code) { - if ((b - 1) - l > 63) - SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type)); - l[0] = (uint8_t)((b - 1) - l); - break; + w += 1; t += 1; } else { - b[0] = c; - b += 1; s += 1; + w += 1; t += 1; } } - if (delimiters[(uint8_t)*s] == token->code) - SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type)); + if ((w - 1) - l > 63) + return -1; + *l = (uint8_t)((w - 1) - l); - *length = (size_t)(b - octets); - return l[0] == 0 ? 
0 : ZONE_NAME; -} + if (t != te || w >= we) + return -1; -zone_nonnull_all -static zone_really_inline int32_t scan_contiguous_name( - zone_parser_t *parser, - const zone_type_info_t *type, - const zone_field_info_t *field, - const token_t *token, - uint8_t octets[255 + ZONE_BLOCK_SIZE], - size_t *length) -{ - return scan_name(parser, type, field, contiguous, token, octets, length); -} - -zone_nonnull_all -static zone_really_inline int32_t scan_quoted_name( - zone_parser_t *parser, - const zone_type_info_t *type, - const zone_field_info_t *field, - const token_t *token, - uint8_t octets[255 + ZONE_BLOCK_SIZE], - size_t *length) -{ - return scan_name(parser, type, field, quoted, token, octets, length); + *lengthp = (size_t)(w - octets); + return *l != 0; } zone_nonnull_all @@ -112,32 +70,34 @@ static zone_really_inline int32_t parse_name( if (zone_likely(token->code == CONTIGUOUS)) { // a freestanding "@" denotes the current origin - if (token->data[0] == '@' && !is_contiguous((uint8_t)token->data[1])) + if (token->data[0] == '@' && token->length > 1) goto relative; - r = scan_contiguous_name(parser, type, field, token, o, &n); + r = scan_name(parser, token, o, &n); if (r == 0) - goto absolute; - if (r < 0) - return r; + return (void)(parser->rdata->length += n), ZONE_NAME; + if (r > 0) + goto relative; } else if (token->code == QUOTED) { - r = scan_quoted_name(parser, type, field, token, o, &n); + if (token->length == 0) + goto invalid; + r = scan_name(parser, token, o, &n); if (r == 0) - goto absolute; - if (r < 0) - return r; + return (void)(parser->rdata->length += n), ZONE_NAME; + if (r > 0) + goto relative; } else { return have_string(parser, type, field, token); } +invalid: + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type)); + relative: if (n > 255 - parser->file->origin.length) SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type)); memcpy(o+n, parser->file->origin.octets, parser->file->origin.length); parser->rdata->length += n 
+ parser->file->origin.length; return ZONE_NAME; -absolute: - parser->rdata->length += n; - return ZONE_NAME; } #endif // NAME_H diff --git a/src/fallback/nsap.h b/src/fallback/nsap.h index 22f2f4f..f2f21d6 100644 --- a/src/fallback/nsap.h +++ b/src/fallback/nsap.h @@ -40,7 +40,7 @@ static zone_really_inline int32_t parse_nsap( uint8_t x0 = 0x80, x1 = 0x80; uint8_t *w = &parser->rdata->octets[parser->rdata->length]; - const uint8_t *ws = w, *we = &parser->rdata->octets[ZONE_RDATA_LIMIT]; + const uint8_t *ws = w, *we = &parser->rdata->octets[ZONE_RDATA_SIZE]; while (w < we) { x0 = b16rmap[(uint8_t)p[0]]; diff --git a/src/fallback/parser.c b/src/fallback/parser.c index b306e5d..1b68345 100644 --- a/src/fallback/parser.c +++ b/src/fallback/parser.c @@ -15,10 +15,10 @@ #include "generic/number.h" #include "generic/ttl.h" #include "fallback/time.h" +#include "fallback/text.h" #include "fallback/name.h" #include "fallback/ip4.h" #include "generic/ip6.h" -#include "fallback/text.h" #include "fallback/base16.h" #include "fallback/base32.h" #include "generic/base64.h" diff --git a/src/fallback/scanner.h b/src/fallback/scanner.h index d38b997..9e3ddbd 100644 --- a/src/fallback/scanner.h +++ b/src/fallback/scanner.h @@ -36,6 +36,7 @@ static zone_really_inline const char *scan_quoted( parser->file->lines.tail[0] += *(start + 1) == '\n'; start += 2; } else if (*start == '\"') { + *parser->file->delimiters.tail++ = start; return start + 1; } else if (*start == '\n') { parser->file->lines.tail[0]++; @@ -64,6 +65,7 @@ static zone_really_inline const char *scan_contiguous( start += 2; } } else { + *parser->file->delimiters.tail++ = start; return start; } } @@ -145,6 +147,9 @@ static zone_never_inline void step(zone_parser_t *parser, token_t *token) parser->file->fields.tail = parser->file->fields.tape; if (parser->file->fields.tape[0]) parser->file->fields.tail++; + // delimiters are never deferred + parser->file->delimiters.head = parser->file->delimiters.tape; + 
parser->file->delimiters.tail = parser->file->delimiters.tape; shuffle: // refill if required @@ -204,6 +209,7 @@ static zone_never_inline void step(zone_parser_t *parser, token_t *token) } parser->file->fields.tail[0] = data_limit; + parser->file->delimiters.tail[0] = data_limit; if (parser->file->fields.head[0] == parser->file->buffer.data) parser->file->start_of_line = start_of_line; else @@ -216,6 +222,9 @@ static zone_never_inline void step(zone_parser_t *parser, token_t *token) // end-of-file is idempotent parser->file->fields.head += (*data != '\0'); if (zone_likely(token->code == CONTIGUOUS)) { + const char *delimiter = *parser->file->delimiters.head++; + assert(delimiter > token->data); + token->length = (size_t)(delimiter - token->data); return; } else if (token->code == LINE_FEED) { if (zone_unlikely(token->data == line_feed)) @@ -228,7 +237,10 @@ static zone_never_inline void step(zone_parser_t *parser, token_t *token) parser->file->start_of_line = !is_blank((uint8_t)*(token->data+1)); return; } else if (token->code == QUOTED) { + const char *delimiter = *parser->file->delimiters.head++; token->data++; + assert(delimiter > token->data); + token->length = (size_t)(delimiter - token->data); return; } else if (token->code == END_OF_FILE) { zone_file_t *file; diff --git a/src/fallback/text.h b/src/fallback/text.h index 582bf31..bb653ba 100644 --- a/src/fallback/text.h +++ b/src/fallback/text.h @@ -1,5 +1,5 @@ /* - * text.h -- some useful comment + * text.h -- fallback parser for strings * * Copyright (c) 2022-2023, NLnet Labs. All rights reserved. 
* @@ -10,63 +10,52 @@ #define TEXT_H zone_nonnull_all -static zone_really_inline int32_t parse_string_internal( +static zone_really_inline uint32_t unescape(const char *text, uint8_t *wire) +{ + uint8_t d[3]; + + if ((d[0] = (uint8_t)text[1] - '0') > 9) { + *wire = (uint8_t)text[1]; + return 2u; + } else { + d[1] = (uint8_t)text[2] - '0'; + d[2] = (uint8_t)text[3] - '0'; + uint32_t o = d[0] * 100 + d[1] * 10 + d[2]; + *wire = (uint8_t)o; + return (o > 255 || d[1] > 9 || d[2] > 9) ? 0 : 4u; + } +} + +zone_nonnull_all +static zone_really_inline int32_t parse_string( zone_parser_t *parser, const zone_type_info_t *type, const zone_field_info_t *field, const token_t *token) { - const uint8_t *d = token->code == CONTIGUOUS ? contiguous : quoted; uint8_t *w = &parser->rdata->octets[parser->rdata->length + 1]; const uint8_t *ws = w - 1, *we = w + 255; - const char *t = token->data; - - while (w < we) { - const uint8_t c = (uint8_t)*t; - if (c == '\\') { - uint8_t x[3]; - x[0] = (uint8_t)t[1] - '0'; + const char *t = token->data, *te = t + token->length; - if (x[0] > 2) { - w[0] = (uint8_t)t[1]; - w += 1; t += 2; - } else { - x[1] = (uint8_t)t[2] - '0'; - x[2] = (uint8_t)t[3] - '0'; - const uint32_t o = x[0] * 100 + x[1] * 10 + x[2]; - if (o > 255 || x[1] > 9 || x[2] > 9) - SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type)); - w[0] = (uint8_t)o; - w += 1; t += 4; - } - } else if (d[c] == token->code) { - w[0] = c; - w += 1; t += 1; + while ((t < te) & (w < we)) { + *w = (uint8_t)*t; + if (zone_unlikely(*t == '\\')) { + uint32_t o; + if (!(o = unescape(t, w))) + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type)); + w += 1; t += o; } else { - break; + w += 1; t += 1; } } - if (w == we) + if (t != te || w >= we) SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type)); - assert(d[(uint8_t)*t] != token->code); parser->rdata->octets[parser->rdata->length] = (uint8_t)((w - ws) - 1); parser->rdata->length += (size_t)(w - ws); return 
ZONE_STRING; } -zone_nonnull_all -static zone_really_inline int32_t parse_string( - zone_parser_t *parser, - const zone_type_info_t *type, - const zone_field_info_t *field, - const token_t *token) -{ - if (zone_likely(token->code & (CONTIGUOUS|QUOTED))) - return parse_string_internal(parser, type, field, token); - return have_string(parser, type, field, token); -} - zone_nonnull_all static zone_really_inline int32_t parse_text_internal( zone_parser_t *parser, @@ -74,39 +63,24 @@ static zone_really_inline int32_t parse_text_internal( const zone_field_info_t *field, const token_t *token) { - const uint8_t *d = token->code == CONTIGUOUS ? contiguous : quoted; uint8_t *w = &parser->rdata->octets[parser->rdata->length]; - const uint8_t *ws = w, *we = &parser->rdata->octets[ZONE_RDATA_LIMIT]; - const char *t = token->data; + const uint8_t *ws = w, *we = &parser->rdata->octets[ZONE_RDATA_SIZE]; + const char *t = token->data, *te = t + token->length; - while (w < we) { - const uint8_t c = (uint8_t)*t; - if (c == '\\') { - uint8_t x[3]; - x[0] = (uint8_t)t[1] - '0'; - if (x[0] > 9) { - w[0] = (uint8_t)t[1]; - w += 1; t += 2; - } else { - x[1] = (uint8_t)t[2] - '0'; - x[2] = (uint8_t)t[3] - '0'; - const uint32_t o = x[0] * 100 + x[1] * 10 + x[0]; - if (o > 255 || x[1] > 9 || x[2] > 9) - SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type)); - w[0] = (uint8_t)o; - w += 1; t += 4; - } - } else if (d[c] == token->code) { - w[0] = c; - w += 1; t += 1; + while ((t < te) & (w < we)) { + *w = (uint8_t)*t; + if (zone_unlikely(*t == '\\')) { + uint32_t o; + if (!(o = unescape(t, w))) + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type)); + w += 1; t += o; } else { - break; + w += 1; t += 1; } } - if (w == we) + if (t != te || w >= we) SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type)); - assert(d[(uint8_t)*t] != token->code); parser->rdata->length += (size_t)(w - ws); return ZONE_BLOB; } @@ -118,7 +92,7 @@ static zone_really_inline int32_t 
parse_quoted_text( const zone_field_info_t *field, const token_t *token) { - if (zone_likely(token->code == QUOTED)) + if (zone_likely(token->code & QUOTED)) return parse_text_internal(parser, type, field, token); return have_quoted(parser, type, field, token); } diff --git a/src/generic/name.h b/src/generic/name.h index ac27460..940d773 100644 --- a/src/generic/name.h +++ b/src/generic/name.h @@ -11,155 +11,153 @@ typedef struct name_block name_block_t; struct name_block { - delimited_t delimited; - uint64_t backslash; - uint64_t label; + uint64_t backslashes; + uint64_t dots; }; zone_nonnull_all static zone_really_inline void copy_name_block( - name_block_t *block, - const simd_table_t delimiter, - const simd_table_t space, - const char *source, - uint8_t *destination) + name_block_t *block, const char *text, uint8_t *wire) { - copy_and_scan_delimited( - &block->delimited, delimiter, space, source, destination); - block->backslash = simd_find_8x(&block->delimited.input, '\\'); - block->label = simd_find_8x(&block->delimited.input, '.'); + simd_8x32_t input; + simd_loadu_8x32(&input, text); + simd_storeu_8x32(wire, &input); + block->backslashes = simd_find_8x32(&input, '\\'); + block->dots = simd_find_8x32(&input, '.'); } +#define likely(...) zone_likely(__VA_ARGS__) +#define unlikely(...) 
zone_unlikely(__VA_ARGS__) + zone_nonnull_all static zone_really_inline int32_t scan_name( zone_parser_t *parser, - const zone_type_info_t *type, - const zone_field_info_t *field, - const simd_table_t delimiter, - const simd_table_t space, const token_t *token, uint8_t octets[255 + ZONE_BLOCK_SIZE], - size_t *length) + size_t *lengthp) { - name_block_t block; - uint8_t *wire = octets + 1, *label = octets; + uint64_t label = 0; const char *text = token->data; + uint8_t *wire = octets + 1; + name_block_t block; - *label = 0; - - if (text[0] == '.') { - // root needs to be handled differently - // FIXME: correct this check for quoted - if (is_contiguous((uint8_t)text[1]) || token->code == QUOTED) - SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type)); - *length = 1; - return 0; + (void)parser; + + octets[0] = 0; + + // real world domain names quickly exceed 16 octets (www.example.com is + // encoded as 3www7example3com0, or 18 octets), but rarely exceed 32 + // octets. encode in 32-byte blocks. + copy_name_block(&block, text, wire); + + uint64_t count = 32, length = 0, base = 0, left = token->length; + uint64_t carry = 0; + if (token->length < 32) + count = token->length; + uint64_t mask = (1llu << count) - 1u; + + // check for escape sequences + if (unlikely(block.backslashes & mask)) + goto escaped; + + // check for root, i.e. "." + if (unlikely(block.dots & 1llu)) + return ((*lengthp = token->length) == 1 ? 0 : -1); + + length = count; + block.dots &= mask; + carry = (block.dots >> (length - 1)); + + // check for null labels, i.e. ".." 
+ if (unlikely(block.dots & (block.dots >> 1))) + return -1; + + if (likely(block.dots)) { + count = trailing_zeroes(block.dots); + block.dots = clear_lowest_bit(block.dots); + octets[label] = (uint8_t)count; + label = count + 1; + while (block.dots) { + count = trailing_zeroes(block.dots); + block.dots = clear_lowest_bit(block.dots); + octets[label] = (uint8_t)(count - label); + label = count + 1; + } } - for (bool loop=true; loop; ) { - copy_name_block(&block, delimiter, space, text, wire); - - uint64_t size; - if (!(block.backslash & (block.delimited.delimiter - 1))) { - block.label &= block.delimited.delimiter - 1; - size = trailing_zeroes(block.delimited.delimiter | (1llu << SIMD_8X_SIZE)); - loop = !block.delimited.delimiter; - text += size; - wire += size; + octets[label] = (uint8_t)(length - label); + + if (length < 32) + return (void)(*lengthp = length + 1), carry == 0; + + text += length; + wire += length; + left -= length; + + do { + copy_name_block(&block, text, wire); + count = 32; + if (left < 32) + count = left; + mask = (1llu << count) - 1u; + base = length; + + // check for escape sequences + if (unlikely(block.backslashes & mask)) { +escaped: + block.backslashes &= -block.backslashes; + mask = block.backslashes - 1; + block.dots &= mask; + count = count_ones(mask); + const uint32_t octet = unescape(text+count, wire+count); + if (!octet) + return -1; + text += count + octet; + wire += count + 1; + length += count + 1; } else { - size = trailing_zeroes(block.backslash); - uint8_t digits[3]; - digits[0] = (unsigned char)text[size + 1] - '0'; - - if (digits[0] > 2) { - wire[size] = (unsigned char)text[size + 1]; - wire += size + 1; - text += size + 2; - } else { - digits[1] = (unsigned char)text[size + 2] - '0'; - digits[2] = (unsigned char)text[size + 3] - '0'; - if (digits[0] < 2) { - if (digits[1] > 9 || digits[2] > 9) - SEMANTIC_ERROR(parser, "Bad escape sequence in %s of %s record", - NAME(field), TNAME(type)); - } else { - if (digits[1] > 5 || 
digits[2] > 5) - SEMANTIC_ERROR(parser, "Bad escape sequence in %s of %s record", - NAME(field), TNAME(type)); - } - - wire[size] = digits[0] * 100 + digits[1] * 10 + digits[0]; - wire += size + 1; - text += size + 4; - } - - block.label &= block.backslash - 1; + block.dots &= mask; + text += count; + wire += count; + length += count; } - if (wire - octets > 255) - SEMANTIC_ERROR(parser, "Bad domain name in %s of %s", - field->name.data, TNAME(type)); - - if (block.label) { - uint64_t count = 0, last = 0; - const uint64_t labels = count_ones(block.label); - for (uint64_t i = 0; i < labels; i++) { - count = trailing_zeroes(block.label) - last; - block.label = clear_lowest_bit(block.label); - *label += count; - if (!*label || *label > 63) - SEMANTIC_ERROR(parser, "Bad domain name in %s of %s record", - NAME(field), TNAME(type)); - label += *label + 1; - *label = 0; - last += count + 1; - assert(label < wire); + left -= count; + + // check for null labels, i.e. ".." + if (unlikely(block.dots & ((block.dots >> 1) | carry))) + return -1; + carry = block.dots >> (count - 1); + + if (likely(block.dots)) { + count = trailing_zeroes(block.dots) + base; + block.dots = clear_lowest_bit(block.dots); + octets[label] = (uint8_t)(count - label); + // check if label exceeds 63 octets + if (unlikely(count - label > 63)) + return -1; + label = count + 1; + while (block.dots) { + count = trailing_zeroes(block.dots) + base; + block.dots = clear_lowest_bit(block.dots); + octets[label] = (uint8_t)(count - label); + label = count + 1; } - *label += (wire - label) - 1; } else { - *label += (uint8_t)size; - if (*label > 63) - SEMANTIC_ERROR(parser, "Bad domain name in %s of %s record", - NAME(field), TNAME(type)); + // check if label exceeds 63 octets + if (length - label > 63) + return -1; } - } - if (!(wire - octets)) { - SEMANTIC_ERROR(parser, "Invalid domain name in %s of %s", - NAME(field), TNAME(type)); - } + octets[label] = (uint8_t)(length - label); + } while (left); - *length = 
(size_t)(wire - octets); - if (!*label) - return 0; - return ZONE_NAME; + *lengthp = length + 1; + return carry == 0; } -zone_nonnull_all -static zone_really_inline int32_t scan_contiguous_name( - zone_parser_t *parser, - const zone_type_info_t *type, - const zone_field_info_t *field, - const token_t *token, - uint8_t octets[255 + ZONE_BLOCK_SIZE], - size_t *length) -{ - return scan_name( - parser, type, field, non_contiguous, blank, token, octets, length); -} - -zone_nonnull_all -static zone_really_inline int32_t scan_quoted_name( - zone_parser_t *parser, - const zone_type_info_t *type, - const zone_field_info_t *field, - const token_t *token, - uint8_t octets[255 + ZONE_BLOCK_SIZE], - size_t *length) -{ - return scan_name( - parser, type, field, non_quoted, non_quoted, token, octets, length); -} +#undef likely +#undef unlikely zone_nonnull_all static zone_really_inline int32_t parse_name( @@ -174,23 +172,28 @@ static zone_really_inline int32_t parse_name( if (zone_likely(token->code == CONTIGUOUS)) { // a freestanding "@" denotes the current origin - if (token->data[0] == '@' && !is_contiguous((uint8_t)token->data[1])) + if (token->data[0] == '@' && token->length == 1) goto relative; - r = scan_contiguous_name(parser, type, field, token, o, &n); + r = scan_name(parser, token, o, &n); if (r == 0) return (void)(parser->rdata->length += n), ZONE_NAME; - if (r < 0) - return r; + if (r > 0) + goto relative; } else if (token->code == QUOTED) { - r = scan_quoted_name(parser, type, field, token, o, &n); + if (token->length == 0) + goto invalid; + r = scan_name(parser, token, o, &n); if (r == 0) return (void)(parser->rdata->length += n), ZONE_NAME; - if (r < 0) - return r; + if (r > 0) + goto relative; } else { return have_string(parser, type, field, token); } +invalid: + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type)); + relative: if (n > 255 - parser->file->origin.length) SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type)); diff --git 
a/src/generic/scanner.h b/src/generic/scanner.h index 76ae483..3176fbe 100644 --- a/src/generic/scanner.h +++ b/src/generic/scanner.h @@ -142,7 +142,6 @@ struct block { uint64_t follows_contiguous; uint64_t blank; uint64_t special; - uint64_t bits; }; static zone_really_inline void scan(zone_parser_t *parser, block_t *block) @@ -190,63 +189,97 @@ static zone_really_inline void scan(zone_parser_t *parser, block_t *block) ~(block->blank | block->special | block->quoted) & ~(block->in_quoted | block->in_comment); block->follows_contiguous = follows(block->contiguous, &parser->file->state.follows_contiguous); - - // quoted and contiguous have dynamic lengths, write two indexes - block->bits = (block->contiguous & ~block->follows_contiguous) | (block->quoted & block->in_quoted) | block->special; } -static zone_really_inline void tokenize(zone_parser_t *parser, const block_t *block) +static zone_really_inline void tokenize(zone_parser_t *parser, const block_t *block, uint64_t clear) { - uint64_t bits = block->bits; - uint64_t count = count_ones(bits); + uint64_t fields = (block->contiguous & ~block->follows_contiguous) | + (block->quoted & block->in_quoted) | + (block->special); + + // delimiters are only important for contiguous and quoted character strings + // (all other tokens automatically have a length 1). write out both in + // separate vectors and base logic solely on field vector, order is + // automatically correct + uint64_t delimiters = (~block->contiguous & block->follows_contiguous) | + (block->quoted & ~block->in_quoted); + + fields &= ~clear; + delimiters &= ~clear; + const char *base = parser->file->buffer.data + parser->file->buffer.index; + uint64_t field_count = count_ones(fields); + uint64_t delimiter_count = count_ones(delimiters); + // bulk of the data are contiguous and quoted character strings. field and + // delimiter counts are therefore (mostly) equal. 
select the greater number + // and write out indexes using a single loop, (hopefully) leveraging + // superscalar properties of modern CPUs + uint64_t count = field_count; + if (delimiter_count > field_count) + count = delimiter_count; uint64_t newline = block->newline; const uint64_t in_string = block->contiguous | block->in_quoted; - // take slow path if (escaped) newlines appear in contiguous or quoted. - // edge case, but must be supported and handled in the scanner for ease of - // use and to accommodate for parallel processing in the parser. note that - // escaped newlines may have been present in the last block + // take slow path if (escaped) newlines appear in contiguous or quoted + // character strings. edge case, but must be supported and handled in the + // scanner for ease of use and to accommodate for parallel processing in the + // parser. escaped newlines may have been present in the last block if (zone_unlikely(parser->file->lines.tail[0] || (newline & in_string))) { - for (uint64_t i=0; i < count; i++) { - uint64_t bit = -bits & bits; - bits ^= bit; - if (bit & newline) { + // FIXME: test logic properly, likely eligible for simplification + for (count=0; count < field_count; count++) { + const uint64_t field = -fields & fields; + if (field & newline) { parser->file->lines.tail++; - parser->file->fields.tail[i] = line_feed; - newline &= -bit; + parser->file->fields.tail[count] = line_feed; + newline &= -field; } else { // count newlines here so number of newlines remains correct if last // token is start of contiguous or quoted and index must be reset - *parser->file->lines.tail += count_ones(newline & ~(-bit)); - parser->file->fields.tail[i] = base + trailing_zeroes(bit); - newline &= -bit; + *parser->file->lines.tail += count_ones(newline & ~(-field)); + parser->file->fields.tail[count] = base + trailing_zeroes(field); + newline &= -field; } + parser->file->delimiters.tail[count] = base + trailing_zeroes(delimiters); + fields = 
clear_lowest_bit(fields); + delimiters = clear_lowest_bit(delimiters); + } + + for (; count < delimiter_count; count++) { + parser->file->delimiters.tail[count] = base + trailing_zeroes(delimiters); + delimiters = clear_lowest_bit(delimiters); } - parser->file->fields.tail += count; + parser->file->fields.tail += field_count; + parser->file->delimiters.tail += delimiter_count; } else { for (uint64_t i=0; i < 6; i++) { - parser->file->fields.tail[i] = base + trailing_zeroes(bits); - bits = clear_lowest_bit(bits); + parser->file->fields.tail[i] = base + trailing_zeroes(fields); + parser->file->delimiters.tail[i] = base + trailing_zeroes(delimiters); + fields = clear_lowest_bit(fields); + delimiters = clear_lowest_bit(delimiters); } if (zone_unlikely(count > 6)) { for (uint64_t i=6; i < 12; i++) { - parser->file->fields.tail[i] = base + trailing_zeroes(bits); - bits = clear_lowest_bit(bits); + parser->file->fields.tail[i] = base + trailing_zeroes(fields); + parser->file->delimiters.tail[i] = base + trailing_zeroes(delimiters); + fields = clear_lowest_bit(fields); + delimiters = clear_lowest_bit(delimiters); } if (zone_unlikely(count > 12)) { for (uint64_t i=12; i < count; i++) { - parser->file->fields.tail[i] = base + trailing_zeroes(bits); - bits = clear_lowest_bit(bits); + parser->file->fields.tail[i] = base + trailing_zeroes(fields); + parser->file->delimiters.tail[i] = base + trailing_zeroes(delimiters); + fields = clear_lowest_bit(fields); + delimiters = clear_lowest_bit(delimiters); } } } - parser->file->fields.tail += count; + parser->file->fields.tail += field_count; + parser->file->delimiters.tail += delimiter_count; } } @@ -273,6 +306,9 @@ static zone_never_inline void step(zone_parser_t *parser, token_t *token) parser->file->fields.tail = parser->file->fields.tape; if (parser->file->fields.tape[0]) parser->file->fields.tail++; + // delimiters are never deferred + parser->file->delimiters.head = parser->file->delimiters.tape; + parser->file->delimiters.tail 
= parser->file->delimiters.tape; shuffle: if (parser->file->end_of_file == ZONE_HAVE_DATA) { @@ -305,7 +341,7 @@ static zone_never_inline void step(zone_parser_t *parser, token_t *token) goto terminate; simd_loadu_8x64(&block.input, (const uint8_t *)data); scan(parser, &block); - tokenize(parser, &block); + tokenize(parser, &block, 0); parser->file->buffer.index += ZONE_BLOCK_SIZE; } @@ -321,9 +357,9 @@ static zone_never_inline void step(zone_parser_t *parser, token_t *token) const uint64_t clear = ~((1llu << length) - 1); simd_loadu_8x64(&block.input, buffer); scan(parser, &block); - block.bits &= ~clear; + //block.starts &= ~clear; block.contiguous &= ~clear; - tokenize(parser, &block); + tokenize(parser, &block, clear); parser->file->buffer.index += length; parser->file->end_of_file = ZONE_NO_MORE_DATA; @@ -337,6 +373,7 @@ static zone_never_inline void step(zone_parser_t *parser, token_t *token) } parser->file->fields.tail[0] = data_limit; + parser->file->delimiters.tail[0] = data_limit; if (parser->file->fields.head[0] == parser->file->buffer.data) parser->file->start_of_line = start_of_line; else @@ -347,8 +384,11 @@ static zone_never_inline void step(zone_parser_t *parser, token_t *token) token->data = data; token->code = (int32_t)contiguous[ (uint8_t)*data ]; // end-of-file is idempotent - parser->file->fields.head += (*data != '\0'); + parser->file->fields.head += (*token->data != '\0'); if (zone_likely(token->code == CONTIGUOUS)) { + const char *delimiter = *parser->file->delimiters.head++; + assert(delimiter > token->data); + token->length = (size_t)(delimiter - token->data); return; } else if (token->code == LINE_FEED) { if (zone_unlikely(token->data == line_feed)) @@ -359,9 +399,13 @@ static zone_never_inline void step(zone_parser_t *parser, token_t *token) parser->file->line += parser->file->span; parser->file->span = 0; parser->file->start_of_line = !is_blank((uint8_t)*(token->data+1)); + token->length = 1; return; } else if (token->code == QUOTED) { 
+ const char *delimiter = *parser->file->delimiters.head++; token->data++; + assert(delimiter >= token->data); + token->length = (size_t)(delimiter - token->data); return; } else if (token->code == END_OF_FILE) { zone_file_t *file; @@ -376,6 +420,7 @@ static zone_never_inline void step(zone_parser_t *parser, token_t *token) parser->file = parser->file->includer; parser->owner = &parser->file->owner; zone_close_file(parser, file); + token->length = 1; return; } else if (token->code == LEFT_PAREN) { if (parser->file->grouped) @@ -390,48 +435,4 @@ static zone_never_inline void step(zone_parser_t *parser, token_t *token) } } -typedef struct delimited delimited_t; -struct delimited { - simd_8x_t input; - uint64_t delimiter; -}; - -static const simd_table_t non_contiguous = SIMD_TABLE( - 0x00, // 0x00 : "\0" : 0x00 -- end-of-file - 0x00, // 0x01 - 0x22, // 0x02 : "\"" : 0x22 -- start/end quoted - 0x00, // 0x03 - 0x00, // 0x04 - 0x00, // 0x05 - 0x00, // 0x06 - 0x00, // 0x07 - 0x28, // 0x08 : "(" : 0x28 -- start grouped - 0x29, // 0x09 : ")" : 0x29 -- end grouped - 0x0a, // 0x0a : "\n" : 0x0a -- end-of-line - 0x3b, // 0x0b : ";" : 0x3b -- start comment - 0x00, // 0x0c - 0x00, // 0x0d - 0x00, // 0x0e - 0x00 // 0x0f -); - -static const simd_table_t non_quoted = SIMD_TABLE( - 0x00, // 0x00 : "\0" : 0x00 -- end-of-file - 0x00, // 0x01 - 0x22, // 0x02 : "\"" : 0x22 -- start/end quoted - 0x00, // 0x03 - 0x00, // 0x04 - 0x00, // 0x05 - 0x00, // 0x06 - 0x00, // 0x07 - 0x00, // 0x08 - 0x00, // 0x09 - 0x00, // 0x0a - 0x00, // 0x0b - 0x00, // 0x0c - 0x00, // 0x0d - 0x00, // 0x0e - 0x00 // 0x0f -); - #endif // SCANNER_H diff --git a/src/generic/text.h b/src/generic/text.h index 54ce869..0b33f30 100644 --- a/src/generic/text.h +++ b/src/generic/text.h @@ -10,7 +10,7 @@ #define TEXT_H zone_nonnull_all -static zone_really_inline size_t unescape(const char *text, uint8_t *wire) +static zone_really_inline uint32_t unescape(const char *text, uint8_t *wire) { uint8_t d[3]; uint32_t o; @@ 
-24,48 +24,27 @@ static zone_really_inline size_t unescape(const char *text, uint8_t *wire) d[2] = (uint8_t)text[3] - '0'; o = d[0] * 100 + d[1] * 10 + d[2]; *wire = (uint8_t)o; - return (o > 255 || d[1] > 9 || d[2]) ? 0 : 4u; + return (o > 255 || d[1] > 9 || d[2] > 9) ? 0 : 4u; } } +typedef struct string_block string_block_t; +struct string_block { + uint64_t backslashes; +}; + zone_nonnull_all -static zone_really_inline int32_t parse_contiguous_string_internal( - zone_parser_t *parser, - const zone_type_info_t *type, - const zone_field_info_t *field, - const token_t *token) +static zone_really_inline void copy_string_block( + string_block_t *block, const char *text, uint8_t *wire) { - string_block_t b; - uint8_t *w = &parser->rdata->octets[parser->rdata->length + 1]; - const uint8_t *ws = w - 1, *we = w + 255; - const char *t = token->data; - - while (w < we) { - copy_contiguous_string_block(t, w, &b); - - if (b.backslash & (b.delimiter - 1)) { - const size_t n = trailing_zeroes(b.backslash); - const size_t o = unescape(t, w); - if (!o) - SEMANTIC_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type)); - w += n + 1; t += n + o; - } else { - const size_t n = trailing_zeroes(b.delimiter | (1llu << 32)); - w += n; t += n; - if (b.delimiter) - break; - } - } - - if (w >= we) - SEMANTIC_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type)); - parser->rdata->octets[parser->rdata->length] = (uint8_t)((w - ws) - 1); - parser->rdata->length += (size_t)(w - ws); - return ZONE_STRING; + simd_8x32_t input; + simd_loadu_8x32(&input, text); + simd_storeu_8x32(wire, &input); + block->backslashes = simd_find_8x32(&input, '\\'); } zone_nonnull_all -static zone_really_inline int32_t parse_quoted_string_internal( +static zone_really_inline int32_t parse_string_internal( zone_parser_t *parser, const zone_type_info_t *type, const zone_field_info_t *field, @@ -74,27 +53,29 @@ static zone_really_inline int32_t parse_quoted_string_internal( string_block_t b; uint8_t *w = 
&parser->rdata->octets[parser->rdata->length + 1]; const uint8_t *ws = w - 1, *we = w + 255; - const char *t = token->data; - - while (w < we) { - copy_quoted_string_block(t, w, &b); - - if (b.backslash & (b.delimiter - 1)) { - const size_t n = trailing_zeroes(b.backslash); - const size_t o = unescape(t, w); - if (!o) + const char *t = token->data, *te = t + token->length; + uint64_t left = token->length; + + while ((t < te) & (w < we)) { + copy_string_block(&b, t, w); + uint64_t n = 32; + if (left < 32) + n = left; + uint64_t mask = (1llu << n) - 1; + + if (b.backslashes & mask) { + n = trailing_zeroes(b.backslashes); + w += n; t += n; + if (!(n = unescape(t, w))) SEMANTIC_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type)); - w += n + 1; t += n + o; + w += 1; t += n; } else { - const size_t n = trailing_zeroes(b.delimiter | (1llu << 32)); w += n; t += n; - if (b.delimiter) - break; } } if (w >= we) - SEMANTIC_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type)); + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type)); parser->rdata->octets[parser->rdata->length] = (uint8_t)((w - ws) - 1); parser->rdata->length += (size_t)(w - ws); return ZONE_STRING; @@ -107,52 +88,15 @@ static zone_really_inline int32_t parse_string( const zone_field_info_t *field, const token_t *token) { - if (zone_likely(token->code == QUOTED)) // strings are usually quoted - return parse_quoted_string_internal(parser, type, field, token); - else if (token->code == CONTIGUOUS) - return parse_contiguous_string_internal(parser, type, field, token); - else - return have_string(parser, type, field, token); -} - -zone_nonnull_all -static zone_really_inline int32_t parse_contiguous_text_internal( - zone_parser_t *parser, - const zone_type_info_t *type, - const zone_field_info_t *field, - const token_t *token) -{ - string_block_t b; - uint8_t *w = &parser->rdata->octets[parser->rdata->length]; - const uint8_t *ws = w, *we = &parser->rdata->octets[ZONE_RDATA_LIMIT]; - const 
char *t = token->data; - - while (w < we) { - copy_contiguous_string_block(t, w, &b); - - if (zone_unlikely(b.backslash & (b.delimiter - 1))) { - const size_t n = trailing_zeroes(b.backslash); - const size_t o = unescape(t+n, w+n); - if (!o) - SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type)); - w += n + 1; t += n + o; - } else { - const size_t n = trailing_zeroes(b.delimiter | (1llu << 32)); - w += n; t += n; - if (b.delimiter) - break; - } - } + int32_t r; - if (w >= we) - SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type)); - - parser->rdata->length += (size_t)(w - ws); - return ZONE_BLOB; + if ((r = have_string(parser, type, field, token)) < 0) + return r; + return parse_string_internal(parser, type, field, token); } zone_nonnull_all -static zone_really_inline int32_t parse_quoted_text_internal( +static zone_really_inline int32_t parse_text_internal( zone_parser_t *parser, const zone_type_info_t *type, const zone_field_info_t *field, @@ -160,29 +104,30 @@ static zone_really_inline int32_t parse_quoted_text_internal( { string_block_t b; uint8_t *w = &parser->rdata->octets[parser->rdata->length]; - const uint8_t *ws = w, *we = &parser->rdata->octets[ZONE_RDATA_LIMIT]; - const char *t = token->data; - - while (w < we) { - copy_quoted_string_block(t, w, &b); - - if (zone_unlikely(b.backslash & (b.delimiter - 1))) { - const size_t n = trailing_zeroes(b.backslash); - const size_t o = unescape(t+n, w+n); - if (!o) + const uint8_t *ws = w, *we = &parser->rdata->octets[ZONE_RDATA_SIZE]; + const char *t = token->data, *te = t + token->length; + uint64_t left = token->length; + + while ((t < te) & (w < we)) { + copy_string_block(&b, t, w); + uint64_t n = 32; + if (left < 32) + n = left; + uint64_t mask = (1llu << n) - 1; + + if (zone_unlikely(b.backslashes & mask)) { + n = trailing_zeroes(b.backslashes); + w += n; t += n; + if (!(n = unescape(t, w))) SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type)); - w += n + 1; t += n + 
o; + w += 1; t += n; } else { - const size_t n = trailing_zeroes(b.delimiter | (1llu << 32)); w += n; t += n; - if (b.delimiter) - break; } } if (w >= we) SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type)); - parser->rdata->length += (size_t)(w - ws); return ZONE_BLOB; } @@ -194,9 +139,11 @@ static zone_really_inline int32_t parse_quoted_text( const zone_field_info_t *field, const token_t *token) { - if (zone_likely(token->code == QUOTED)) - return parse_quoted_text_internal(parser, type, field, token); - return have_quoted(parser, type, field, token); + int32_t r; + + if ((r = have_quoted(parser, type, field, token)) < 0) + return r; + return parse_text_internal(parser, type, field, token); } zone_nonnull_all @@ -206,11 +153,11 @@ static zone_really_inline int32_t parse_text( const zone_field_info_t *field, const token_t *token) { - if (zone_likely(token->code == QUOTED)) // strings are usually quoted - return parse_quoted_text_internal(parser, type, field, token); - else if (token->code == CONTIGUOUS) - return parse_contiguous_text_internal(parser, type, field, token); - return have_string(parser, type, field, token); + int32_t r; + + if ((r = have_string(parser, type, field, token)) < 0) + return r; + return parse_text_internal(parser, type, field, token); } #endif // TEXT_H diff --git a/src/haswell/bits.h b/src/haswell/bits.h index 59025b0..6b2fa98 100644 --- a/src/haswell/bits.h +++ b/src/haswell/bits.h @@ -2,6 +2,7 @@ * bits.h -- Haswell specific implementation of bit manipulation instructions * * Copyright (c) 2018-2023 The simdjson authors + * Copyright (c) 2023, NLnet Labs. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ diff --git a/src/haswell/delimited.h b/src/haswell/delimited.h deleted file mode 100644 index e5cb16f..0000000 --- a/src/haswell/delimited.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * string.h -- some useful comment - * - * Copyright (c) 2023, NLnet Labs. All rights reserved. 
- * - * SPDX-License-Identifier: BSD-3-Clause - * - */ -#ifndef DELIMITED_H -#define DELIMITED_H - -zone_nonnull_all -static zone_really_inline void copy_and_scan_delimited( - delimited_t *block, - const simd_table_t delimiter, - const simd_table_t space, - const char *source, - uint8_t *destination) -{ - __m256i b = _mm256_loadu_si256((const __m256i *)space); - __m256i d = _mm256_loadu_si256((const __m256i *)delimiter); - - simd_loadu_8x(&block->input, (const uint8_t *)source); - b = _mm256_shuffle_epi8(b, block->input.chunks[0]); - d = _mm256_shuffle_epi8(d, block->input.chunks[0]); - simd_storeu_8x(destination, &block->input); - b = _mm256_cmpeq_epi8(block->input.chunks[0], b); - d = _mm256_cmpeq_epi8(block->input.chunks[0], d); - block->delimiter = (uint32_t)_mm256_movemask_epi8(_mm256_or_si256(b, d)); -} - -zone_nonnull_all -static zone_really_inline void scan_delimited( - delimited_t *block, - const simd_table_t delimiter, - const simd_table_t space, - const char *source) -{ - __m256i b = _mm256_loadu_si256((const __m256i *)space); - __m256i d = _mm256_loadu_si256((const __m256i *)delimiter); - - simd_loadu_8x(&block->input, (const uint8_t *)source); - b = _mm256_shuffle_epi8(b, block->input.chunks[0]); - d = _mm256_shuffle_epi8(d, block->input.chunks[0]); - b = _mm256_cmpeq_epi8(block->input.chunks[0], b); - d = _mm256_cmpeq_epi8(block->input.chunks[0], d); - block->delimiter = (uint32_t)_mm256_movemask_epi8(_mm256_or_si256(b, d)); -} - -#endif // DELIMITED_H diff --git a/src/haswell/parser.c b/src/haswell/parser.c index 45647e7..8b16bd0 100644 --- a/src/haswell/parser.c +++ b/src/haswell/parser.c @@ -14,15 +14,13 @@ #include "lexer.h" #include "table.h" #include "generic/scanner.h" -#include "haswell/delimited.h" -#include "haswell/string.h" #include "generic/number.h" #include "generic/ttl.h" #include "westmere/time.h" -#include "generic/name.h" #include "westmere/ip4.h" #include "generic/ip6.h" #include "generic/text.h" +#include "generic/name.h" #include 
"fallback/base16.h" #include "haswell/base32.h" #include "generic/base64.h" diff --git a/src/haswell/simd.h b/src/haswell/simd.h index be94f9e..c0afa5b 100644 --- a/src/haswell/simd.h +++ b/src/haswell/simd.h @@ -28,6 +28,8 @@ typedef struct { __m256i chunks[1]; } simd_8x_t; typedef struct { __m128i chunks[1]; } simd_8x16_t; +typedef simd_8x_t simd_8x32_t; + typedef struct { __m256i chunks[2]; } simd_8x64_t; @@ -76,6 +78,10 @@ static zone_really_inline uint64_t simd_find_8x16(const simd_8x16_t *simd, char return m; } +#define simd_loadu_8x32(simd, address) simd_loadu_8x(simd, address) +#define simd_storeu_8x32(address, simd) simd_storeu_8x(address, simd) +#define simd_find_8x32(simd, key) simd_find_8x(simd, key) + zone_nonnull_all static zone_really_inline void simd_loadu_8x64(simd_8x64_t *simd, const uint8_t *address) { diff --git a/src/haswell/string.h b/src/haswell/string.h deleted file mode 100644 index 74f44c4..0000000 --- a/src/haswell/string.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * text.h -- string parsing implementation targeting SSE4.2 - * - * Copyright (c) 2023, NLnet Labs. All rights reserved. 
- * - * SPDX-License-Identifier: BSD-3-Clause - * - */ -#ifndef STRING_H -#define STRING_H - -typedef struct string_block string_block_t; -struct string_block { - uint64_t backslash; - uint64_t delimiter; -}; - -zone_nonnull_all -static zone_really_inline void copy_contiguous_string_block( - const char *text, uint8_t *wire, string_block_t *block) -{ - const __m256i d0 = _mm256_setr_epi8( - 0x10, 0x00, 0x20, 0x00, -128, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x20, 0x00, -128, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - const __m256i d1 = _mm256_setr_epi8( - 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x20, 0x30, 0x10, 0x00, -128, 0x10, 0x00, 0x00, - 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x20, 0x30, 0x10, 0x00, -128, 0x10, 0x00, 0x00); - - const __m256i i = _mm256_loadu_si256((const __m256i *)(text)); - _mm256_storeu_si256((__m256i *)wire, i); - - const __m256i ds0 = _mm256_shuffle_epi8(d0, _mm256_srli_epi16(i, 4)); - const __m256i ds1 = _mm256_shuffle_epi8(d1, i); - const __m256i ds = _mm256_and_si256(ds0, ds1); - - block->backslash = - (uint32_t)_mm256_movemask_epi8(ds); - block->delimiter = - (uint32_t)_mm256_movemask_epi8(_mm256_cmpgt_epi8(ds, _mm256_setzero_si256())); -} - -zone_nonnull_all -static zone_really_inline void copy_quoted_string_block( - const char *text, uint8_t *wire, string_block_t *block) -{ - const __m256i b = _mm256_set1_epi8('\\'); - const __m256i q = _mm256_set1_epi8('\"'); - - const __m256i i = _mm256_loadu_si256((const __m256i *)(text)); - _mm256_storeu_si256((__m256i *)wire, i); - - block->backslash = (uint32_t)_mm256_movemask_epi8(_mm256_cmpeq_epi8(i, b)); - block->delimiter = (uint32_t)_mm256_movemask_epi8(_mm256_cmpeq_epi8(i, q)); -} - -#endif // STRING_H diff --git a/src/lexer.h b/src/lexer.h index ff50f05..eac3a1b 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -21,6 +21,7 @@ typedef struct token token_t; struct token { int32_t code; const char 
*data; + size_t length; }; // sorted so that errors, end of file and line feeds are less than contiguous @@ -232,16 +233,18 @@ static zone_really_inline int32_t refill(zone_parser_t *parser) #define DEFER_ERROR(parser, token, error) \ do { \ - token->data = dummy_data; \ token->code = error; \ + token->data = dummy_data; \ + token->length = 0; \ return; \ } while (0) #define DEFER_SYNTAX_ERROR(parser, token, ...) \ do { \ ZONE_LOG(parser, ZONE_ERROR, __VA_ARGS__); \ - token->data = dummy_data; \ token->code = ZONE_SYNTAX_ERROR; \ + token->data = dummy_data; \ + token->length = 0; \ return; \ } while (0) @@ -252,6 +255,9 @@ static zone_really_inline void lex(zone_parser_t *parser, token_t *token) token->data = *parser->file->fields.head++; token->code = (int32_t)contiguous[ (uint8_t)*token->data ]; if (zone_likely(token->code == CONTIGUOUS)) { + const char *delimiter = *parser->file->delimiters.head++; + assert(delimiter > token->data); + token->length = (size_t)(delimiter - token->data); return; } else if (token->code == LINE_FEED) { if (zone_unlikely(token->data == line_feed)) @@ -262,9 +268,13 @@ static zone_really_inline void lex(zone_parser_t *parser, token_t *token) parser->file->line += parser->file->span; parser->file->span = 0; parser->file->start_of_line = !is_blank((uint8_t)*(token->data+1)); + token->length = 1; return; } else if (token->code == QUOTED) { token->data++; + const char *delimiter = *parser->file->delimiters.head++; + assert(delimiter >= token->data); // allow empty strings (e.g. 
"") + token->length = (size_t)(delimiter - token->data); return; } else if (token->code == END_OF_FILE) { break; diff --git a/src/parser.h b/src/parser.h index dfff60b..a77f9f5 100644 --- a/src/parser.h +++ b/src/parser.h @@ -24,21 +24,26 @@ static zone_really_inline int32_t parse_owner( // a freestanding "@" denotes the origin if (token->data[0] == '@' && !is_contiguous((uint8_t)token->data[1])) goto relative; - r = scan_contiguous_name(parser, type, field, token, o, &n); + r = scan_name(parser, token, o, &n); if (r == 0) return (void)(parser->owner->length = n), ZONE_NAME; - if (r < 0) - return r; + if (r > 0) + goto relative; } else if (token->code == QUOTED) { - r = scan_quoted_name(parser, type, field, token, o, &n); + if (token->length == 0) + goto invalid; + r = scan_name(parser, token, o, &n); if (r == 0) return (void)(parser->owner->length = n), ZONE_NAME; - if (r < 0) - return r; + if (r > 0) + goto relative; } else { return have_string(parser, type, field, token); } +invalid: + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type)); + relative: if (n > 255 - parser->file->origin.length) SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type)); @@ -154,8 +159,8 @@ static zone_really_inline int32_t parse_dollar_include( int32_t r; zone_file_t *includer, *file; - zone_name_block_t name; - const zone_name_block_t *origin = &parser->file->origin; + zone_name_buffer_t name; + const zone_name_buffer_t *origin = &parser->file->origin; const uint8_t *delimiters; if (parser->options.no_includes) @@ -179,21 +184,17 @@ static zone_really_inline int32_t parse_dollar_include( // $INCLUDE directive may specify an origin lex(parser, token); if (token->code == CONTIGUOUS) { - r = scan_contiguous_name( - parser, &type, &fields[1], token, name.octets, &name.length); - if (r < 0) - goto invalid_name; + r = scan_name(parser, token, name.octets, &name.length); if (r != 0) - goto relative_name; + goto invalid_name; origin = &name; lex(parser, token); } 
else if (token->code == QUOTED) { - r = scan_quoted_name( - parser, &type, &fields[1], token, name.octets, &name.length); - if (r < 0) + if (token->length == 0) goto invalid_name; + r = scan_name(parser, token, name.octets, &name.length); if (r != 0) - goto relative_name; + goto invalid_name; origin = &name; lex(parser, token); } @@ -222,12 +223,9 @@ static zone_really_inline int32_t parse_dollar_include( parser->file = file; return 0; -relative_name: - zone_close_file(parser, file); - SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(&fields[1]), TNAME(&type)); invalid_name: zone_close_file(parser, file); - return r; + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(&fields[1]), TNAME(&type)); } // RFC1035 section 5.1 @@ -245,19 +243,17 @@ static inline int32_t parse_dollar_origin( lex(parser, token); if (zone_likely(token->code == CONTIGUOUS)) - r = scan_contiguous_name(parser, &type, &field, token, + r = scan_name(parser, token, parser->file->origin.octets, &parser->file->origin.length); else if (token->code == QUOTED) - r = scan_quoted_name(parser, &type, &field, token, + r = scan_name(parser, token, parser->file->origin.octets, &parser->file->origin.length); else return have_string(parser, &type, &field, token); - if (r < 0) - return r; - if (r > 0) + if (r != 0) SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(&field), TNAME(&type)); lex(parser, token); diff --git a/src/types.h b/src/types.h index 4382927..0956c9e 100644 --- a/src/types.h +++ b/src/types.h @@ -246,10 +246,10 @@ static zone_really_inline int32_t accept_rr( parser->rdata->octets, parser->user_data); - assert((size_t)result < parser->cache.size); + assert((size_t)result < parser->buffers.size); if (result < 0) return result; - parser->rdata = &parser->cache.rdata.blocks[result]; + parser->rdata = &parser->buffers.rdata.blocks[result]; return 0; } diff --git a/src/westmere/bits.h b/src/westmere/bits.h index 7e3c7e0..6c415b6 100644 --- a/src/westmere/bits.h +++ b/src/westmere/bits.h @@ -2,6 +2,7 @@ * 
bits.h -- Westmere specific implementation of bit manipulation instructions * * Copyright (c) 2018-2022 The simdjson authors + * Copyright (c) 2023, NLnet Labs. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -23,7 +24,7 @@ static inline uint64_t trailing_zeroes(uint64_t input_num) { return (uint64_t)__builtin_ctzll(input_num); } -/* result might be undefined when input_num is zero */ +// result might be undefined when input_num is zero static inline uint64_t clear_lowest_bit(uint64_t input_num) { return input_num & (input_num-1); } diff --git a/src/westmere/delimited.h b/src/westmere/delimited.h deleted file mode 100644 index 6d106c4..0000000 --- a/src/westmere/delimited.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * string.h -- some useful comment - * - * Copyright (c) 2023, NLnet Labs. All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause - * - */ -#ifndef DELIMITED_H -#define DELIMITED_H - -zone_nonnull_all -static zone_really_inline void copy_and_scan_delimited( - delimited_t *block, - const simd_table_t delimiter, - const simd_table_t space, - const char *source, - uint8_t *destination) -{ - __m128i b = _mm_loadu_si128((const __m128i *)space); - __m128i d = _mm_loadu_si128((const __m128i *)delimiter); - - simd_loadu_8x(&block->input, (const uint8_t *)source); - b = _mm_shuffle_epi8(b, block->input.chunks[0]); - d = _mm_shuffle_epi8(d, block->input.chunks[0]); - simd_storeu_8x(destination, &block->input); - b = _mm_cmpeq_epi8(block->input.chunks[0], b); - d = _mm_cmpeq_epi8(block->input.chunks[0], d); - block->delimiter = (uint16_t)_mm_movemask_epi8(_mm_or_si128(b, d)); -} - -zone_nonnull_all -static zone_really_inline void scan_delimited( - delimited_t *block, - const simd_table_t delimiter, - const simd_table_t space, - const char *source) -{ - __m128i b = _mm_loadu_si128((const __m128i *)space); - __m128i d = _mm_loadu_si128((const __m128i *)delimiter); - - simd_loadu_8x(&block->input, (const uint8_t *)source); - b = 
_mm_shuffle_epi8(b, block->input.chunks[0]); - d = _mm_shuffle_epi8(d, block->input.chunks[0]); - b = _mm_cmpeq_epi8(block->input.chunks[0], b); - d = _mm_cmpeq_epi8(block->input.chunks[0], d); - block->delimiter = (uint16_t)_mm_movemask_epi8(_mm_or_si128(b, d)); -} - -#endif // DELIMITED_H diff --git a/src/westmere/parser.c b/src/westmere/parser.c index 09cb8c5..03a0d14 100644 --- a/src/westmere/parser.c +++ b/src/westmere/parser.c @@ -14,15 +14,13 @@ #include "lexer.h" #include "table.h" #include "generic/scanner.h" -#include "westmere/delimited.h" -#include "westmere/string.h" #include "generic/number.h" #include "generic/ttl.h" #include "westmere/time.h" -#include "generic/name.h" #include "westmere/ip4.h" #include "generic/ip6.h" #include "generic/text.h" +#include "generic/name.h" #include "fallback/base16.h" #include "westmere/base32.h" #include "generic/base64.h" diff --git a/src/westmere/simd.h b/src/westmere/simd.h index 90795f2..730721e 100644 --- a/src/westmere/simd.h +++ b/src/westmere/simd.h @@ -27,6 +27,8 @@ typedef struct { __m128i chunks[1]; } simd_8x_t; typedef simd_8x_t simd_8x16_t; +typedef struct { __m128i chunks[2]; } simd_8x32_t; + typedef struct { __m128i chunks[4]; } simd_8x64_t; zone_nonnull_all @@ -62,6 +64,31 @@ static zone_really_inline uint64_t simd_find_any_8x( #define simd_loadu_8x16(simd, address) simd_loadu_8x(simd, address) #define simd_find_8x16(simd, key) simd_find_8x(simd, key) +zone_nonnull_all +static zone_really_inline void simd_loadu_8x32(simd_8x32_t *simd, const char *address) +{ + simd->chunks[0] = _mm_loadu_si128((const __m128i *)(address)); + simd->chunks[1] = _mm_loadu_si128((const __m128i *)(address+16)); +} + +zone_nonnull_all +static zone_really_inline void simd_storeu_8x32(uint8_t *address, const simd_8x32_t *simd) +{ + _mm_storeu_si128((__m128i *)(address), simd->chunks[0]); + _mm_storeu_si128((__m128i *)(address+16), simd->chunks[1]); +} + +zone_nonnull_all +static zone_really_inline uint64_t simd_find_8x32(const 
simd_8x32_t *simd, char key) +{ + const __m128i k = _mm_set1_epi8(key); + const __m128i r0 = _mm_cmpeq_epi8(simd->chunks[0], k); + const __m128i r1 = _mm_cmpeq_epi8(simd->chunks[1], k); + const uint32_t m0 = (uint16_t)_mm_movemask_epi8(r0); + const uint32_t m1 = (uint16_t)_mm_movemask_epi8(r1); + return m0 | (m1 << 16); +} + zone_nonnull_all static zone_really_inline void simd_loadu_8x64(simd_8x64_t *simd, const uint8_t *address) { diff --git a/src/westmere/string.h b/src/westmere/string.h deleted file mode 100644 index 2001113..0000000 --- a/src/westmere/string.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * text.h -- string parsing implementation targeting SSE4.2 - * - * Copyright (c) 2023, NLnet Labs. All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause - * - */ -#ifndef STRING_H -#define STRING_H - -typedef struct string_block string_block_t; -struct string_block { - uint64_t backslash; - uint64_t delimiter; -}; - -zone_nonnull_all -static zone_really_inline void copy_contiguous_string_block( - const char *text, uint8_t *wire, string_block_t *block) -{ - const __m128i d0 = _mm_setr_epi8( - 0x10, 0x00, 0x20, 0x00, -128, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - const __m128i d1 = _mm_setr_epi8( - 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x20, 0x30, 0x10, 0x00, -128, 0x10, 0x00, 0x00); - - const __m128i i0 = _mm_loadu_si128((const __m128i *)(text)); - const __m128i i1 = _mm_loadu_si128((const __m128i *)(text+16)); - _mm_storeu_si128((__m128i *)(wire), i0); - _mm_storeu_si128((__m128i *)(wire+16), i1); - - // FIXME: this is and error! 
- const __m128i ds00 = _mm_shuffle_epi8(d0, _mm_srli_epi16(i0, 4)); - const __m128i ds01 = _mm_shuffle_epi8(d1, i0); - const __m128i ds0 = _mm_and_si128(ds00, ds01); - - const __m128i ds10 = _mm_shuffle_epi8(d0, _mm_srli_epi16(i1, 4)); - const __m128i ds11 = _mm_shuffle_epi8(d1, i1); - const __m128i ds1 = _mm_and_si128(ds10, ds11); - - const uint64_t bm0 = (uint16_t)_mm_movemask_epi8(ds0); - const uint64_t bm1 = (uint16_t)_mm_movemask_epi8(ds1); - const uint64_t dm0 = - (uint16_t)_mm_movemask_epi8(_mm_cmpgt_epi8(ds0, _mm_setzero_si128())); - const uint64_t dm1 = - (uint16_t)_mm_movemask_epi8(_mm_cmpgt_epi8(ds1, _mm_setzero_si128())); - - block->backslash = bm0 | (bm1 << 16); - block->delimiter = dm0 | (dm1 << 16); -} - -zone_nonnull_all -static zone_really_inline void copy_quoted_string_block( - const char *text, uint8_t *wire, string_block_t *block) -{ - const __m128i b = _mm_set1_epi8('\\'); - const __m128i q = _mm_set1_epi8('\"'); - - const __m128i i0 = _mm_loadu_si128((const __m128i *)(text)); - const __m128i i1 = _mm_loadu_si128((const __m128i *)(text+16)); - _mm_storeu_si128((__m128i *)(wire), i0); - _mm_storeu_si128((__m128i *)(wire+16), i1); - - const uint64_t bm0 = (uint16_t)_mm_movemask_epi8(_mm_cmpeq_epi8(i0, b)); - const uint64_t bm1 = (uint16_t)_mm_movemask_epi8(_mm_cmpeq_epi8(i1, b)); - const uint64_t qm0 = (uint16_t)_mm_movemask_epi8(_mm_cmpeq_epi8(i0, q)); - const uint64_t qm1 = (uint16_t)_mm_movemask_epi8(_mm_cmpeq_epi8(i1, q)); - - block->backslash = bm0 | (bm1 << 16); - block->delimiter = qm0 | (qm1 << 16); -} - -#endif // STRING_H diff --git a/src/westmere/type.h b/src/westmere/type.h index ba3407f..5d9aed3 100644 --- a/src/westmere/type.h +++ b/src/westmere/type.h @@ -99,18 +99,22 @@ static zone_really_inline int32_t find_type_or_class( // 0x0d : carriage return : 0b0000_1101 // // deltas do not catch ('.' 
(0x2e) or '/' (0x2f)), but neither is a delimiter - const __m128i deltas = _mm_setr_epi8( - -16, -32, -45, 70, -65, 37, -97, 5, 0, 0, 0, 0, 0, 0, 0, 0); + //const __m128i deltas = _mm_setr_epi8( + // -16, -32, -45, 70, -65, 37, -97, 5, 0, 0, 0, 0, 0, 0, 0, 0); const __m128i nibbles = _mm_and_si128(_mm_srli_epi32(input, 4), _mm_set1_epi8(0x0f)); - const __m128i check = _mm_add_epi8(_mm_shuffle_epi8(deltas, nibbles), input); + //const __m128i check = _mm_add_epi8(_mm_shuffle_epi8(deltas, nibbles), input); - int mask = (uint16_t)_mm_movemask_epi8(check); - uint16_t length = (uint16_t)__builtin_ctz((unsigned int)mask); + //int mask = (uint16_t)_mm_movemask_epi8(check); + //uint16_t length = (uint16_t)__builtin_ctz((unsigned int)mask); const __m128i upper = _mm_setr_epi8( -1, -1, -1, -1, -1, -1, -33, -33, -1, -1, -1, -1, -1, -1, -1, -1); - const __m128i zero_mask = _mm_loadu_si128((const __m128i *)(zero_masks + 16 - length)); + __m128i zero_mask; + //if (token->length > 16) + // zero_mask = _mm_loadu_si128((const __m128i *)zero_masks); + //else + zero_mask = _mm_loadu_si128((const __m128i *)(zero_masks + 16 - token->length)); input = _mm_and_si128(input, _mm_shuffle_epi8(upper, nibbles)); input = _mm_andnot_si128(zero_mask, input); @@ -124,8 +128,8 @@ static zone_really_inline int32_t find_type_or_class( *code = (uint16_t)(*symbol)->value; - const uint8_t delimiter = (uint8_t)token->data[length]; - if (_mm_test_all_zeros(xorthem, xorthem) & (contiguous[delimiter] != CONTIGUOUS)) + //const uint8_t delimiter = (uint8_t)token->data[token->length]; + if (_mm_test_all_zeros(xorthem, xorthem))// & (contiguous[delimiter] != CONTIGUOUS)) return types_and_classes[index].type; return 0; } diff --git a/src/zone.c b/src/zone.c index 2613109..d556d21 100644 --- a/src/zone.c +++ b/src/zone.c @@ -228,7 +228,7 @@ static void set_defaults(zone_parser_t *parser) if (!parser->options.log.write && !parser->options.log.categories) parser->options.log.categories = (uint32_t)-1; parser->owner 
= &parser->file->owner; - parser->rdata = &parser->cache.rdata.blocks[0]; + parser->rdata = &parser->buffers.rdata.blocks[0]; } diagnostic_push() @@ -294,7 +294,7 @@ void zone_close(zone_parser_t *parser) int32_t zone_open( zone_parser_t *parser, const zone_options_t *options, - zone_cache_t *cache, + zone_buffers_t *buffers, const char *path, void *user_data) { @@ -314,10 +314,10 @@ int32_t zone_open( result = ZONE_BAD_PARAMETER; goto error; } - parser->cache.size = cache->size; - parser->cache.owner.serial = 0; - parser->cache.owner.blocks = cache->owner; - parser->cache.rdata.blocks = cache->rdata; + parser->buffers.size = buffers->size; + parser->buffers.owner.serial = 0; + parser->buffers.owner.blocks = buffers->owner; + parser->buffers.rdata.blocks = buffers->rdata; file->owner = file->origin; file->last_type = 0; file->last_class = options->default_class; @@ -336,13 +336,13 @@ diagnostic_pop() int32_t zone_parse( zone_parser_t *parser, const zone_options_t *options, - zone_cache_t *cache, + zone_buffers_t *buffers, const char *path, void *user_data) { int32_t result; - if ((result = zone_open(parser, options, cache, path, user_data)) < 0) + if ((result = zone_open(parser, options, buffers, path, user_data)) < 0) return result; result = parse(parser, user_data); zone_close(parser); @@ -352,7 +352,7 @@ int32_t zone_parse( int32_t zone_parse_string( zone_parser_t *parser, const zone_options_t *options, - zone_cache_t *cache, + zone_buffers_t *buffers, const char *string, size_t length, void *user_data) @@ -387,10 +387,10 @@ int32_t zone_parse_string( file->lines.head = file->lines.tape; file->lines.tail = file->lines.tape; - parser->cache.size = cache->size; - parser->cache.owner.serial = 0; - parser->cache.owner.blocks = cache->owner; - parser->cache.rdata.blocks = cache->rdata; + parser->buffers.size = buffers->size; + parser->buffers.owner.serial = 0; + parser->buffers.owner.blocks = buffers->owner; + parser->buffers.rdata.blocks = buffers->rdata; 
file->owner = file->origin; file->last_type = 0; file->last_class = options->default_class; diff --git a/tests/base32.c b/tests/base32.c index 3336dbf..010e90a 100644 --- a/tests/base32.c +++ b/tests/base32.c @@ -49,6 +49,7 @@ void base32_syntax(void **state) const uint8_t *octets; const size_t length; } tests[] = { + // FIXME: add tests to ensure padding is not allowed // bad character in contiguous set { ZONE_SYNTAX_ERROR, "2t7b4g4vsa5zmi47k61mv5bv1a22bojr", NULL, 0 }, // ^ (not in base32 alphabet) @@ -67,9 +68,9 @@ void base32_syntax(void **state) char rr[256]; const char rrfmt[] = " NSEC3 1 1 12 aabbccdd ( %s A NS )"; zone_parser_t parser = { 0 }; - zone_name_block_t name; - zone_rdata_block_t rdata; - zone_cache_t cache = { 1, &name, &rdata }; + zone_name_buffer_t name; + zone_rdata_buffer_t rdata; + zone_buffers_t buffers = { 1, &name, &rdata }; zone_options_t options = { 0 }; int32_t result; @@ -80,7 +81,7 @@ void base32_syntax(void **state) options.default_ttl = 3600; options.default_class = ZONE_IN; - result = zone_parse_string(&parser, &options, &cache, rr, strlen(rr), NULL); + result = zone_parse_string(&parser, &options, &buffers, rr, strlen(rr), NULL); assert_int_equal(result, tests[i].result); if (tests[i].result == ZONE_SUCCESS) assert_memory_equal(rdata.octets+9, tests[i].octets, tests[i].length); diff --git a/tests/include.c b/tests/include.c index b42703c..7543634 100644 --- a/tests/include.c +++ b/tests/include.c @@ -60,6 +60,8 @@ int teardown(void **state) if (input->include.content) free(input->include.content); + free(input); + return 0; } @@ -119,7 +121,7 @@ int setup(void **state) *state = input; return 0; err: - teardown((void **)&input); + teardown((void**)&input); return -1; } @@ -157,9 +159,9 @@ void include_from_string(void **state) { input_t *input; zone_parser_t parser = { 0 }; - zone_name_block_t name; - zone_rdata_block_t rdata; - zone_cache_t cache = { 1, &name, &rdata }; + zone_name_buffer_t name; + zone_rdata_buffer_t rdata; + 
zone_buffers_t buffers = { 1, &name, &rdata }; zone_options_t options = { 0 }; int32_t result; @@ -172,13 +174,13 @@ void include_from_string(void **state) // verify $INCLUDE is denied by default when parsing strings. const char *str = input->includer.content; - result = zone_parse_string(&parser, &options, &cache, str, strlen(str), NULL); + result = zone_parse_string(&parser, &options, &buffers, str, strlen(str), NULL); assert_false(options.no_includes); assert_int_equal(result, ZONE_SUCCESS); // verify $INCLUDE is allowed and works as intented if configured. options.no_includes = true; - result = zone_parse_string(&parser, &options, &cache, str, strlen(str), NULL); + result = zone_parse_string(&parser, &options, &buffers, str, strlen(str), NULL); assert_int_equal(result, ZONE_NOT_PERMITTED); } diff --git a/tests/ip4.c b/tests/ip4.c index 554d8ef..553447b 100644 --- a/tests/ip4.c +++ b/tests/ip4.c @@ -82,9 +82,9 @@ void ipv4_syntax(void **state) for (size_t i=0, n=sizeof(tests)/sizeof(tests[0]); i < n; i++) { char rr[128]; zone_parser_t parser = { 0 }; - zone_name_block_t name; - zone_rdata_block_t rdata; - zone_cache_t cache = { 1, &name, &rdata }; + zone_name_buffer_t name; + zone_rdata_buffer_t rdata; + zone_buffers_t buffers = { 1, &name, &rdata }; zone_options_t options = { 0 }; int32_t result; @@ -95,7 +95,7 @@ void ipv4_syntax(void **state) options.default_ttl = 3600; options.default_class = ZONE_IN; - result = zone_parse_string(&parser, &options, &cache, rr, strlen(rr), NULL); + result = zone_parse_string(&parser, &options, &buffers, rr, strlen(rr), NULL); assert_int_equal(result, tests[i].result); if (tests[i].octets) assert_memory_equal(rdata.octets, tests[i].octets, 4); diff --git a/tests/time.c b/tests/time.c index 5308e10..522cca7 100644 --- a/tests/time.c +++ b/tests/time.c @@ -45,40 +45,40 @@ static int32_t add_rr( void time_stamp_syntax(void **state) { static const struct { - int32_t result; const char *timestamp; uint32_t seconds; + int32_t 
result; } tests[] = { // bad number of digits - { ZONE_SYNTAX_ERROR, "202301010101", 0 }, - { ZONE_SYNTAX_ERROR, "202301010101010", 0 }, + { "202301010101", 0, ZONE_SYNTAX_ERROR }, + { "202301010101010", 0, ZONE_SYNTAX_ERROR }, // year before 1970 - { ZONE_SYNTAX_ERROR, "19690101010101", 0 }, + { "19690101010101", 0, ZONE_SYNTAX_ERROR }, // year after 2106 - { ZONE_SYNTAX_ERROR, "21070101010101", 0 }, + { "21070101010101", 0, ZONE_SYNTAX_ERROR }, // month 0 - { ZONE_SYNTAX_ERROR, "20230001010101", 0 }, + { "20230001010101", 0, ZONE_SYNTAX_ERROR }, // month 13 - { ZONE_SYNTAX_ERROR, "20231301010101", 0 }, + { "20231301010101", 0, ZONE_SYNTAX_ERROR }, // february 29 non-leap year - { ZONE_SYNTAX_ERROR, "20230229010101", 0 }, + { "20230229010101", 0, ZONE_SYNTAX_ERROR }, // february 29 leap year - { ZONE_SUCCESS, "20240229010101", 1709168461 }, + { "20240229010101", 1709168461, ZONE_SUCCESS }, // hour 24 - { ZONE_SYNTAX_ERROR, "20230101240101", 0 }, + { "20230101240101", 0, ZONE_SYNTAX_ERROR }, // minute 60 - { ZONE_SYNTAX_ERROR, "20230101016001", 0 }, + { "20230101016001", 0, ZONE_SYNTAX_ERROR }, // correct time stamp - { ZONE_SUCCESS, "20230704160000", 1688486400 } + { "20230704160000", 1688486400, ZONE_SUCCESS } }; (void)state; for (size_t i=0, n=sizeof(tests)/sizeof(tests[0]); i < n; i++) { zone_parser_t parser = { 0 }; - zone_name_block_t name; - zone_rdata_block_t rdata; - zone_cache_t cache = { 1, &name, &rdata }; + zone_name_buffer_t name; + zone_rdata_buffer_t rdata; + zone_buffers_t buffers = { 1, &name, &rdata }; zone_options_t options = { 0 }; int32_t result; @@ -99,7 +99,7 @@ void time_stamp_syntax(void **state) options.default_ttl = 3600; options.default_class = ZONE_IN; - result = zone_parse_string(&parser, &options, &cache, rr, strlen(rr), NULL); + result = zone_parse_string(&parser, &options, &buffers, rr, strlen(rr), NULL); free(rr); assert_int_equal(result, tests[i].result); if (tests[i].result != ZONE_SUCCESS) diff --git a/tests/types.c 
b/tests/types.c index 6c95b63..50c1da6 100644 --- a/tests/types.c +++ b/tests/types.c @@ -855,9 +855,9 @@ void supported_types(void **state) for (size_t i = 0, n = sizeof(tests)/sizeof(tests[0]); i < n; i++) { test_t test = tests[i]; zone_parser_t parser = { 0 }; - zone_name_block_t name; - zone_rdata_block_t rdata; - zone_cache_t cache = { 1, &name, &rdata }; + zone_name_buffer_t name; + zone_rdata_buffer_t rdata; + zone_buffers_t buffers = { 1, &name, &rdata }; zone_options_t options = { 0 }; int32_t result; @@ -868,7 +868,7 @@ void supported_types(void **state) fprintf(stderr, "INPUT: '%s'\n", tests[i].text); - result = zone_parse_string(&parser, &options, &cache, tests[i].text, strlen(tests[i].text), &test); + result = zone_parse_string(&parser, &options, &buffers, tests[i].text, strlen(tests[i].text), &test); assert_int_equal(result, ZONE_SUCCESS); } }