From a0f4244cc2f98bcaec3daea1d359f8a2fe44b29e Mon Sep 17 00:00:00 2001 From: Jeroen Koekkoek Date: Thu, 8 Jun 2023 13:22:04 +0200 Subject: [PATCH 1/3] Drop secondary (length) index Drop secondary indexes to simplify tape operation and disallow placement of quotes around values other than domain names and text. Additionally, no attempt is made to parse symbols for which no symbolic constants have been specified and use of pretty TTL notation, e.g. 1m2s, is only allowed if the pretty_ttl option is enabled. Fixes #30. Fixes #31. Fixes #38. Fixes #50. --- .github/workflows/build-test.yml | 1 - CMakeLists.txt | 14 +- include/zone.h | 59 +- include/zone/attributes.h | 35 +- src/bench.c | 71 +-- src/diagnostic.h | 4 +- src/fallback/bench.c | 12 +- src/fallback/ip4.h | 52 ++ src/fallback/name.h | 158 ++++-- src/fallback/parser.c | 11 +- src/fallback/scanner.h | 404 ++++++-------- src/fallback/text.h | 84 +-- src/fallback/type.h | 142 +++-- src/generic/base16.h | 106 ++-- src/generic/base32.h | 172 +++--- src/generic/base64.h | 82 +-- src/generic/ip4.h | 42 -- src/generic/ip6.h | 191 ++++++- src/generic/name.h | 174 +++--- src/generic/nsec.h | 11 +- src/generic/number.h | 181 +++--- src/generic/scanner.h | 382 +++++++------ src/generic/text.h | 72 ++- src/generic/time.h | 110 ++-- src/generic/ttl.h | 212 ++++---- src/generic/type.h | 226 ++++---- src/haswell/bench.c | 12 +- src/haswell/delimited.h | 50 ++ src/haswell/parser.c | 10 +- src/haswell/simd.h | 46 +- src/lexer.c | 57 -- src/lexer.h | 297 +++++++--- src/log.c | 18 - src/log.h | 31 +- src/parser.c | 514 +++++++++-------- src/parser.h | 908 +++++++++++++++++-------------- src/table.c | 31 -- src/table.h | 31 ++ src/visit.h | 12 +- src/westmere/bench.c | 12 +- src/westmere/delimited.h | 50 ++ src/westmere/ip4.h | 124 +---- src/westmere/name.h | 0 src/westmere/parser.c | 10 +- src/westmere/simd.h | 35 +- src/zone.c | 98 ++-- tests/include.c | 6 + tests/types.c | 6 +- 48 files changed, 2871 insertions(+), 2495 
deletions(-) create mode 100644 src/fallback/ip4.h delete mode 100644 src/generic/ip4.h create mode 100644 src/haswell/delimited.h delete mode 100644 src/lexer.c delete mode 100644 src/table.c create mode 100644 src/table.h create mode 100644 src/westmere/delimited.h create mode 100644 src/westmere/name.h diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index b26a5ab..88cf979 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -26,7 +26,6 @@ jobs: generator: "Visual Studio 17 2022" build_type: Debug build_tool_options: "-nologo -verbosity:minimal -maxcpucount:4 -p:CL_MPCount=4" - warnings_as_errors: off steps: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 diff --git a/CMakeLists.txt b/CMakeLists.txt index 6926429..831bb45 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -128,22 +128,20 @@ generate_export_header( zone BASE_NAME ZONE EXPORT_FILE_NAME include/zone/export.h) target_include_directories( - zone PUBLIC $ - $ - PRIVATE $ - $) + zone PUBLIC $ + $ + $ + $) target_sources(zone PRIVATE src/zone.c src/types.c - src/table.c src/log.c src/parser.c - src/lexer.c + src/fallback/parser.c src/generic/base16.c src/generic/base32.c - src/generic/base64.c - src/fallback/parser.c) + src/generic/base64.c) add_executable(zone-bench src/bench.c src/fallback/bench.c) target_include_directories( diff --git a/include/zone.h b/include/zone.h index 3b06369..b80fc9c 100644 --- a/include/zone.h +++ b/include/zone.h @@ -202,19 +202,6 @@ struct zone_table { const zone_symbol_t *symbols; // sorted for use with bsearch }; -ZONE_EXPORT int -zone_compare(const void *s1, const void *s2) -zone_nonnull_all(); - -zone_always_inline() -zone_nonnull_all() -inline zone_symbol_t *zone_lookup( - const zone_table_t *table, const zone_string_t *string) -{ - const zone_symbol_t key = { *string, 0 }; - return bsearch(&key, table->symbols, table->length, sizeof(key), zone_compare); -} - // @private // // bsearch is 
quite slow compared to a hash table, but a hash table is either @@ -371,21 +358,6 @@ struct zone_type_info { #define ZONE_BLOCK_SIZE (64) #define ZONE_WINDOW_SIZE (256 * ZONE_BLOCK_SIZE) // 16KB -// @private -// non-delimiting tokens may contain (escaped) newlines. tracking newlines -// within tokens by taping them makes the lex operation more complex, resulting -// in a significantly larger binary and slower operation, and may introduce an -// infinite loop if the tape may not be sufficiently large enough. tokens -// containing newlines is very much an edge case, therefore the scanner -// implements an unlikely slow path that tracks the number of escaped newlines -// during tokenization and registers them with each consecutive newline token. -// this mode of operation nicely isolates location tracking in the scanner and -// accommodates parallel processing should that ever be desired -typedef struct zone_index zone_index_t; -struct zone_index { - const char *data; - uint32_t newlines; // number of escaped newlines (stored per newline) -}; // tape capacity must be large enough to hold every token from a single // worst-case read (e.g. 64 consecutive line feeds). in practice a single @@ -405,6 +377,17 @@ struct zone_rdata_block { uint8_t octets[ 65535 + 4096 /* nsec padding */ ]; }; +// @private +// non-delimiting tokens may contain (escaped) newlines. tracking newlines +// within tokens by taping them makes the lex operation more complex, resulting +// in a significantly larger binary and slower operation, and may introduce an +// infinite loop if the tape may not be sufficiently large enough. tokens +// containing newlines is very much an edge case, therefore the scanner +// implements an unlikely slow path that tracks the number of escaped newlines +// during tokenization and registers them with each consecutive newline token. 
+// this mode of operation nicely isolates location tracking in the scanner and +// accommodates parallel processing should that ever be desired + // @private typedef struct zone_file zone_file_t; struct zone_file { @@ -413,9 +396,13 @@ struct zone_file { uint16_t last_type; uint32_t last_ttl, default_ttl; uint16_t last_class; - size_t line; - const char *name; - const char *path; + // non-terminating line feeds, i.e. escaped line feeds, line feeds in quoted + // sections or within parentheses, are counted, but deferred for consistency + // in error reports + size_t span; /**< number of lines spanned by record */ + size_t line; /**< starting line of record */ + char *name; + char *path; FILE *handle; bool grouped; bool start_of_line; @@ -426,15 +413,15 @@ struct zone_file { } buffer; // indexer state is kept per-file struct { - uint32_t newlines; // number of escaped newlines uint64_t in_comment; uint64_t in_quoted; uint64_t is_escaped; uint64_t follows_contiguous; - // vector of tokens generated by the indexer. guaranteed to be large - // enough to hold every token for a single read + terminators - zone_index_t *head, *tail, tape[ZONE_TAPE_SIZE + 2]; - } indexer; + } state; + // vector of tokens generated by the indexer. 
guaranteed to be large + // enough to hold every token for a single read + terminators + struct { const char **head, **tail, *tape[ZONE_TAPE_SIZE + 2]; } fields; + struct { uint16_t *head, *tail, tape[ZONE_TAPE_SIZE + 1]; } lines; }; typedef struct zone_parser zone_parser_t; diff --git a/include/zone/attributes.h b/include/zone/attributes.h index 112af6a..03fd5b2 100644 --- a/include/zone/attributes.h +++ b/include/zone/attributes.h @@ -40,38 +40,29 @@ #endif #define zone_nonnull(params) zone_attribute((__nonnull__ params)) -#define zone_nonnull_all() zone_attribute((__nonnull__)) +#define zone_nonnull_all zone_attribute((__nonnull__)) #if _MSC_VER -# define zone_always_inline() __forceinline -# define zone_never_inline() __declspec(noinline) -# define zone_noreturn() __declspec(noreturn) -# define zone_allocator(...) +# define zone_really_inline __forceinline +# define zone_never_inline __declspec(noinline) +# define zone_warn_unused_result -# define zone_unlikely(x) +# define zone_likely(params) (params) +# define zone_unlikely(params) (params) # define zone_format(params) # define zone_format_printf(string_index, first_to_check) #else // _MSC_VER -# define zone_always_inline() zone_attribute((always_inline)) -# define zone_never_inline() zone_attribute((noinline)) -# if zone_has_attribute(noreturn) -# define zone_noreturn() zone_attribute((noreturn)) +# define zone_really_inline inline zone_attribute((always_inline)) +# define zone_never_inline zone_attribute((noinline)) +# if zone_has_attribute(warn_unused_result) +# define zone_warn_unused_result zone_attribute((warn_unused_result)) # else -# define zone_noreturn() +# define zone_warn_unused_result # endif -# if zone_has_attribute(malloc) -# if zone_gcc -# define zone_allocator(...) zone_attribute((malloc(__VA_ARGS__))) -# else -# define zone_allocator(...) zone_attribute((malloc)) -# endif -# else -# define zone_allocator(...) 
-# endif - -# define zone_unlikely(params) __builtin_expect((params), 0) +# define zone_likely(params) __builtin_expect(!!(params), 1) +# define zone_unlikely(params) __builtin_expect(!!(params), 0) # if zone_has_attribute(format) # define zone_format(params) zone_attribute((__format__ params)) diff --git a/src/bench.c b/src/bench.c index 4787b25..d84027c 100644 --- a/src/bench.c +++ b/src/bench.c @@ -7,7 +7,6 @@ * */ #include -#include #include #include #include @@ -21,6 +20,7 @@ #include "zone.h" #include "config.h" #include "isadetection.h" +#include "diagnostic.h" #if _WIN32 #define strcasecmp(s1, s2) _stricmp(s1, s2) @@ -28,24 +28,24 @@ #endif #if HAVE_HASWELL -extern zone_return_t zone_bench_haswell_lex(zone_parser_t *, size_t *); -extern zone_return_t zone_haswell_parse(zone_parser_t *, void *); +extern int32_t zone_bench_haswell_lex(zone_parser_t *, size_t *); +extern int32_t zone_haswell_parse(zone_parser_t *); #endif #if HAVE_WESTMERE -extern zone_return_t zone_bench_westmere_lex(zone_parser_t *, size_t *); -extern zone_return_t zone_westmere_parse(zone_parser_t *, void *); +extern int32_t zone_bench_westmere_lex(zone_parser_t *, size_t *); +extern int32_t zone_westmere_parse(zone_parser_t *); #endif -extern zone_return_t zone_bench_fallback_lex(zone_parser_t *, size_t *); -extern zone_return_t zone_fallback_parse(zone_parser_t *, void *); +extern int32_t zone_bench_fallback_lex(zone_parser_t *, size_t *); +extern int32_t zone_fallback_parse(zone_parser_t *); typedef struct target target_t; struct target { const char *name; uint32_t instruction_set; - zone_return_t (*bench_lex)(zone_parser_t *, size_t *); - zone_return_t (*parse)(zone_parser_t *, void *); + int32_t (*bench_lex)(zone_parser_t *, size_t *); + int32_t (*parse)(zone_parser_t *); }; static const target_t targets[] = { @@ -58,7 +58,7 @@ static const target_t targets[] = { { "fallback", 0, &zone_bench_fallback_lex, &zone_fallback_parse } }; -extern zone_return_t zone_open( +extern int32_t 
zone_open( zone_parser_t *, const zone_options_t *, zone_cache_t *, @@ -68,29 +68,19 @@ extern zone_return_t zone_open( extern void zone_close( zone_parser_t *); -static zone_return_t bench_lex(zone_parser_t *parser, const target_t *target) +static int32_t bench_lex(zone_parser_t *parser, const target_t *target) { size_t tokens = 0; - zone_return_t result; - volatile jmp_buf environment; - - switch ((result = setjmp((void *)environment))) { - case 0: - parser->environment = environment; - result = target->bench_lex(parser, &tokens); - assert(result == ZONE_SUCCESS); - break; - default: - assert(result < 0); - assert(parser->environment == environment); - break; - } + int32_t result; + + if ((result = target->bench_lex(parser, &tokens)) < 0) + return result; printf("Lexed %zu tokens\n", tokens); - return result; + return 0; } -static zone_return_t bench_accept( +static int32_t bench_accept( zone_parser_t *parser, const zone_name_t *owner, uint16_t type, @@ -111,28 +101,21 @@ static zone_return_t bench_accept( return ZONE_SUCCESS; } -static zone_return_t bench_parse(zone_parser_t *parser, const target_t *target) +static int32_t bench_parse(zone_parser_t *parser, const target_t *target) { size_t records = 0; - zone_return_t result; - volatile jmp_buf environment; - - switch ((result = setjmp((void *)environment))) { - case 0: - parser->environment = environment; - result = target->parse(parser, &records); - assert(result == ZONE_SUCCESS); - break; - default: - assert(result < 0); - assert(parser->environment == environment); - break; - } + int32_t result; + + parser->user_data = &records; + result = target->parse(parser); printf("Parsed %zu records\n", records); return result; } +diagnostic_push() +msvc_diagnostic_ignored(4996) + static const target_t *select_target(const char *name) { const size_t n = sizeof(targets)/sizeof(targets[0]); @@ -160,6 +143,8 @@ static const target_t *select_target(const char *name) return target; } +diagnostic_pop() + static void 
help(const char *program) { const char *format = @@ -207,7 +192,7 @@ int main(int argc, char *argv[]) if (optind > argc || argc - optind < 2) usage(program); - zone_return_t (*bench)(zone_parser_t *, const target_t *) = 0; + int32_t (*bench)(zone_parser_t *, const target_t *) = 0; if (strcasecmp(argv[optind], "lex") == 0) bench = &bench_lex; else if (strcasecmp(argv[optind], "parse") == 0) diff --git a/src/diagnostic.h b/src/diagnostic.h index 5e45259..feb84e5 100644 --- a/src/diagnostic.h +++ b/src/diagnostic.h @@ -12,8 +12,8 @@ #if _MSC_VER # define diagnostic_push() \ __pragma(warning(push)) -# define msvc_diagnostic_ignored(warning) \ - __pragma(warning(disable: ## warning)) +# define msvc_diagnostic_ignored(warning_specifier) \ + __pragma(warning(disable:warning_specifier)) # define diagnostic_pop() \ __pragma(warning(pop)) #elif __GNUC__ diff --git a/src/fallback/bench.c b/src/fallback/bench.c index 9d464cd..039fadb 100644 --- a/src/fallback/bench.c +++ b/src/fallback/bench.c @@ -15,16 +15,18 @@ diagnostic_push() clang_diagnostic_ignored(missing-prototypes) -zone_return_t zone_bench_fallback_lex(zone_parser_t *parser, size_t *tokens) +int32_t zone_bench_fallback_lex(zone_parser_t *parser, size_t *tokens) { - zone_token_t token; - zone_return_t result; + token_t token; (*tokens) = 0; - while ((result = lex(parser, &token)) >= 0 && token.data != zone_end_of_file) + lex(parser, &token); + while (token.code > 0) { (*tokens)++; + lex(parser, &token); + } - return result; + return token.code ? -1 : 0; } diagnostic_pop() diff --git a/src/fallback/ip4.h b/src/fallback/ip4.h new file mode 100644 index 0000000..79504c5 --- /dev/null +++ b/src/fallback/ip4.h @@ -0,0 +1,52 @@ +/* + * ip4.h -- fallback parser for IPv4 addresses + * + * Copyright (c) 2022-2023, NLnet Labs. All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + * + */ +#ifndef IP4_H +#define IP4_H + +zone_nonnull_all +static zone_really_inline int32_t parse_ip4( + zone_parser_t *parser, + const zone_type_info_t *type, + const zone_field_info_t *field, + token_t *token) +{ + int32_t r; + + if ((r = have_contiguous(parser, type, field, token)) < 0) + return r; + + uint8_t *o = &parser->rdata->octets[parser->rdata->length]; + const uint8_t *os = o; + uint64_t n = 0; + const char *p = token->data; + + *o = 0; + for (const char *ps = p;; p++) { + const uint64_t d = (uint8_t)*p - '0'; + if (d <= 9) { + n = n * 10 + (uint8_t)d; + } else { + if (!(p - ps) || p - ps > 3 || n > 255 || o - os > 3) + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); + ps = p + 1; + *o++ = (uint8_t)n; + if (*p != '.') + break; + n = 0; + } + } + + if (is_contiguous((uint8_t)*p) || o - os != 4) + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); + + parser->rdata->length += 4; + return ZONE_IP4; +} + +#endif // IP4_H diff --git a/src/fallback/name.h b/src/fallback/name.h index a862ae7..0223b62 100644 --- a/src/fallback/name.h +++ b/src/fallback/name.h @@ -9,84 +9,128 @@ #ifndef NAME_H #define NAME_H -#include - -static inline zone_return_t scan_name( +zone_nonnull_all +static zone_really_inline int32_t scan_name( zone_parser_t *parser, const zone_type_info_t *type, const zone_field_info_t *field, - zone_token_t *token, - uint8_t octets[256], + const uint8_t delimiters[256], + const token_t *token, + uint8_t octets[255 + ZONE_BLOCK_SIZE], size_t *length) { - size_t label = 0, octet = 1; - - (void)type; + uint8_t *l = octets, *b = octets + 1; + const uint8_t *bs = octets + 255; + const char *s = token->data; - for (size_t i=0; i < token->length; i++) { - if (octet >= 255) - SYNTAX_ERROR(parser, "Invalid name in %s, name exceeds maximum", - field->name.data); + l[0] = 0; - // FIXME: implement support for escape sequences + while (b < bs) { + const uint8_t c = (uint8_t)s[0]; 
+ if (c == '\\') { + uint8_t d[3]; + d[0] = (uint8_t)s[1] - '0'; - switch (token->data[i]) { - case '.': - if (octet - 1 == label) - SYNTAX_ERROR(parser, "Invalid name in %s, empty label", - field->name.data); - // fall through - case '\0': - if ((octet - 1) - label > 63) - SYNTAX_ERROR(parser, "Invalid name in %s, label exceeds maximum", - field->name.data); - octets[label] = (uint8_t)((octet - label) - 1); - if (token->data[i] != '.') - break; - label = octet; - octets[octet++] = 0; - break; - default: - octets[octet++] = (unsigned char)token->data[i]; - break; + if (d[0] > 2) { + b[0] = (uint8_t)s[1]; + b += 1; s += 2; + } else { /* \DDD decimal escape: digits must be 0-9, value must fit in one octet */ + d[1] = (uint8_t)s[2] - '0'; + d[2] = (uint8_t)s[3] - '0'; + const uint32_t v = d[0] * 100u + d[1] * 10u + d[2]; + if (d[1] > 9 || d[2] > 9 || v > 255) + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); + b[0] = (uint8_t)v; + b += 1; s += 4; + } + } else if (c == '.') { + if ((b - 1) - l > 63 || (b - 1) - l == 0) + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); + l[0] = (uint8_t)((b - 1) - l); + l = b; + l[0] = 0; + b += 1; s += 1; + } else if (delimiters[c] != token->code) { + if ((b - 1) - l > 63) + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); + l[0] = (uint8_t)((b - 1) - l); + break; + } else { + b[0] = c; + b += 1; s += 1; } } - *length = octet; - return 0; + if (delimiters[(uint8_t)*s] == token->code) + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); + + *length = (size_t)(b - octets); + return l[0] == 0 ? 
0 : ZONE_NAME; } -static inline void parse_name( +zone_nonnull_all +static zone_really_inline int32_t scan_contiguous_name( zone_parser_t *parser, const zone_type_info_t *type, const zone_field_info_t *field, - zone_token_t *token) + const token_t *token, + uint8_t octets[255 + ZONE_BLOCK_SIZE], + size_t *length) { - // a freestanding "@" denotes the current origin - if (token->length == 1 && token->data[0] == '@') { - memcpy(&parser->rdata->octets[parser->rdata->length], - parser->file->origin.octets, - parser->file->origin.length); - parser->rdata->length += parser->file->origin.length; - return; - } + return scan_name(parser, type, field, contiguous, token, octets, length); +} - size_t length; - uint8_t *data = &parser->rdata->octets[parser->rdata->length]; +zone_nonnull_all +static zone_really_inline int32_t scan_quoted_name( + zone_parser_t *parser, + const zone_type_info_t *type, + const zone_field_info_t *field, + const token_t *token, + uint8_t octets[255 + ZONE_BLOCK_SIZE], + size_t *length) +{ + return scan_name(parser, type, field, quoted, token, octets, length); +} - scan_name(parser, type, field, token, data, &length); - assert(length != 0); - if (data[length - 1] == 0) - return; +zone_nonnull_all +static zone_really_inline int32_t parse_name( + zone_parser_t *parser, + const zone_type_info_t *type, + const zone_field_info_t *field, + const token_t *token) +{ + int32_t r; + size_t n = 0; + uint8_t *o = &parser->rdata->octets[parser->rdata->length]; - if (length > 256 - parser->file->origin.length) - SYNTAX_ERROR(parser, "Invalid name in %s, exceeds 255 octets", field->name.data); + if (zone_likely(token->code == CONTIGUOUS)) { + // a freestanding "@" denotes the current origin + if (token->data[0] == '@' && !is_contiguous((uint8_t)token->data[1])) + goto relative; + r = scan_contiguous_name(parser, type, field, token, o, &n); + if (r == 0) + goto absolute; + if (r < 0) + return r; + } else if (token->code == QUOTED) { + r = scan_quoted_name(parser, 
type, field, token, o, &n); + if (r == 0) + goto absolute; + if (r < 0) + return r; + } else { + return have_string(parser, type, field, token); + } - parser->rdata->length += length; - memcpy(&parser->rdata->octets[parser->rdata->length], - parser->file->origin.octets, - parser->file->origin.length); - parser->rdata->length += parser->file->origin.length; +relative: + if (n > 255 - parser->file->origin.length) + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); + memcpy(o+n, parser->file->origin.octets, parser->file->origin.length); + parser->rdata->length += n + parser->file->origin.length; + return ZONE_NAME; +absolute: + parser->rdata->length += n; + return ZONE_NAME; } #endif // NAME_H diff --git a/src/fallback/parser.c b/src/fallback/parser.c index b1e6f4d..45b1116 100644 --- a/src/fallback/parser.c +++ b/src/fallback/parser.c @@ -6,21 +6,18 @@ * SPDX-License-Identifier: BSD-3-Clause * */ -#define _XOPEN_SOURCE -#include -#undef _XOPEN_SOURCE - #include "zone.h" #include "diagnostic.h" #include "log.h" #include "lexer.h" +#include "table.h" #include "fallback/scanner.h" #include "generic/number.h" #include "generic/ttl.h" #include "generic/time.h" #include "fallback/name.h" #include "fallback/type.h" -#include "generic/ip4.h" +#include "fallback/ip4.h" #include "generic/ip6.h" #include "fallback/text.h" #include "generic/base16.h" @@ -33,9 +30,9 @@ diagnostic_push() clang_diagnostic_ignored(missing-prototypes) -zone_return_t zone_fallback_parse(zone_parser_t *parser, void *user_data) +int32_t zone_fallback_parse(zone_parser_t *parser) { - return parse(parser, user_data); + return parse(parser); } diagnostic_pop() diff --git a/src/fallback/scanner.h b/src/fallback/scanner.h index 2df99ed..7ebdda4 100644 --- a/src/fallback/scanner.h +++ b/src/fallback/scanner.h @@ -1,5 +1,5 @@ /* - * scanner.h -- fallback (non-simd) lexical analyzer for (DNS) zone files + * scanner.h -- fallback (non-simd) lexical analyzer for (DNS) zone data * * Copyright (c) 
2022-2023, NLnet Labs. All rights reserved. * @@ -13,290 +13,248 @@ #include #include -zone_always_inline() -zone_nonnull_all() -static inline const char *scan_comment( +zone_nonnull_all +static zone_really_inline const char *scan_comment( zone_parser_t *parser, const char *start, const char *end) { - for (; start < end; start++) { - if (*start == '\n') + while (start < end) { + if (zone_unlikely(*start == '\n')) return start; + start += 1; } - parser->file->indexer.in_comment = 1; + parser->file->state.in_comment = 1; return end; } -zone_always_inline() -zone_nonnull_all() -static inline const char *scan_quoted( +zone_nonnull_all +static zone_really_inline const char *scan_quoted( zone_parser_t *parser, const char *start, const char *end) { while (start < end) { - switch (*start) { - case '\\': - parser->file->indexer.newlines += start[1] == '\n'; - start += 2; - break; - case '\"': - *parser->file->indexer.tail++ = (zone_index_t){ start, 0 }; - return start + 1; - case '\n': - parser->file->indexer.newlines += 1; - start += 1; - break; - default: - start += 1; - break; + if (*start == '\\') { + parser->file->lines.tail[0] += *(start + 1) == '\n'; + start += 2; + } else if (*start == '\"') { + return start + 1; + } else if (*start == '\n') { + parser->file->lines.tail[0]++; + start += 1; + } else { + start += 1; } } - parser->file->indexer.newlines -= *end == '\n'; - parser->file->indexer.in_quoted = 1; - parser->file->indexer.is_escaped = (start > end); + parser->file->lines.tail[0] -= *end == '\n'; + parser->file->state.in_quoted = 1; + parser->file->state.is_escaped = (start > end); return end; } -zone_always_inline() -zone_nonnull_all() -static inline const char *scan_contiguous( +zone_nonnull_all +static zone_really_inline const char *scan_contiguous( zone_parser_t *parser, const char *start, const char *end) { while (start < end) { - switch (*start) { - case '\\': - parser->file->indexer.newlines += start[1] == '\n'; - start += 2; - break; - case '\n': - 
case '(': - case ')': - case '"': - return start; - case '\t': - case '\r': - case ' ': - *parser->file->indexer.tail++ = (zone_index_t){ start, 0 }; - return start + 1; - case ';': - *parser->file->indexer.tail++ = (zone_index_t){ start, 0 }; - return start; - default: + if (zone_likely(is_contiguous((uint8_t)*start))) { + if (zone_likely(*start != '\\')) { start += 1; - break; + } else { + parser->file->lines.tail[0] += *(start + 1) == '\n'; + start += 2; + } + } else { + return start; } } - parser->file->indexer.newlines -= *end == '\n'; - parser->file->indexer.is_escaped = (start > end); - parser->file->indexer.follows_contiguous = 1; + parser->file->lines.tail[0] -= *end == '\n'; + parser->file->state.is_escaped = (start > end); + parser->file->state.follows_contiguous = 1; return end; } -zone_always_inline() -zone_nonnull_all() -static inline void scan( +zone_nonnull_all +static zone_really_inline void scan( zone_parser_t *parser, const char *start, const char *end) { - zone_file_t *file = parser->file; - - if (file->indexer.is_escaped) { - file->indexer.is_escaped = 0; - file->indexer.newlines = *start++ == '\n'; + if (parser->file->state.is_escaped) { + parser->file->state.is_escaped = 0; + parser->file->lines.tail[0] += (*start++ == '\n'); } - if (file->indexer.in_comment) { - file->indexer.in_comment = 0; + if (parser->file->state.follows_contiguous) { /* continue a contiguous token that ran to the end of the previous scan range */ + parser->file->state.follows_contiguous = 0; + start = scan_contiguous(parser, start, end); + } else if (parser->file->state.in_comment) { + parser->file->state.in_comment = 0; start = scan_comment(parser, start, end); - } else if (file->indexer.in_quoted) { - file->indexer.in_quoted = 0; + } else if (parser->file->state.in_quoted) { + parser->file->state.in_quoted = 0; start = scan_quoted(parser, start, end); - } else if (file->indexer.follows_contiguous) { - file->indexer.follows_contiguous = 0; - start = scan_contiguous(parser, start, end); } while (start < end) { - switch (*start) { - case '\n': - 
*file->indexer.tail++ = - (zone_index_t){ start++, file->indexer.newlines }; - file->indexer.newlines = 0; - break; - case '\t': - case '\r': - case ' ': - start++; - break; - case ';': - start = scan_comment(parser, start, end); - break; - case '(': - case ')': - *file->indexer.tail++ = (zone_index_t){ start++, 0 }; - break; - case '"': - *file->indexer.tail++ = (zone_index_t){ start++, 0 }; - start = scan_quoted(parser, start, end); - break; - default: - *file->indexer.tail++ = (zone_index_t){ start, 0 }; - start = scan_contiguous(parser, start, end); - break; + const int32_t code = contiguous[(uint8_t)*start]; + if (code == BLANK) { + start++; + } else if (code == CONTIGUOUS) { + *parser->file->fields.tail++ = start; + start = scan_contiguous(parser, start, end); + } else if (code == LINE_FEED) { + if (parser->file->lines.tail[0]) + *parser->file->fields.tail++ = line_feed; + else + *parser->file->fields.tail++ = start; + start++; + } else if (code == QUOTED) { + *parser->file->fields.tail++ = start; + start = scan_quoted(parser, start, end); + } else if (code == LEFT_PAREN) { + *parser->file->fields.tail++ = start; + start++; + } else if (code == RIGHT_PAREN) { + *parser->file->fields.tail++ = start; + start++; + } else { + assert(code == COMMENT); + start = scan_comment(parser, start, end); } } } -zone_always_inline() -zone_nonnull_all() -static inline void refill(zone_parser_t *parser) -{ - zone_file_t *file = parser->file; - - // grow buffer if necessary - if (file->buffer.length == file->buffer.size) { - size_t size = file->buffer.size + ZONE_WINDOW_SIZE; - char *data = file->buffer.data; - if (!(data = realloc(data, size + 1))) - SYNTAX_ERROR(parser, "actually out of memory"); - file->buffer.size = size; - file->buffer.data = data; - } - - size_t count = fread(file->buffer.data + file->buffer.length, - sizeof(file->buffer.data[0]), - file->buffer.size - file->buffer.length, - file->handle); - - if (count == 0 && ferror(file->handle)) - SYNTAX_ERROR(parser, 
"actually a read error"); - - // always null-terminate so terminating token can point to something - file->buffer.length += (size_t)count; - file->buffer.data[file->buffer.length] = '\0'; - file->end_of_file = feof(file->handle) != 0; -} - -extern const uint8_t *zone_forward; -extern const uint8_t *zone_jump; - -zone_never_inline() -zone_nonnull_all() -static zone_return_t step(zone_parser_t *parser, zone_token_t *token) +zone_nonnull_all +static zone_never_inline void step(zone_parser_t *parser, token_t *token) { - zone_file_t *file = parser->file; - const char *start, *end; bool start_of_line = false; + const char *data_limit, **tape_limit; - // start of line is initially always true - if (file->indexer.tail == file->indexer.tape) + // start of line is initially true + if (parser->file->fields.tail == parser->file->fields.tape) start_of_line = true; - else if (*(end = file->indexer.tail[-1].data) == '\n') - start_of_line = (file->buffer.data + file->buffer.index) - end == 1; + else if (parser->file->fields.tail[-1][0] == '\n') + start_of_line = !is_blank((uint8_t)parser->file->fields.tail[-1][1]); - file->indexer.head = file->indexer.tape; - file->indexer.tail = file->indexer.tape; + // restore deferred line count + parser->file->lines.tape[0] = parser->file->lines.tail[0]; + parser->file->lines.head = parser->file->lines.tape; + parser->file->lines.tail = parser->file->lines.tape; + // restore (possibly) deferred field + parser->file->fields.tape[0] = parser->file->fields.tail[1]; + parser->file->fields.head = parser->file->fields.tape; + parser->file->fields.tail = parser->file->fields.tape; + if (parser->file->fields.tape[0]) + parser->file->fields.tail++; shuffle: // refill if required - if (file->end_of_file == ZONE_HAVE_DATA) { - memmove(file->buffer.data, - file->buffer.data + file->buffer.index, - file->buffer.length - file->buffer.index); - file->buffer.length -= file->buffer.index; - file->buffer.index = 0; - refill(parser); + if 
(parser->file->end_of_file == ZONE_HAVE_DATA) { + int32_t code; + const char *data; + if (parser->file->fields.head[0]) + data = parser->file->fields.head[0]; + else + data = parser->file->buffer.data + parser->file->buffer.index; + parser->file->fields.head[0] = parser->file->buffer.data; + const size_t length = + (size_t)((parser->file->buffer.data+parser->file->buffer.length) - data); + const size_t index = + (size_t)((parser->file->buffer.data+parser->file->buffer.index) - data); + memmove(parser->file->buffer.data, data, length); + parser->file->buffer.length = length; + parser->file->buffer.index = index; + parser->file->buffer.data[length] = '\0'; + if ((code = refill(parser)) < 0) + DEFER_ERROR(parser, token, code); } - start = file->buffer.data + file->buffer.index; - - while (file->buffer.length - file->buffer.index >= ZONE_BLOCK_SIZE) { - if ((file->indexer.tape + ZONE_TAPE_SIZE) - file->indexer.tail < ZONE_BLOCK_SIZE) + data_limit = parser->file->buffer.data + parser->file->buffer.length; + tape_limit = parser->file->fields.tape + ZONE_TAPE_SIZE; + for (;;) { + const char *data = parser->file->buffer.data + parser->file->buffer.index; + if (data_limit - data < ZONE_BLOCK_SIZE) + break; + if (tape_limit - parser->file->fields.tail < ZONE_BLOCK_SIZE) goto terminate; - const char *block = &file->buffer.data[file->buffer.index]; - scan(parser, block, block + ZONE_BLOCK_SIZE); - file->buffer.index += ZONE_BLOCK_SIZE; + scan(parser, data, data + ZONE_BLOCK_SIZE); + parser->file->buffer.index += ZONE_BLOCK_SIZE; } - const size_t length = file->buffer.length - file->buffer.index; - if (length > (size_t)((file->indexer.tape + ZONE_TAPE_SIZE) - file->indexer.tail)) + const size_t length = parser->file->buffer.length - parser->file->buffer.index; + assert(length <= ZONE_BLOCK_SIZE); + if (parser->file->end_of_file == ZONE_HAVE_DATA) + goto terminate; + if (length > (size_t)(tape_limit - parser->file->fields.tail)) goto terminate; - const char *block = 
&file->buffer.data[file->buffer.index]; - scan(parser, block, block + length); - file->buffer.index += length; - file->end_of_file = ZONE_NO_MORE_DATA; + const char *data = &parser->file->buffer.data[parser->file->buffer.index]; + scan(parser, data, data + length); + parser->file->buffer.index += length; + parser->file->end_of_file = ZONE_NO_MORE_DATA; terminate: - // ensure tape contains no partial tokens - if ((file->indexer.follows_contiguous || file->indexer.in_quoted) && file->end_of_file != ZONE_NO_MORE_DATA) { - assert(file->indexer.tail > file->indexer.tape); - assert(file->indexer.in_comment == 0); - file->indexer.tail--; - file->indexer.in_quoted = 0; - file->indexer.is_escaped = 0; - file->indexer.follows_contiguous = 0; - file->buffer.index = (size_t)(file->indexer.tail[0].data - file->buffer.data); + // make sure tape contains no partial tokens + if (parser->file->end_of_file == ZONE_NO_MORE_DATA) { + parser->file->fields.tail[1] = NULL; + } else if (parser->file->state.follows_contiguous || parser->file->state.in_quoted) { + parser->file->fields.tail[0] = parser->file->fields.tail[-1]; + parser->file->fields.tail--; + } else { + parser->file->fields.tail[1] = NULL; } - file->indexer.tail[0] = - (zone_index_t){ file->buffer.data + file->buffer.length, 0 }; - file->indexer.tail[1] = - (zone_index_t){ file->buffer.data + file->buffer.length, 0 }; - file->start_of_line = file->indexer.head[0].data == start && start_of_line; + parser->file->fields.tail[0] = data_limit; + if (parser->file->fields.head[0] == parser->file->buffer.data) + parser->file->start_of_line = start_of_line; + else + parser->file->start_of_line = false; - do { - start = file->indexer.head[0].data; - end = file->indexer.head[1].data; - assert(start < end || (start == end && *start == '\0' && *end == '\0')); + for (;;) { + data = *parser->file->fields.head; + token->data = data; + token->code = (int32_t)contiguous[ (uint8_t)*data ]; + // end-of-file is idempotent + 
parser->file->fields.head += (*data != '\0'); + if (zone_likely(token->code == CONTIGUOUS)) { + return; + } else if (token->code == LINE_FEED) { + if (zone_unlikely(token->data == line_feed)) + parser->file->span += *parser->file->lines.head++; + parser->file->span++; + if (parser->file->grouped) + continue; + parser->file->line += parser->file->span; + parser->file->span = 0; + parser->file->start_of_line = !is_blank((uint8_t)*(token->data+1)); + return; + } else if (token->code == QUOTED) { + token->data++; + return; + } else if (token->code == END_OF_FILE) { + zone_file_t *file; - switch (zone_jump[ (unsigned char)*start ]) { - case 0: // contiguous - *token = (zone_token_t){ (size_t)(end - start), start }; - // discard index for blank or semicolon - file->indexer.head += zone_forward[ (unsigned char)*end ]; - return ZONE_CONTIGUOUS; - case 1: // quoted - *token = (zone_token_t){ (size_t)(end - start), start + 1 }; - // discard index for closing quote - file->indexer.head += 2; - return ZONE_QUOTED; - case 2: // newline - file->line += file->indexer.head[0].newlines + 1; - file->indexer.head++; - if (file->grouped) - break; - file->start_of_line = (end - start) == 1; - *token = (zone_token_t){ 1, start }; - return ZONE_DELIMITER; - case 3: // end of file - if (file->end_of_file != ZONE_NO_MORE_DATA) - goto shuffle; - if (file->grouped) - SYNTAX_ERROR(parser, "Missing closing brace"); - assert(start == file->buffer.data + file->buffer.length); - assert(end == file->buffer.data + file->buffer.length); - if (file->includer) { - parser->file = file->includer; - parser->owner = &parser->file->owner; - zone_close_file(parser, file); - } - *token = (zone_token_t){ 1, zone_end_of_file }; - return ZONE_DELIMITER; - case 4: // left parenthesis - if (file->grouped) - SYNTAX_ERROR(parser, "Nested opening brace"); - file->grouped = true; - file->indexer.head++; - break; - case 5: // right parenthesis - if (!file->grouped) - SYNTAX_ERROR(parser, "Closing brace without opening 
brace"); - file->grouped = false; - file->indexer.head++; - break; + if (parser->file->end_of_file != ZONE_NO_MORE_DATA) + goto shuffle; + if (parser->file->grouped) + DEFER_SYNTAX_ERROR(parser, token, "Missing closing brace"); + if (!parser->file->includer) + return; + file = parser->file; + parser->file = parser->file->includer; + parser->owner = &parser->file->owner; + zone_close_file(parser, file); + return; + } else if (token->code == LEFT_PAREN) { + if (parser->file->grouped) + DEFER_SYNTAX_ERROR(parser, token, "Nested opening brace"); + parser->file->grouped = true; + } else { + assert(token->code == RIGHT_PAREN); + if (!parser->file->grouped) + DEFER_SYNTAX_ERROR(parser, token, "Missing opening brace"); + parser->file->grouped = false; } - } while (1); + } } #endif // SCANNER_H diff --git a/src/fallback/text.h b/src/fallback/text.h index 61b59cd..ff41248 100644 --- a/src/fallback/text.h +++ b/src/fallback/text.h @@ -9,57 +9,65 @@ #ifndef TEXT_H #define TEXT_H -zone_always_inline() -zone_nonnull_all() -static inline void parse_string( +zone_nonnull_all +static zone_really_inline int32_t parse_string_internal( zone_parser_t *parser, const zone_type_info_t *type, const zone_field_info_t *field, - zone_token_t *token) + const uint8_t delimiters[256], + const token_t *token) { - uint8_t *wire = &parser->rdata->octets[parser->rdata->length + 1]; - uint8_t *limit = wire + 255; - const char *text = token->data, *end = token->data + token->length; + uint8_t *b = &parser->rdata->octets[parser->rdata->length + 1]; + const uint8_t *bs = b + 255; + const char *s = token->data; - while (text < end && wire < limit) { - if (*text == '\\') { - uint8_t digits[3]; - digits[0] = (unsigned char)text[1] - '0'; + while (b < bs) { + const uint8_t c = (uint8_t)*s; + if (c == '\\') { + uint8_t d[3]; + d[0] = (uint8_t)s[1] - '0'; - if (digits[0] > 2) { - digits[1] = (unsigned char)text[2] - '0'; - digits[2] = (unsigned char)text[3] - '0'; - if (digits[0] < 2) { - if (digits[1] > 9 || 
digits[2] > 9) - SEMANTIC_ERROR(parser, "Invalid %s in %s, bad escape sequence", - field->name.data, type->name.data); - } else { - if (digits[1] > 5 || digits[2] > 5) - SEMANTIC_ERROR(parser, "Invalid %s in %s, bad escape sequence", - field->name.data, type->name.data); - } - - wire[0] = digits[0] * 100 + digits[1] * 10 + digits[0]; - wire += 1; - text += 4; + if (d[0] > 2) { + b[0] = (uint8_t)s[1]; + b += 1; s += 2; } else { - wire[0] = (unsigned char)text[1]; - wire += 1; - text += 2; + uint8_t m = d[0] < 2 ? 9 : 5; + d[1] = (uint8_t)s[2] - '0'; + d[2] = (uint8_t)s[3] - '0'; + if (d[1] > m || d[2] > m) + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(type), NAME(field)); + b[0] = d[0] * 100 + d[1] * 10 + d[0]; + b += 1; s += 4; } + } else if (delimiters[c] != token->code) { + break; } else { - wire[0] = (unsigned char)text[0]; - text += 1; - wire += 1; + b[0] = c; + b += 1; s += 1; } } - if (text != end) - SYNTAX_ERROR(parser, "Invalid string in %s", - field->name.data); + if (delimiters[(uint8_t)*s] == token->code) + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); + + parser->rdata->octets[parser->rdata->length] = (uint8_t)((b - parser->rdata->octets) - 1); + parser->rdata->length += (size_t)(b - parser->rdata->octets); + return ZONE_STRING; +} - parser->rdata->octets[parser->rdata->length] = (uint8_t)((wire - parser->rdata->octets) - 1); - parser->rdata->length += (size_t)(wire - parser->rdata->octets); +zone_nonnull_all +static zone_really_inline int32_t parse_string( + zone_parser_t *parser, + const zone_type_info_t *type, + const zone_field_info_t *field, + const token_t *token) +{ + if (token->code == QUOTED) + return parse_string_internal(parser, type, field, quoted, token); + else if (token->code == CONTIGUOUS) + return parse_string_internal(parser, type, field, contiguous, token); + else + return have_string(parser, type, field, token); } #endif // TEXT_H diff --git a/src/fallback/type.h b/src/fallback/type.h index d0648d0..da3730a 
100644 --- a/src/fallback/type.h +++ b/src/fallback/type.h @@ -20,103 +20,97 @@ extern const zone_table_t *zone_identifiers; -zone_always_inline() -zone_nonnull_all() -static inline zone_return_t scan_type_or_class( +zone_nonnull_all +static zone_really_inline int32_t scan_type_or_class( zone_parser_t *parser, const zone_type_info_t *type, const zone_field_info_t *field, - const zone_token_t *token, + const token_t *token, uint16_t *code) { - const zone_symbol_t *symbol = NULL; - - if ((symbol = zone_lookup(zone_identifiers, token))) { - *code = symbol->value & 0xffff; - return symbol->value >> 16; - } else if (token->length > 4 && strncasecmp(token->data, "TYPE", 4) == 0) { - uint64_t v = 0; - for (size_t i=4; i < token->length; i++) { - const uint64_t n = (uint8_t)token->data[i] - '0'; - if (n > 9) - goto bad_type; - v = v * 10 + n; - if (v > UINT16_MAX) - goto bad_type; - } - - *code = (uint16_t)v; - return ZONE_TYPE; - } else if (token->length > 5 && strncasecmp(token->data, "CLASS", 5) == 0) { - uint64_t v = 0; - for (size_t i=5; i < token->length; i++) { - const uint64_t n = (uint8_t)token->data[i] - '0'; - if (n > 9) - goto bad_type; - v = v * 10 + n; - if (v > UINT16_MAX) - goto bad_type; - } - - *code = (uint16_t)v; - return ZONE_CLASS; + int32_t r; + const zone_symbol_t *s = NULL; + + if ((r = have_contiguous(parser, type, field, token)) < 0) + return r; + if ((s = lookup_symbol(zone_identifiers, token))) + return (void)(*code = s->value & 0xffff), s->value >> 16; + + if (strncasecmp(token->data, "TYPE", 4) == 0) + r = ZONE_TYPE; + else if (strncasecmp(token->data, "CLASS", 5) == 0) + r = ZONE_CLASS; + else + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); + + uint64_t n = 0; + const char *p, *q; + p = q = token->data + 4 + (r == ZONE_CLASS); + for (;; p++) { + const uint64_t d = (uint8_t)*p - '0'; + if (d > 9) + break; + n = n * 10 + d; } -bad_type: - SEMANTIC_ERROR(parser, "Invalid %s in %s", - field->name.data, type->name.data); + if 
(!n || n > UINT16_MAX || p - q >= 5 || is_contiguous((uint8_t)*p)) + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); + + *code = (uint16_t)n; + return r; } -zone_always_inline() -zone_nonnull_all() -static inline zone_return_t scan_type( +zone_nonnull_all +static zone_really_inline int32_t scan_type( zone_parser_t *parser, const zone_type_info_t *type, const zone_field_info_t *field, - const zone_token_t *token, + const token_t *token, uint16_t *code) { - const zone_symbol_t *symbol = NULL; - - if ((symbol = zone_lookup(zone_identifiers, token))) { - if (symbol->value >> 16 != ZONE_TYPE) - goto bad_type; - *code = symbol->value & 0xffff; - return ZONE_TYPE; - } else if (token->length > 4 && strncasecmp(token->data, "TYPE", 4) == 0) { - uint64_t v = 0; - for (size_t i=4; i < token->length; i++) { - const uint64_t n = (uint8_t)token->data[i] - '0'; - if (n > 9) - goto bad_type; - v = v * 10 + n; - if (v > UINT16_MAX) - goto bad_type; - } - - *code = (uint16_t)v; - return ZONE_TYPE; + int32_t r; + const zone_symbol_t *s; + + if ((r = have_contiguous(parser, type, field, token)) < 0) + return r; + if ((s = lookup_symbol(zone_identifiers, token))) + return (void)(*code = s->value & 0xffff), s->value >> 16; + + if (strncasecmp(token->data, "TYPE", 4) != 0) + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); + + uint64_t n = 0; + const char *p, *q; + p = q = token->data + 4; + for (;; p++) { + const uint64_t d = (uint8_t)*p - '0'; + if (d > 9) + break; + n = n * 10 + d; } -bad_type: - SEMANTIC_ERROR(parser, "Invalid %s in %s", - field->name.data, type->name.data); + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); + return ZONE_NAME; } -zone_always_inline() -zone_nonnull_all() -static inline void parse_type( +zone_nonnull_all +static zone_really_inline int32_t parse_type( zone_parser_t *parser, const zone_type_info_t *type, const zone_field_info_t *field, - zone_token_t *token) + token_t *token) { - uint16_t code; + 
int32_t r; + uint16_t c = 0; + + if ((r = have_contiguous(parser, type, field, token)) < 0) + return r; - scan_type(parser, type, field, token, &code); - code = htons(code); - memcpy(&parser->rdata->octets[parser->rdata->length], &code, sizeof(code)); - parser->rdata->length += sizeof(uint16_t); + scan_type(parser, type, field, token, &c); + c = htons(c); + memcpy(&parser->rdata->octets[parser->rdata->length], &c, sizeof(c)); + parser->rdata->length += sizeof(c); + return ZONE_NAME; } #endif // TYPE_H diff --git a/src/generic/base16.h b/src/generic/base16.h index 7927fce..056ab0d 100644 --- a/src/generic/base16.h +++ b/src/generic/base16.h @@ -15,95 +15,91 @@ static const uint8_t b16rmap_special = 0xf0; static const uint8_t b16rmap_end = 0xfd; static const uint8_t b16rmap_space = 0xfe; -zone_always_inline() -zone_nonnull_all() -static inline void parse_base16( +zone_nonnull_all +static zone_really_inline int32_t parse_base16( zone_parser_t *parser, const zone_type_info_t *type, const zone_field_info_t *field, - zone_token_t *token) + token_t *token) { + int32_t r; uint32_t state = 0; + if ((r = have_contiguous(parser, type, field, token)) < 0) + return r; + do { - for (size_t i=0; i < token->length; i++) { - const uint8_t ofs = zone_b16rmap[(uint8_t)token->data[i]]; - - if (ofs >= b16rmap_special) { - // ignore whitespace - if (ofs == b16rmap_space) - continue; - // end of base16 characters - if (ofs == b16rmap_end) - break; - SEMANTIC_ERROR(parser, "Invalid %s in %s record", - field->name.data, type->name.data); - } - - if (state == 0) { + const char *p = token->data; + for (;; p++) { + const uint8_t ofs = zone_b16rmap[(uint8_t)*p]; + + if (ofs >= b16rmap_special) + break; + + if (state == 0) parser->rdata->octets[parser->rdata->length] = (uint8_t)(ofs << 4); - state = 1; - } else { + else parser->rdata->octets[parser->rdata->length++] |= ofs; - state = 0; - } + + state = !state; } - } while (lex(parser, token)); + + if (is_contiguous((uint8_t)*p)) + 
SYNTAX_ERROR(parser, "Invalid %s in %s record", NAME(field), NAME(type)); + lex(parser, token); + } while (token->code == CONTIGUOUS); if (state != 0) - SEMANTIC_ERROR(parser, "Invalid %s in %s record", - field->name.data, type->name.data); + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); + if ((r = have_delimiter(parser, type, token)) < 0) + return r; + + return ZONE_BLOB; } -zone_always_inline() -zone_nonnull_all() -static inline void parse_salt( +zone_nonnull_all +static zone_really_inline int32_t parse_salt( zone_parser_t *parser, const zone_type_info_t *type, const zone_field_info_t *field, - zone_token_t *token) + token_t *token) { + int32_t r; uint32_t state = 0; - if (token->length == 1 && token->data[0] == '-') { + if ((r = have_contiguous(parser, type, field, token)) < 0) + return r; + + const char *p = token->data; + + if (*p == '-' && contiguous[ (uint8_t)*(p+1) ] != CONTIGUOUS) { parser->rdata->octets[parser->rdata->length++] = 0; - return; + return ZONE_STRING; } - if (token->length > 2 * 255) - SEMANTIC_ERROR(parser, "Invalid %s in %s", - field->name.data, type->name.data); - size_t rdlength = parser->rdata->length++; - for (size_t i=0; i < token->length; i++) { - const uint8_t ofs = zone_b16rmap[(uint8_t)token->data[i]]; + for (;; p++) { + const uint8_t ofs = zone_b16rmap[(uint8_t)*p]; - if (ofs >= b16rmap_special) { - // ignore whitespace - if (ofs == b16rmap_space) - continue; - // end of base16 characters - if (ofs == b16rmap_end) - break; - SEMANTIC_ERROR(parser, "Invalid %s in %s record", - field->name.data, type->name.data); - } + if (ofs >= b16rmap_special) + break; - if (state == 0) { + if (state == 0) parser->rdata->octets[parser->rdata->length] = (uint8_t)(ofs << 4); - state = 1; - } else { + else parser->rdata->octets[parser->rdata->length++] |= ofs; - state = 0; - } + + state = !state; } + if (p == token->data || contiguous[ (uint8_t)*p ] == CONTIGUOUS) + SYNTAX_ERROR(parser, "Invalid %s in %s record", NAME(field), 
NAME(type)); if (state != 0) - SEMANTIC_ERROR(parser, "Invalid %s in %s record", - field->name.data, type->name.data); + SYNTAX_ERROR(parser, "Invalid %s in %s record", NAME(field), NAME(type)); parser->rdata->octets[rdlength] = (uint8_t)(parser->rdata->length - rdlength); + return ZONE_STRING; } #endif // BASE16_H diff --git a/src/generic/base32.h b/src/generic/base32.h index 8505ac0..58c3016 100644 --- a/src/generic/base32.h +++ b/src/generic/base32.h @@ -15,114 +15,100 @@ static const uint8_t b32rmap_special = 0xf0; static const uint8_t b32rmap_end = 0xfd; static const uint8_t b32rmap_space = 0xfe; -zone_always_inline() -zone_nonnull_all() -static inline void parse_base32( +zone_nonnull_all +static zone_really_inline int32_t parse_base32( zone_parser_t *parser, const zone_type_info_t *type, const zone_field_info_t *field, - zone_token_t *token) + token_t *token) { + int32_t r; uint32_t state = 0; - size_t i = 0; + if ((r = have_contiguous(parser, type, field, token)) < 0) + return r; - for (; i < token->length; i++) { - const uint8_t ofs = zone_b32rmap[(uint8_t)token->data[i]]; + const char *p = token->data; + for (;; p++) { + const uint8_t ofs = zone_b32rmap[(uint8_t)*p]; - if (ofs >= b32rmap_special) { - // ignore whitespace - if (ofs == b32rmap_space) - continue; - // end of base32 characters - if (ofs == b32rmap_end) - break; - goto bad_char; - } + if (ofs >= b32rmap_special) + break; - switch (state) { - case 0: - parser->rdata->octets[parser->rdata->length ] = (uint8_t)(ofs << 3); - state = 1; - break; - case 1: - parser->rdata->octets[parser->rdata->length++] |= (uint8_t)(ofs >> 2); - parser->rdata->octets[parser->rdata->length ] = (uint8_t)(ofs << 6); - state = 2; - break; - case 2: - parser->rdata->octets[parser->rdata->length ] |= (uint8_t)(ofs << 1); - state = 3; - break; - case 3: - parser->rdata->octets[parser->rdata->length++] |= (uint8_t)(ofs >> 4); - parser->rdata->octets[parser->rdata->length ] = (uint8_t)(ofs << 4); - state = 4; - break; - case 
4: - parser->rdata->octets[parser->rdata->length++] |= (uint8_t)(ofs >> 1); - parser->rdata->octets[parser->rdata->length ] = (uint8_t)(ofs << 7); - state = 5; - break; - case 5: - parser->rdata->octets[parser->rdata->length ] |= (uint8_t)(ofs << 2); - state = 6; - break; - case 6: - parser->rdata->octets[parser->rdata->length++] |= (uint8_t)(ofs >> 3); - parser->rdata->octets[parser->rdata->length ] = (uint8_t)(ofs << 5); - state = 7; - break; - case 7: - parser->rdata->octets[parser->rdata->length++] |= ofs; - state = 0; - break; - } + switch (state) { + case 0: + parser->rdata->octets[parser->rdata->length ] = (uint8_t)(ofs << 3); + state = 1; + break; + case 1: + parser->rdata->octets[parser->rdata->length++] |= (uint8_t)(ofs >> 2); + parser->rdata->octets[parser->rdata->length ] = (uint8_t)(ofs << 6); + state = 2; + break; + case 2: + parser->rdata->octets[parser->rdata->length ] |= (uint8_t)(ofs << 1); + state = 3; + break; + case 3: + parser->rdata->octets[parser->rdata->length++] |= (uint8_t)(ofs >> 4); + parser->rdata->octets[parser->rdata->length ] = (uint8_t)(ofs << 4); + state = 4; + break; + case 4: + parser->rdata->octets[parser->rdata->length++] |= (uint8_t)(ofs >> 1); + parser->rdata->octets[parser->rdata->length ] = (uint8_t)(ofs << 7); + state = 5; + break; + case 5: + parser->rdata->octets[parser->rdata->length ] |= (uint8_t)(ofs << 2); + state = 6; + break; + case 6: + parser->rdata->octets[parser->rdata->length++] |= (uint8_t)(ofs >> 3); + parser->rdata->octets[parser->rdata->length ] = (uint8_t)(ofs << 5); + state = 7; + break; + case 7: + parser->rdata->octets[parser->rdata->length++] |= ofs; + state = 0; + break; } + } - if (i < token->length) { - assert(token->data[i] == '='); - for (; i < token->length ; i++) { - if (zone_b32rmap[(uint8_t)token->data[i]] == b32rmap_space) - continue; - if (token->data[i] != '=') - goto bad_char; - - switch (state) { - case 0: // invalid - case 1: - case 3: - case 6: - goto bad_char; - case 2: // require 
six pad characters - state = 13; - continue; - case 4: // require four pad characters - state = 11; - continue; - case 5: // require three pad characters - state = 10; - break; - case 7: // require one pad character - state = 8; - break; - default: - if (state == 8) - goto bad_char; - assert(state > 8); - state--; - break; - } - } + for (; *p == '-'; p++) { + switch (state) { + case 0: // invalid + case 1: + case 3: + case 6: + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); + case 2: // require six pad characters + state = 13; + continue; + case 4: // require four pad characters + state = 11; + continue; + case 5: // require three pad characters + state = 10; + break; + case 7: // require one pad character + state = 8; + break; + default: + if (state == 8) + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); + assert(state > 8); + state--; + break; } + } + if (contiguous[ (uint8_t)*p ] == CONTIGUOUS) + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); if (state != 0 && state != 8) - SEMANTIC_ERROR(parser, "Invalid %s in %s record", - field->name.data, type->name.data); + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); - return; -bad_char: - SEMANTIC_ERROR(parser, "Invalid base32 sequence"); + return ZONE_STRING; } #endif // BASE32_H diff --git a/src/generic/base64.h b/src/generic/base64.h index 055a81d..8f9ed36 100644 --- a/src/generic/base64.h +++ b/src/generic/base64.h @@ -11,38 +11,31 @@ extern const uint8_t *zone_b64rmap; -static const char Pad64 = '='; - static const uint8_t b64rmap_special = 0xf0; static const uint8_t b64rmap_end = 0xfd; static const uint8_t b64rmap_space = 0xfe; -zone_always_inline() -zone_nonnull_all() -static inline void parse_base64( +zone_nonnull_all +static zone_really_inline int32_t parse_base64( zone_parser_t *parser, const zone_type_info_t *type, const zone_field_info_t *field, - zone_token_t *token) + token_t *token) { + int32_t r; uint32_t state = 0; + if ((r = 
have_contiguous(parser, type, field, token)) < 0) + return r; + do { - size_t i=0; + const char *p = token->data; - for (; i < token->length; i++) { - const uint8_t ofs = zone_b64rmap[(uint8_t)token->data[i]]; + for (;; p++) { + const uint8_t ofs = zone_b64rmap[(uint8_t)*p]; - if (ofs >= b64rmap_special) { - // ignore whitespaces - if (ofs == b64rmap_space) - continue; - // end of base64 characters - if (ofs == b64rmap_end) - break; - // non-base64 character - goto bad_char; - } + if (ofs >= b64rmap_special) + break; switch (state) { case 0: @@ -65,58 +58,41 @@ static inline void parse_base64( state = 0; break; default: - goto bad_char; + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); } } - assert(i == token->length || token->data[i] == Pad64); - if (i < token->length) { + if (*p == '=') { switch (state) { case 0: // invalid, pad character in first position case 1: // invalid, pad character in second position - goto bad_char; - + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); case 2: // valid, one byte of info state = 4; - // fall through - case 4: - for (++i; i < token->length; i++) { - const uint8_t ofs = zone_b64rmap[(uint8_t)token->data[i]]; - if (ofs == b64rmap_space) - continue; - if (ofs == b64rmap_end) - break; - goto bad_char; - } - - if (i == token->length) + if (*p++ != '=') break; // fall through - case 3: // valid, two bytes of info + case 4: state = 5; - // fall through - case 5: - for (++i; i < token->length; i++) { - const uint8_t ofs = zone_b64rmap[(uint8_t)token->data[i]]; - if (ofs == b64rmap_space) - continue; - goto bad_char; - } + p++; + break; + default: break; } } - } while (lex(parser, token)); - if (state != 0 && state != 5) - SEMANTIC_ERROR(parser, "Invalid %s in %s record", - field->name.data, type->name.data); + if (is_contiguous((uint8_t)*p)) + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); + lex(parser, token); + } while (token->code == CONTIGUOUS); - return; + if ((r = 
have_delimiter(parser, type, token)) < 0) + return r; + if (state != 0 && state != 5) + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); -bad_char: - SEMANTIC_ERROR(parser, "Invalid %s in %s record", - field->name.data, type->name.data); + return ZONE_BLOB; } #endif // BASE64_H diff --git a/src/generic/ip4.h b/src/generic/ip4.h deleted file mode 100644 index 00d7dbb..0000000 --- a/src/generic/ip4.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * ip4.h -- fallback parser for IPv4 addresses - * - * Copyright (c) 2022-2023, NLnet Labs. All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause - * - */ -#ifndef IP4_H -#define IP4_H - -#if _WIN32 -#include -#else -#include -#include -#include -#endif - -zone_always_inline() -zone_nonnull_all() -static inline void parse_ip4( - zone_parser_t *parser, - const zone_type_info_t *type, - const zone_field_info_t *field, - zone_token_t *token) -{ - char buf[INET_ADDRSTRLEN + 1]; - - if (token->length > INET_ADDRSTRLEN) - SEMANTIC_ERROR(parser, "Invalid %s in %s", - field->name.data, type->name.data); - - memcpy(buf, token->data, token->length); - buf[token->length] = '\0'; - if (inet_pton(AF_INET, buf, &parser->rdata->octets[parser->rdata->length]) != 1) - SEMANTIC_ERROR(parser, "Invalid %s in %s", - field->name.data, type->name.data); - parser->rdata->length += sizeof(struct in_addr); -} - -#endif // IP4_H diff --git a/src/generic/ip6.h b/src/generic/ip6.h index 560794e..5eee793 100644 --- a/src/generic/ip6.h +++ b/src/generic/ip6.h @@ -9,34 +9,181 @@ #ifndef IP6_H #define IP6_H -#if _WIN32 -#include -#else -#include -#include -#include +#ifndef NS_INT16SZ +#define NS_INT16SZ 2 #endif -zone_always_inline() -zone_nonnull_all() -static inline void parse_ip6( +#ifndef NS_IN6ADDRSZ +#define NS_IN6ADDRSZ 16 +#endif + +#ifndef NS_INADDRSZ +#define NS_INADDRSZ 4 +#endif + +/* int + * inet_pton4(src, dst) + * like inet_aton() but without all the hexadecimal and shorthand. 
+ * return: + * 1 if `src' is a valid dotted quad, else 0. + * notice: + * does not touch `dst' unless it's returning 1. + * author: + * Paul Vixie, 1996. + */ +static int +inet_pton4(const char *src, uint8_t *dst) +{ + static const char digits[] = "0123456789"; + int saw_digit, octets, ch; + uint8_t tmp[NS_INADDRSZ], *tp; + + saw_digit = 0; + octets = 0; + *(tp = tmp) = 0; + while (contiguous[ (ch = *src++) ] == CONTIGUOUS) { + const char *pch; + + if ((pch = strchr(digits, ch)) != NULL) { + uint32_t new = *tp * 10 + (uint32_t)(pch - digits); + + if (new > 255) + return (0); + *tp = (uint8_t)new; + if (! saw_digit) { + if (++octets > 4) + return (0); + saw_digit = 1; + } + } else if (ch == '.' && saw_digit) { + if (octets == 4) + return (0); + *++tp = 0; + saw_digit = 0; + } else + return (0); + } + if (octets < 4) + return (0); + + memcpy(dst, tmp, NS_INADDRSZ); + return (1); +} + +/* int + * inet_pton6(src, dst) + * convert presentation level address to network order binary form. + * return: + * 1 if `src' is a valid [RFC1884 2.2] address, else 0. + * notice: + * (1) does not touch `dst' unless it's returning 1. + * (2) :: in a full address is silently ignored. + * credit: + * inspired by Mark Andrews. + * author: + * Paul Vixie, 1996. + */ +static int +inet_pton6(const char *src, uint8_t *dst) +{ + static const char xdigits_l[] = "0123456789abcdef", + xdigits_u[] = "0123456789ABCDEF"; + uint8_t tmp[NS_IN6ADDRSZ], *tp, *endp, *colonp; + const char *xdigits, *curtok; + int ch, saw_xdigit; + uint32_t val; + + memset((tp = tmp), '\0', NS_IN6ADDRSZ); + endp = tp + NS_IN6ADDRSZ; + colonp = NULL; + /* Leading :: requires some special handling. 
*/ + if (*src == ':') + if (*++src != ':') + return (0); + curtok = src; + saw_xdigit = 0; + val = 0; + while (contiguous[ (ch = *src++) ] == CONTIGUOUS) { + const char *pch; + + if ((pch = strchr((xdigits = xdigits_l), ch)) == NULL) + pch = strchr((xdigits = xdigits_u), ch); + if (pch != NULL) { + val <<= 4; + val |= (pch - xdigits); + if (val > 0xffff) + return (0); + saw_xdigit = 1; + continue; + } + if (ch == ':') { + curtok = src; + if (!saw_xdigit) { + if (colonp) + return (0); + colonp = tp; + continue; + } + if (tp + NS_INT16SZ > endp) + return (0); + *tp++ = (uint8_t) (val >> 8) & 0xff; + *tp++ = (uint8_t) val & 0xff; + saw_xdigit = 0; + val = 0; + continue; + } + if (ch == '.' && ((tp + NS_INADDRSZ) <= endp) && + inet_pton4(curtok, tp) > 0) { + tp += NS_INADDRSZ; + saw_xdigit = 0; + break; /* '\0' was seen by inet_pton4(). */ + } + return (0); + } + if (saw_xdigit) { + if (tp + NS_INT16SZ > endp) + return (0); + *tp++ = (uint8_t) (val >> 8) & 0xff; + *tp++ = (uint8_t) val & 0xff; + } + if (colonp != NULL) { + /* + * Since some memmove()'s erroneously fail to handle + * overlapping regions, we'll do the shift by hand. 
+ */ + const int n = (int)(tp - colonp); + int i; + + for (i = 1; i <= n; i++) { + endp[- i] = colonp[n - i]; + colonp[n - i] = 0; + } + tp = endp; + } + if (tp != endp) + return (0); + memcpy(dst, tmp, NS_IN6ADDRSZ); + return (1); +} + +zone_nonnull_all +static zone_really_inline int32_t parse_ip6( zone_parser_t *parser, const zone_type_info_t *type, const zone_field_info_t *field, - zone_token_t *token) + token_t *token) { - char buf[INET6_ADDRSTRLEN + 1]; - - if (token->length > INET6_ADDRSTRLEN) - SEMANTIC_ERROR(parser, "Invalid %s in %s", - field->name.data, type->name.data); - - memcpy(buf, token->data, token->length); - buf[token->length] = '\0'; - if (inet_pton(AF_INET6, buf, &parser->rdata->octets[parser->rdata->length]) != 1) - SEMANTIC_ERROR(parser, "Invalid %s in %s", - field->name.data, type->name.data); - parser->rdata->length += sizeof(struct in6_addr); + int32_t r; + + if ((r = have_contiguous(parser, type, field, token)) < 0) + return r; + + if (inet_pton6(token->data, &parser->rdata->octets[parser->rdata->length]) == 1) { + parser->rdata->length += 16; + return ZONE_IP6; + } + + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); } #endif // IP6_H diff --git a/src/generic/name.h b/src/generic/name.h index fd5d168..32ea0a2 100644 --- a/src/generic/name.h +++ b/src/generic/name.h @@ -11,58 +11,64 @@ typedef struct name_block name_block_t; struct name_block { - size_t length; - uint64_t escape_bits; - uint64_t label_bits; + delimited_t delimited; + uint64_t backslash; + uint64_t label; }; -zone_always_inline() -zone_nonnull_all() -static inline void copy_name_block( - name_block_t *block, const char *text, size_t size, uint8_t *wire) +zone_nonnull_all +static zone_really_inline void copy_name_block( + name_block_t *block, + const simd_table_t delimiter, + const simd_table_t space, + const char *source, + uint8_t *destination) { - simd_8x_t input; - - simd_loadu_8x(&input, (const uint8_t *)text); - simd_storeu_8x(wire, &input); - - 
block->length = size < SIMD_8X_SIZE ? size : SIMD_8X_SIZE; - const uint64_t mask = (1llu << block->length) - 1; - block->escape_bits = simd_find_8x(&input, '\\') & mask; - block->label_bits = simd_find_8x(&input, '.') & mask; + copy_and_scan_delimited( + &block->delimited, delimiter, space, source, destination); + block->backslash = simd_find_8x(&block->delimited.input, '\\'); + block->label = simd_find_8x(&block->delimited.input, '.'); } -zone_always_inline() -zone_nonnull_all() -static inline void scan_name( +zone_nonnull_all +static zone_really_inline int32_t scan_name( zone_parser_t *parser, const zone_type_info_t *type, const zone_field_info_t *field, - zone_token_t *token, + const simd_table_t delimiter, + const simd_table_t space, + const token_t *token, uint8_t octets[255 + ZONE_BLOCK_SIZE], size_t *length) { name_block_t block; uint8_t *wire = octets + 1, *label = octets; - const char *text = token->data, *limit = token->data + token->length; + const char *text = token->data; *label = 0; - while (text < limit) { - copy_name_block(&block, text, (size_t)(limit - text), wire); + for (bool loop=true; loop; ) { + copy_name_block(&block, delimiter, space, text, wire); - if (block.escape_bits) { - const uint64_t count = trailing_zeroes(block.escape_bits); + uint64_t size; + if (!(block.backslash & (block.delimited.delimiter - 1))) { + block.label &= block.delimited.delimiter - 1; + size = trailing_zeroes(block.delimited.delimiter | (1llu << SIMD_8X_SIZE)); + loop = !block.delimited.delimiter; + text += size; + wire += size; + } else { + size = trailing_zeroes(block.backslash); uint8_t digits[3]; - digits[0] = (unsigned char)text[count + 1] - '0'; + digits[0] = (unsigned char)text[size + 1] - '0'; if (digits[0] > 2) { - wire[count] = (unsigned char)text[count + 1]; - wire += count + 1; - text += count + 2; + wire[size] = (unsigned char)text[size + 1]; + wire += size + 1; + text += size + 2; } else { - digits[1] = (unsigned char)text[count + 2] - '0'; - digits[2] = 
(unsigned char)text[count + 3] - '0'; + digits[1] = (unsigned char)text[size + 2] - '0'; + digits[2] = (unsigned char)text[size + 3] - '0'; if (digits[0] < 2) { if (digits[1] > 9 || digits[2] > 9) SEMANTIC_ERROR(parser, "Bad escape sequence in %s of %s record", @@ -73,28 +79,24 @@ static inline void scan_name( field->name.data, type->name.data); } - wire[count] = digits[0] * 100 + digits[1] * 10 + digits[0]; - wire += count + 1; - text += count + 4; + wire[size] = digits[0] * 100 + digits[1] * 10 + digits[2]; /* \DDD decimal escape: hundreds, tens, units */ + wire += size + 1; + text += size + 4; } - block.length = count; - block.label_bits &= block.escape_bits - 1; - } else { - text += block.length; - wire += block.length; + block.label &= block.backslash - 1; } if (wire - octets > 255) SEMANTIC_ERROR(parser, "Bad domain name in %s of %s", field->name.data, type->name.data); - if (block.label_bits) { + if (block.label) { uint64_t count = 0, last = 0; - const uint64_t labels = count_ones(block.label_bits); + const uint64_t labels = count_ones(block.label); for (uint64_t i = 0; i < labels; i++) { - count = trailing_zeroes(block.label_bits) - last; - block.label_bits = clear_lowest_bit(block.label_bits); + count = trailing_zeroes(block.label) - last; + block.label = clear_lowest_bit(block.label); *label += count; if (!*label || *label > 63) SEMANTIC_ERROR(parser, "Bad domain name in %s of %s record", @@ -106,7 +108,7 @@ static inline void scan_name( } *label += (wire - label) - 1; } else { - *label += (uint8_t)block.length; + *label += (uint8_t)size; if (*label > 63) SEMANTIC_ERROR(parser, "Bad domain name in %s of %s record", field->name.data, type->name.data); @@ -119,41 +121,73 @@ static inline void scan_name( } *length = (size_t)(wire - octets); + if (!*label) + return 0; + return ZONE_NAME; } -zone_always_inline() -zone_nonnull_all() -static inline void parse_name( +zone_nonnull_all +static zone_really_inline int32_t scan_contiguous_name( zone_parser_t *parser, const zone_type_info_t *type, const
zone_field_info_t *field, - zone_token_t *token) + const token_t *token, + uint8_t octets[255 + ZONE_BLOCK_SIZE], + size_t *length) { - // a freestanding "@" denotes the current origin - if (token->length == 1 && token->data[0] == '@') { - memcpy(&parser->rdata->octets[parser->rdata->length], - parser->file->origin.octets, - parser->file->origin.length); - parser->rdata->length += parser->file->origin.length; - return; - } - - size_t length; - uint8_t *data = &parser->rdata->octets[parser->rdata->length]; + return scan_name( + parser, type, field, non_contiguous, blank, token, octets, length); +} - scan_name(parser, type, field, token, data, &length); - parser->rdata->length += length; - assert(length != 0); - if (data[length - 1] == 0) - return; +zone_nonnull_all +static zone_really_inline int32_t scan_quoted_name( + zone_parser_t *parser, + const zone_type_info_t *type, + const zone_field_info_t *field, + const token_t *token, + uint8_t octets[255 + ZONE_BLOCK_SIZE], + size_t *length) +{ + return scan_name( + parser, type, field, non_quoted, non_quoted, token, octets, length); +} - if (length > 256 - parser->file->origin.length) - SYNTAX_ERROR(parser, "Invalid name in %s, exceeds 255 octets", field->name.data); +zone_nonnull_all +static zone_really_inline int32_t parse_name( + zone_parser_t *parser, + const zone_type_info_t *type, + const zone_field_info_t *field, + const token_t *token) +{ + int32_t r; + size_t n = 0; + uint8_t *o = &parser->rdata->octets[parser->rdata->length]; + + if (zone_likely(token->code == CONTIGUOUS)) { + // a freestanding "@" denotes the current origin + if (token->data[0] == '@' && !is_contiguous((uint8_t)token->data[1])) + goto relative; + r = scan_contiguous_name(parser, type, field, token, o, &n); + if (r == 0) + return (void)(parser->rdata->length += n), ZONE_NAME; + if (r < 0) + return r; + } else if (token->code == QUOTED) { + r = scan_quoted_name(parser, type, field, token, o, &n); + if (r == 0) + return 
(void)(parser->rdata->length += n), ZONE_NAME; + if (r < 0) + return r; + } else { + return have_string(parser, type, field, token); + } - memcpy(&parser->rdata->octets[parser->rdata->length], - parser->file->origin.octets, - parser->file->origin.length); - parser->rdata->length += parser->file->origin.length; +relative: + if (n > 255 - parser->file->origin.length) + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); + memcpy(o+n, parser->file->origin.octets, parser->file->origin.length); + parser->rdata->length += n + parser->file->origin.length; + return ZONE_NAME; } #endif // NAME_H diff --git a/src/generic/nsec.h b/src/generic/nsec.h index b77683a..54965a8 100644 --- a/src/generic/nsec.h +++ b/src/generic/nsec.h @@ -11,13 +11,12 @@ typedef uint8_t zone_nsec_t[256 + 2]; -zone_always_inline() -zone_nonnull_all() -static inline void parse_nsec( +zone_nonnull_all +static zone_really_inline int32_t parse_nsec( zone_parser_t *parser, const zone_type_info_t *type, const zone_field_info_t *field, - zone_token_t *token) + token_t *token) { uint16_t code; uint16_t highest_bit = 0; @@ -46,7 +45,8 @@ static inline void parse_nsec( if (bit > bitmap[window][1]) bitmap[window][1] = bit; bitmap[window][2 + bit / 8] |= (1 << (7 - bit % 8)); - } while (lex(parser, token)); + lex(parser, token); + } while (token->code == CONTIGUOUS); // iterate and compress all (maybe 256) windows size_t length = 0; @@ -62,6 +62,7 @@ static inline void parse_nsec( } parser->rdata->length += length; + return ZONE_TYPE_BITMAP; } #endif // NSEC_H diff --git a/src/generic/number.h b/src/generic/number.h index d9ef590..d2eea7f 100644 --- a/src/generic/number.h +++ b/src/generic/number.h @@ -15,112 +15,129 @@ #include #endif -zone_always_inline() -zone_nonnull_all() -static inline zone_return_t parse_int8( +zone_nonnull_all +static zone_really_inline int32_t parse_symbol( zone_parser_t *parser, const zone_type_info_t *type, const zone_field_info_t *field, - zone_token_t *token) + 
token_t *token) { - uint64_t v = 0; - zone_symbol_t *symbol; - - for (size_t i=0; i < token->length; i++) { - const uint64_t n = (unsigned char)token->data[i] - '0'; - if (n > 9) - goto parse_symbol; - v = (v * 10) + n; - if (v > UINT8_MAX) - SEMANTIC_ERROR(parser, "Invalid %s in %s, value exceeds maximum", - field->name.data, type->name.data); + int32_t r; + + if ((r = have_contiguous(parser, type, field, token)) < 0) + return r; + + uint64_t n = 0; + const char *p = token->data; + for (;; p++) { + const uint64_t d = (uint8_t)*p - '0'; + if (d > 9) + break; + n = n * 10 + d; + } + + if (is_contiguous((uint8_t)*p)) { + const zone_symbol_t *s; + if (!(s = lookup_symbol(&field->symbols, token))) + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); + n = (uint8_t)s->value; + } else { + if (n > UINT8_MAX || p - token->data > 3) + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); } - parser->rdata->octets[parser->rdata->length] = (uint8_t)v; + parser->rdata->octets[parser->rdata->length] = (uint8_t)n; parser->rdata->length += sizeof(uint8_t); - return 0; -parse_symbol: - if (!(symbol = zone_lookup(&field->symbols, token))) - SYNTAX_ERROR(parser, "Invalid %s in %s, not a number", - field->name.data, type->name.data); - assert(symbol->value <= UINT8_MAX); - parser->rdata->octets[parser->rdata->length] = (uint8_t)symbol->value; + return ZONE_INT8; +} + +zone_nonnull_all +static zone_really_inline int32_t parse_int8( + zone_parser_t *parser, + const zone_type_info_t *type, + const zone_field_info_t *field, + token_t *token) +{ + int32_t r; + + if ((r = have_contiguous(parser, type, field, token)) < 0) + return r; + + uint64_t n = 0; + const char *p = token->data; + for (;; p++) { + const uint64_t d = (uint8_t)*p - '0'; + if (d > 9) + break; + n = n * 10 + d; + } + + if (n > UINT8_MAX || p - token->data > 3 || is_contiguous((uint8_t)*p)) + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); + + 
parser->rdata->octets[parser->rdata->length] = (uint8_t)n; parser->rdata->length += sizeof(uint8_t); - return 0; + return ZONE_INT8; } -zone_always_inline() -zone_nonnull_all() -static inline zone_return_t parse_int16( +zone_nonnull_all +static zone_really_inline int32_t parse_int16( zone_parser_t *parser, const zone_type_info_t *type, const zone_field_info_t *field, - zone_token_t *token) + token_t *token) { - uint64_t v = 0; - uint16_t v16; - zone_symbol_t *symbol; - - for (size_t i=0; i < token->length; i++) { - const uint64_t n = (unsigned char)token->data[i] - '0'; - if (n > 9) - goto parse_symbol; - v = (v * 10) + n; - if (v > UINT16_MAX) - SEMANTIC_ERROR(parser, "Invalid %s in %s, value exceeds maximum", - field->name.data, type->name.data); + int32_t r; + + if ((r = have_contiguous(parser, type, field, token)) < 0) + return r; + + uint64_t n = 0; + const char *p = token->data; + for (;; p++) { + const uint64_t d = (uint8_t)*p - '0'; + if (d > 9) + break; + n = n * 10 + d; } - v16 = htons((uint16_t)v); - memcpy(&parser->rdata->octets[parser->rdata->length], &v16, sizeof(v16)); - parser->rdata->length += sizeof(uint16_t); - return 0; -parse_symbol: - if (!(symbol = zone_lookup(&field->symbols, token))) - SYNTAX_ERROR(parser, "Invalid %s in %s, not a number", - field->name.data, type->name.data); - assert(symbol->value <= UINT16_MAX); - v16 = htons((uint16_t)symbol->value); - memcpy(&parser->rdata->octets[parser->rdata->length], &v16, sizeof(v16)); - parser->rdata->length += sizeof(uint16_t); - return 0; + if (n > UINT16_MAX || p - token->data > 5 || is_contiguous((uint8_t)*p)) + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); + + uint16_t n16 = htons((uint16_t)n); + memcpy(&parser->rdata->octets[parser->rdata->length], &n16, sizeof(n16)); + parser->rdata->length += sizeof(n16); + return ZONE_INT16; } -zone_always_inline() -zone_nonnull_all() -static inline zone_return_t parse_int32( +zone_nonnull_all +static zone_really_inline zone_return_t 
parse_int32( zone_parser_t *parser, const zone_type_info_t *type, const zone_field_info_t *field, - zone_token_t *token) + token_t *token) { - uint64_t v = 0; - uint32_t v32; - zone_symbol_t *symbol; - - for (size_t i=0; i < token->length; i++) { - const uint64_t n = (unsigned char)token->data[i] - '0'; - if (n > 9) - goto parse_symbol; - v = (v * 10) + n; - if (v > UINT32_MAX) - SEMANTIC_ERROR(parser, "Invalid %s in %s, value exceeds maximum", - field->name.data, type->name.data); + int32_t r; + + if ((r = have_contiguous(parser, type, field, token)) < 0) + return r; + + uint64_t n = 0; + const char *p = token->data; + for (;; p++) { + const uint64_t d = (uint8_t)*p - '0'; + if (d > 9) + break; + n = n * 10 + d; } - v32 = htonl((uint32_t)v); - memcpy(&parser->rdata->octets[parser->rdata->length], &v32, sizeof(v32)); - parser->rdata->length += sizeof(uint32_t); - return 0; -parse_symbol: - if (!(symbol = zone_lookup(&field->symbols, token))) - SYNTAX_ERROR(parser, "Invalid %s in %s, not a number", - field->name.data, type->name.data); - assert(symbol->value <= UINT16_MAX); - v32 = htonl(symbol->value); - memcpy(&parser->rdata->octets[parser->rdata->length], &v32, sizeof(v32)); - parser->rdata->length += sizeof(uint32_t); - return 0; + if (n > UINT32_MAX || p - token->data > 10 || is_contiguous((uint8_t)*p)) + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); + + const uint32_t n32 = htonl((uint32_t)n); + memcpy(&parser->rdata->octets[parser->rdata->length], &n32, sizeof(n32)); + parser->rdata->length += sizeof(n32); + return ZONE_INT32; } #endif // NUMBER_H diff --git a/src/generic/scanner.h b/src/generic/scanner.h index 12a1506..76ae483 100644 --- a/src/generic/scanner.h +++ b/src/generic/scanner.h @@ -49,7 +49,7 @@ static inline void find_delimiters( uint64_t newlines, uint64_t in_quoted, uint64_t in_comment, - uint64_t *quoted, + uint64_t *quoted_, uint64_t *comment) { uint64_t delimiters, starts = quotes | semicolons; @@ -78,7 +78,7 @@ static 
inline void find_delimiters( starts &= -end - end; } - *quoted = delimiters & quotes; + *quoted_ = delimiters & quotes; *comment = delimiters & ~quotes; } @@ -89,18 +89,42 @@ static inline uint64_t follows(const uint64_t match, uint64_t *overflow) return result; } -static const simd_table_t blank_table = SIMD_TABLE( - 0x20, 0x00, 0x00, 0x00, // " " = 0x20 - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x09, 0x00, 0x00, // "\t" = 0x09 - 0x00, 0x0d, 0x00, 0x00 // "\r" = 0x0d +static const simd_table_t blank = SIMD_TABLE( + 0x20, // 0x00 : " " : 0x20 -- space + 0x00, // 0x01 + 0x00, // 0x02 + 0x00, // 0x03 + 0x00, // 0x04 + 0x00, // 0x05 + 0x00, // 0x06 + 0x00, // 0x07 + 0x00, // 0x08 + 0x09, // 0x09 : "\t" : 0x09 -- tab + 0x00, // 0x0a + 0x00, // 0x0b + 0x00, // 0x0c + 0x0d, // 0x0d : "\r" : 0x0d -- carriage return + 0x00, // 0x0e + 0x00 // 0x0f ); -static const simd_table_t special_table = SIMD_TABLE( - 0xff, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x28, 0x29, 0x0a, 0x00, // "(" = 0x28, ")" = 0x29, "\n" = 0x0a - 0x00, 0x00, 0x00, 0x00 +static const simd_table_t special = SIMD_TABLE( + 0x00, // 0x00 : "\0" : 0x00 -- end-of-file + 0x00, // 0x01 + 0x00, // 0x02 + 0x00, // 0x03 + 0x00, // 0x04 + 0x00, // 0x05 + 0x00, // 0x06 + 0x00, // 0x07 + 0x28, // 0x08 : "(" : 0x28 -- start grouped + 0x29, // 0x09 : ")" : 0x29 -- end grouped + 0x0a, // 0x0a : "\n" : 0x0a -- end-of-line + 0x00, // 0x0b + 0x00, // 0x0c + 0x00, // 0x0d + 0x00, // 0x0e + 0x00 // 0x0f ); typedef struct block block_t; @@ -121,8 +145,7 @@ struct block { uint64_t bits; }; -zone_always_inline() -static inline void scan(zone_parser_t *parser, block_t *block) +static zone_really_inline void scan(zone_parser_t *parser, block_t *block) { // escaped newlines are classified as contiguous. 
however, escape sequences // have no meaning in comments and newlines, escaped or not, have no @@ -130,14 +153,14 @@ static inline void scan(zone_parser_t *parser, block_t *block) block->newline = simd_find_8x64(&block->input, '\n'); block->backslash = simd_find_8x64(&block->input, '\\'); block->escaped = find_escaped( - block->backslash, &parser->file->indexer.is_escaped); + block->backslash, &parser->file->state.is_escaped); block->comment = 0; block->quoted = simd_find_8x64(&block->input, '"') & ~block->escaped; block->semicolon = simd_find_8x64(&block->input, ';') & ~block->escaped; - block->in_quoted = parser->file->indexer.in_quoted; - block->in_comment = parser->file->indexer.in_comment; + block->in_quoted = parser->file->state.in_quoted; + block->in_comment = parser->file->state.in_comment; if (block->in_comment || block->semicolon) { find_delimiters( @@ -150,58 +173,29 @@ static inline void scan(zone_parser_t *parser, block_t *block) &block->comment); block->in_quoted ^= prefix_xor(block->quoted); - parser->file->indexer.in_quoted = (uint64_t)((int64_t)block->in_quoted >> 63); + parser->file->state.in_quoted = (uint64_t)((int64_t)block->in_quoted >> 63); block->in_comment ^= prefix_xor(block->comment); - parser->file->indexer.in_comment = (uint64_t)((int64_t)block->in_comment >> 63); + parser->file->state.in_comment = (uint64_t)((int64_t)block->in_comment >> 63); } else { block->in_quoted ^= prefix_xor(block->quoted); - parser->file->indexer.in_quoted = (uint64_t)((int64_t)block->in_quoted >> 63); + parser->file->state.in_quoted = (uint64_t)((int64_t)block->in_quoted >> 63); } block->blank = - simd_find_any_8x64(&block->input, blank_table) & ~(block->escaped | block->in_quoted | block->in_comment); + simd_find_any_8x64(&block->input, blank) & ~(block->escaped | block->in_quoted | block->in_comment); block->special = - simd_find_any_8x64(&block->input, special_table) & ~(block->escaped | block->in_quoted | block->in_comment); + 
simd_find_any_8x64(&block->input, special) & ~(block->escaped | block->in_quoted | block->in_comment); block->contiguous = ~(block->blank | block->special | block->quoted) & ~(block->in_quoted | block->in_comment); block->follows_contiguous = - follows(block->contiguous, &parser->file->indexer.follows_contiguous); + follows(block->contiguous, &parser->file->state.follows_contiguous); // quoted and contiguous have dynamic lengths, write two indexes - block->bits = (block->contiguous ^ block->follows_contiguous) | block->quoted | block->special; + block->bits = (block->contiguous & ~block->follows_contiguous) | (block->quoted & block->in_quoted) | block->special; } -static inline void refill(zone_parser_t *parser) -{ - zone_file_t *file = parser->file; - - // grow buffer if necessary - if (file->buffer.length == file->buffer.size) { - size_t size = file->buffer.size + ZONE_WINDOW_SIZE; - char *data = file->buffer.data; - if (!(data = realloc(data, size + 1))) - SYNTAX_ERROR(parser, "actually out of memory"); - file->buffer.size = size; - file->buffer.data = data; - } - - size_t count = fread(file->buffer.data + file->buffer.length, - sizeof(file->buffer.data[0]), - file->buffer.size - file->buffer.length, - file->handle); - - if (count == 0 && ferror(file->handle)) - SYNTAX_ERROR(parser, "actually a read error"); - - // always null-terminate so terminating token can point to something - file->buffer.length += (size_t)count; - file->buffer.data[file->buffer.length] = '\0'; - file->end_of_file = feof(file->handle) != 0; -} - -zone_always_inline() -static inline void tokenize(zone_parser_t *parser, const block_t *block) +static zone_really_inline void tokenize(zone_parser_t *parser, const block_t *block) { uint64_t bits = block->bits; uint64_t count = count_ones(bits); @@ -214,188 +208,230 @@ static inline void tokenize(zone_parser_t *parser, const block_t *block) // edge case, but must be supported and handled in the scanner for ease of // use and to accommodate for 
parallel processing in the parser. note that // escaped newlines may have been present in the last block - if (zone_unlikely(parser->file->indexer.newlines || (newline & in_string))) { + if (zone_unlikely(parser->file->lines.tail[0] || (newline & in_string))) { for (uint64_t i=0; i < count; i++) { uint64_t bit = -bits & bits; bits ^= bit; if (bit & newline) { - parser->file->indexer.tail[i] = - (zone_index_t){ - base + trailing_zeroes(bit), parser->file->indexer.newlines }; - parser->file->indexer.newlines = 0; + parser->file->lines.tail++; + parser->file->fields.tail[i] = line_feed; newline &= -bit; } else { // count newlines here so number of newlines remains correct if last // token is start of contiguous or quoted and index must be reset - parser->file->indexer.tail[i] = - (zone_index_t){ base + trailing_zeroes(bit), 0 }; - parser->file->indexer.newlines += count_ones(newline & ~(-bit)); + *parser->file->lines.tail += count_ones(newline & ~(-bit)); + parser->file->fields.tail[i] = base + trailing_zeroes(bit); newline &= -bit; } } - parser->file->indexer.tail += count; + parser->file->fields.tail += count; } else { for (uint64_t i=0; i < 6; i++) { - parser->file->indexer.tail[i] = - (zone_index_t){ base + trailing_zeroes(bits), 0 }; + parser->file->fields.tail[i] = base + trailing_zeroes(bits); bits = clear_lowest_bit(bits); } if (zone_unlikely(count > 6)) { for (uint64_t i=6; i < 12; i++) { - parser->file->indexer.tail[i] = - (zone_index_t){ base + trailing_zeroes(bits), 0 }; + parser->file->fields.tail[i] = base + trailing_zeroes(bits); bits = clear_lowest_bit(bits); } if (zone_unlikely(count > 12)) { for (uint64_t i=12; i < count; i++) { - parser->file->indexer.tail[i] = - (zone_index_t){ base + trailing_zeroes(bits), 0 }; + parser->file->fields.tail[i] = base + trailing_zeroes(bits); bits = clear_lowest_bit(bits); } } } - parser->file->indexer.tail += count; + parser->file->fields.tail += count; } } -extern const uint8_t *zone_forward; -extern const uint8_t 
*zone_jump; - -zone_never_inline() -zone_nonnull_all() -static zone_return_t step(zone_parser_t *parser, zone_token_t *token) +zone_nonnull_all +static zone_never_inline void step(zone_parser_t *parser, token_t *token) { block_t block = { 0 }; - zone_file_t *file = parser->file; - const char *start, *end; bool start_of_line = false; + const char *data_limit, **tape_limit; - // start of line is initially always true - if (file->indexer.tail == file->indexer.tape) + // start of line is initially true + if (parser->file->fields.tail == parser->file->fields.tape) start_of_line = true; - else if (*(end = file->indexer.tail[-1].data) == '\n') - start_of_line = (file->buffer.data + file->buffer.index) - end == 1; - - file->indexer.head = file->indexer.tape; - file->indexer.tail = file->indexer.tape; + else if (parser->file->fields.tail[-1][0] == '\n') + start_of_line = !is_blank((uint8_t)parser->file->fields.tail[-1][1]); + + // restore deferred line count + parser->file->lines.tape[0] = parser->file->lines.tail[0]; + parser->file->lines.head = parser->file->lines.tape; + parser->file->lines.tail = parser->file->lines.tape; + // restore (possibly) deferred field + parser->file->fields.tape[0] = parser->file->fields.tail[1]; + parser->file->fields.head = parser->file->fields.tape; + parser->file->fields.tail = parser->file->fields.tape; + if (parser->file->fields.tape[0]) + parser->file->fields.tail++; shuffle: - if (file->end_of_file == ZONE_HAVE_DATA) { - memmove(file->buffer.data, - file->buffer.data + file->buffer.index, - file->buffer.length - file->buffer.index); - file->buffer.length -= file->buffer.index; - file->buffer.index = 0; - refill(parser); + if (parser->file->end_of_file == ZONE_HAVE_DATA) { + int32_t code; + const char *start; + if (parser->file->fields.head[0]) + start = parser->file->fields.head[0]; + else + start = parser->file->buffer.data + parser->file->buffer.index; + parser->file->fields.head[0] = parser->file->buffer.data; + const size_t length = 
+ (size_t)((parser->file->buffer.data+parser->file->buffer.length) - start); + const size_t index = + (size_t)((parser->file->buffer.data+parser->file->buffer.index) - start); + memmove(parser->file->buffer.data, start, length); + parser->file->buffer.length = length; + parser->file->buffer.index = index; + parser->file->buffer.data[length] = '\0'; + if ((code = refill(parser)) < 0) + DEFER_ERROR(parser, token, code); } - start = file->buffer.data + file->buffer.index; - - while (file->buffer.length - file->buffer.index >= ZONE_BLOCK_SIZE) { - if ((file->indexer.tape + ZONE_TAPE_SIZE) - file->indexer.tail < ZONE_BLOCK_SIZE) + data_limit = parser->file->buffer.data + parser->file->buffer.length; + tape_limit = parser->file->fields.tape + ZONE_TAPE_SIZE; + for (;;) { + const char *data = parser->file->buffer.data + parser->file->buffer.index; + if (data_limit - data < ZONE_BLOCK_SIZE) + break; + if (tape_limit - parser->file->fields.tail < ZONE_BLOCK_SIZE) goto terminate; - simd_loadu_8x64(&block.input, (uint8_t *)&file->buffer.data[file->buffer.index]); + simd_loadu_8x64(&block.input, (const uint8_t *)data); scan(parser, &block); tokenize(parser, &block); - file->buffer.index += ZONE_BLOCK_SIZE; + parser->file->buffer.index += ZONE_BLOCK_SIZE; } - size_t length = file->buffer.length - file->buffer.index; + const size_t length = parser->file->buffer.length - parser->file->buffer.index; assert(length <= ZONE_BLOCK_SIZE); - if (file->end_of_file == ZONE_HAVE_DATA) + if (parser->file->end_of_file == ZONE_HAVE_DATA) goto terminate; - if (length > (size_t)((file->indexer.tape + ZONE_TAPE_SIZE) - file->indexer.tail)) + if (length > (size_t)(tape_limit - parser->file->fields.tail)) goto terminate; uint8_t buffer[ZONE_BLOCK_SIZE] = { 0 }; - memcpy(buffer, &file->buffer.data[file->buffer.index], length); + memcpy(buffer, &parser->file->buffer.data[parser->file->buffer.index], length); const uint64_t clear = ~((1llu << length) - 1); simd_loadu_8x64(&block.input, buffer); 
scan(parser, &block); block.bits &= ~clear; block.contiguous &= ~clear; tokenize(parser, &block); - file->buffer.index += length; - file->end_of_file = ZONE_NO_MORE_DATA; + parser->file->buffer.index += length; + parser->file->end_of_file = ZONE_NO_MORE_DATA; terminate: - // ensure tape contains no partial tokens + // make sure tape contains no partial tokens if ((uint64_t)((int64_t)(block.contiguous | block.in_quoted) >> 63)) { - // FIXME: .com (for example) uses single fields for base64 data, hence a - // lot of reprocessing is required for those types of zones. it may - // be beneficial to store where we left off - assert(file->indexer.tail > file->indexer.tape); - file->indexer.tail--; - file->indexer.in_comment = 0; - file->indexer.in_quoted = 0; - file->indexer.is_escaped = 0; - file->indexer.follows_contiguous = 0; - file->buffer.index = - (size_t)(file->indexer.tail[0].data - file->buffer.data); + parser->file->fields.tail[0] = parser->file->fields.tail[-1]; + parser->file->fields.tail--; + } else { + parser->file->fields.tail[1] = NULL; } - file->indexer.tail[0] = - (zone_index_t) { file->buffer.data + file->buffer.length, 0 }; - file->indexer.tail[1] = - (zone_index_t) { file->buffer.data + file->buffer.length, 0 }; - file->start_of_line = file->indexer.head[0].data == start && start_of_line; - - do { - start = file->indexer.head[0].data; - end = file->indexer.head[1].data; - assert(start < end || (start == end && *start == '\0')); - - switch (zone_jump[ (unsigned char)*start ]) { - case 0: // contiguous - *token = (zone_token_t){ (size_t)(end - start), start }; - // discard index for blank or semicolon - file->indexer.head += zone_forward[ (unsigned char)*end ]; - return ZONE_CONTIGUOUS; - case 1: // quoted - *token = (zone_token_t){ (size_t)(end - start), start + 1 }; - // discard index for closing quote - file->indexer.head += 2; - return ZONE_QUOTED; - case 2: // newline - file->line += file->indexer.head[0].newlines + 1; - file->indexer.head++; - if 
(file->grouped) - break; - file->start_of_line = (end - start) == 1; - *token = (zone_token_t){ 1, start }; - return ZONE_DELIMITER; - case 3: // end of file - if (file->end_of_file != ZONE_NO_MORE_DATA) - goto shuffle; - if (file->grouped) - SYNTAX_ERROR(parser, "Missing closing brace"); - assert(start == file->buffer.data + file->buffer.length); - assert(end == file->buffer.data + file->buffer.length); - if (file->includer) { - parser->file = file->includer; - parser->owner = &parser->file->owner; - zone_close_file(parser, file); - } - *token = (zone_token_t){ 1, zone_end_of_file }; - return ZONE_DELIMITER; - case 4: // left parenthesis - if (file->grouped) - SYNTAX_ERROR(parser, "Nested opening brace"); - file->grouped = true; - file->indexer.head++; - break; - case 5: // right parenthesis - if (!file->grouped) - SYNTAX_ERROR(parser, "Closing brace without opening brace"); - file->grouped = false; - file->indexer.head++; - break; + parser->file->fields.tail[0] = data_limit; + if (parser->file->fields.head[0] == parser->file->buffer.data) + parser->file->start_of_line = start_of_line; + else + parser->file->start_of_line = false; + + for (;;) { + const char *data = parser->file->fields.head[0]; + token->data = data; + token->code = (int32_t)contiguous[ (uint8_t)*data ]; + // end-of-file is idempotent + parser->file->fields.head += (*data != '\0'); + if (zone_likely(token->code == CONTIGUOUS)) { + return; + } else if (token->code == LINE_FEED) { + if (zone_unlikely(token->data == line_feed)) + parser->file->span += *parser->file->lines.head++; + parser->file->span++; + if (parser->file->grouped) + continue; + parser->file->line += parser->file->span; + parser->file->span = 0; + parser->file->start_of_line = !is_blank((uint8_t)*(token->data+1)); + return; + } else if (token->code == QUOTED) { + token->data++; + return; + } else if (token->code == END_OF_FILE) { + zone_file_t *file; + + if (parser->file->end_of_file != ZONE_NO_MORE_DATA) + goto shuffle; + if 
(parser->file->grouped) + DEFER_SYNTAX_ERROR(parser, token, "Missing closing brace"); + if (!parser->file->includer) + return; + file = parser->file; + parser->file = parser->file->includer; + parser->owner = &parser->file->owner; + zone_close_file(parser, file); + return; + } else if (token->code == LEFT_PAREN) { + if (parser->file->grouped) + DEFER_SYNTAX_ERROR(parser, token, "Nested opening brace"); + parser->file->grouped = true; + } else { + assert(token->code == RIGHT_PAREN); + if (!parser->file->grouped) + DEFER_SYNTAX_ERROR(parser, token, "Missing opening brace"); + parser->file->grouped = false; } - } while (1); + } } +typedef struct delimited delimited_t; +struct delimited { + simd_8x_t input; + uint64_t delimiter; +}; + +static const simd_table_t non_contiguous = SIMD_TABLE( + 0x00, // 0x00 : "\0" : 0x00 -- end-of-file + 0x00, // 0x01 + 0x22, // 0x02 : "\"" : 0x22 -- start/end quoted + 0x00, // 0x03 + 0x00, // 0x04 + 0x00, // 0x05 + 0x00, // 0x06 + 0x00, // 0x07 + 0x28, // 0x08 : "(" : 0x28 -- start grouped + 0x29, // 0x09 : ")" : 0x29 -- end grouped + 0x0a, // 0x0a : "\n" : 0x0a -- end-of-line + 0x3b, // 0x0b : ";" : 0x3b -- start comment + 0x00, // 0x0c + 0x00, // 0x0d + 0x00, // 0x0e + 0x00 // 0x0f +); + +static const simd_table_t non_quoted = SIMD_TABLE( + 0x00, // 0x00 : "\0" : 0x00 -- end-of-file + 0x00, // 0x01 + 0x22, // 0x02 : "\"" : 0x22 -- start/end quoted + 0x00, // 0x03 + 0x00, // 0x04 + 0x00, // 0x05 + 0x00, // 0x06 + 0x00, // 0x07 + 0x00, // 0x08 + 0x00, // 0x09 + 0x00, // 0x0a + 0x00, // 0x0b + 0x00, // 0x0c + 0x00, // 0x0d + 0x00, // 0x0e + 0x00 // 0x0f +); + #endif // SCANNER_H diff --git a/src/generic/text.h b/src/generic/text.h index 12d2d37..0d41331 100644 --- a/src/generic/text.h +++ b/src/generic/text.h @@ -11,42 +11,40 @@ typedef struct string_block string_block_t; struct string_block { - size_t length; - uint64_t escape_bits; + delimited_t delimited; + uint64_t backslash; }; -zone_always_inline() -zone_nonnull_all() -static 
inline void copy_string_block( - string_block_t *block, const char *text, size_t size, uint8_t *wire) +zone_nonnull_all +static zone_really_inline void copy_string_block( + string_block_t *block, + const simd_table_t delimiter, + const simd_table_t space, + const char *source, + uint8_t *destination) { - simd_8x_t input; - - simd_loadu_8x(&input, (const uint8_t *)text); - simd_storeu_8x(wire, &input); - - block->length = size < SIMD_8X_SIZE ? size : SIMD_8X_SIZE; - const uint64_t mask = (1llu << block->length) - 1; - block->escape_bits = simd_find_8x(&input, '\\') & mask; + copy_and_scan_delimited(&block->delimited, delimiter, space, source, destination); + block->backslash = simd_find_8x(&block->delimited.input, '\\'); } -zone_always_inline() -zone_nonnull_all() -static inline void parse_string( +zone_nonnull_all +static zone_really_inline int32_t parse_string_in( zone_parser_t *parser, const zone_type_info_t *type, const zone_field_info_t *field, - zone_token_t *token) + const simd_table_t delimiter, + const simd_table_t space, + const token_t *token) { string_block_t block; uint8_t *wire = &parser->rdata->octets[parser->rdata->length + 1]; - const char *text = token->data, *limit = token->data + token->length; + const char *text = token->data; - while (text < limit) { - copy_string_block(&block, text, (size_t)(limit - text), wire); + for (bool loop=true; loop; ) { + copy_string_block(&block, delimiter, space, text, wire); - if (block.escape_bits) { - const uint64_t count = trailing_zeroes(block.escape_bits); + if (block.backslash & (block.delimited.delimiter - 1)) { + size_t count = trailing_zeroes(block.backslash); uint8_t digits[3]; digits[0] = (unsigned char)text[count + 1] - '0'; @@ -72,8 +70,10 @@ static inline void parse_string( text += count + 4; } } else { - text += block.length; - wire += block.length; + size_t count = trailing_zeroes(block.delimited.delimiter | (1llu << SIMD_8X_SIZE)); + loop = !block.delimited.delimiter; + text += count; + wire += 
count; } if (wire - parser->rdata->octets > 256) @@ -83,6 +83,28 @@ static inline void parse_string( parser->rdata->octets[parser->rdata->length] = (uint8_t)((wire - parser->rdata->octets) - 1); parser->rdata->length += (size_t)(wire - parser->rdata->octets); + return ZONE_STRING; +} + +#define parse_contiguous_string(parser, type, field, token) \ + parse_string_in(parser, type, field, non_contiguous, blank, token) + +#define parse_quoted_string(parser, type, field, token) \ + parse_string_in(parser, type, field, non_quoted, non_quoted, token) + +zone_nonnull_all +static zone_really_inline int32_t parse_string( + zone_parser_t *parser, + const zone_type_info_t *type, + const zone_field_info_t *field, + const token_t *token) +{ + if (zone_likely(token->code == QUOTED)) + return parse_quoted_string(parser, type, field, token); + else if (token->code == CONTIGUOUS) + return parse_contiguous_string(parser, type, field, token); + else + return have_string(parser, type, field, token); } #endif // TEXT_H diff --git a/src/generic/time.h b/src/generic/time.h index 7853cb2..0df58c3 100644 --- a/src/generic/time.h +++ b/src/generic/time.h @@ -9,76 +9,80 @@ #ifndef TIME_H #define TIME_H -/* Number of days per month (except for February in leap years). 
*/
-static const int mdays[] = { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };
+/* number of days per month (except for February in leap years) */
+static const uint8_t days_in_month[13] = {
+  0 /* no --month */, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };
 
-static int is_leap_year(int year)
+/* number of days preceding the first day of each month (non-leap year) */
+static const uint16_t days_to_month[13] = {
+  0 /* no --month */, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 };
+
+static uint64_t is_leap_year(uint64_t year)
 {
   return year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);
 }
 
-static int leap_days(int y1, int y2)
+/* number of leap years in [y1, y2), y1 and y2 must be greater than zero */
+static uint64_t leap_days(uint64_t y1, uint64_t y2)
 {
   --y1;
   --y2;
   return (y2/4 - y1/4) - (y2/100 - y1/100) + (y2/400 - y1/400);
 }
 
-/*
- * Code adapted from Python 2.4.1 sources (Lib/calendar.py).
- */
-static time_t mktime_from_utc(const struct tm *tm)
+// Parse a timestamp in YYYYmmddHHMMSS notation (exactly 14 digits) and
+// append it to the rdata as a 32-bit number of seconds since 1970-01-01
+// 00:00:00 UTC in network byte order. Returns ZONE_INT32 on success, the
+// (negative) token code if the token itself carries an error. Malformed or
+// out of range input is reported through SYNTAX_ERROR.
+//
+// FIXME: very likely eligible for vectorization, see issue #22
+zone_nonnull_all
+static zone_really_inline int32_t parse_time(
+  zone_parser_t *parser,
+  const zone_type_info_t *type,
+  const zone_field_info_t *field,
+  token_t *token)
 {
-  int year = 1900 + tm->tm_year;
-  time_t days = 365 * (year - 1970) + leap_days(1970, year);
-  time_t hours;
-  time_t minutes;
-  time_t seconds;
-  int i;
-
-  for (i = 0; i < tm->tm_mon; ++i) {
-    days += mdays[i];
-  }
-  if (tm->tm_mon > 1 && is_leap_year(year)) {
-    ++days;
+  int32_t r;
+
+  if ((r = have_contiguous(parser, type, field, token)) < 0)
+    return r;
+
+  uint64_t d[14]; // YYYYmmddHHMMSS
+  const char *p = token->data;
+  for (int i = 0; i < 14; i++) {
+    d[i] = (uint8_t)p[i] - '0';
+    if (d[i] > 9)
+      SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type));
   }
-  days += tm->tm_mday - 1;
-  hours = days * 24 + tm->tm_hour;
-  minutes = hours * 60 + tm->tm_min;
-  seconds = minutes * 60 + tm->tm_sec;
+  // the 14 digits must make up the entire token
+  if (contiguous[ (uint8_t)p[14] ] == CONTIGUOUS)
+    SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type));
 
-  return seconds;
-}
+  // code adapted from Python 2.4.1 sources (Lib/calendar.py)
+  const uint64_t year = (d[0] * 1000) + (d[1] * 100) + (d[2] * 10) + d[3];
+  const uint64_t mon = (d[4] * 10) + d[5];
+  const uint64_t mday = (d[6] * 10) + d[7];
+  const uint64_t hour = (d[8] * 10) + d[9];
+  const uint64_t min = (d[10] * 10) + d[11];
+  const uint64_t sec = (d[12] * 10) + d[13];
 
-// FIXME: likely eligible for vectorization, see issue #22
-zone_nonnull_all()
-static inline void parse_time(
-  zone_parser_t *parser,
-  const zone_type_info_t *type,
-  const zone_field_info_t *field,
-  zone_token_t *token)
-{
-  char buf[] = "YYYYmmddHHMMSS";
-
-  if (token->length >= sizeof(buf))
-    SYNTAX_ERROR(parser, "Invalid %s in %s",
-                 field->name.data, type->name.data);
-  memcpy(buf, token->data, token->length);
-  buf[token->length] = '\0';
-
-  int matched;
-  struct tm tm;
-  matched = sscanf(buf, "%4d%2d%2d%2d%2d%2d",
-    &tm.tm_year, &tm.tm_mon, &tm.tm_mday, &tm.tm_hour, &tm.tm_min, &tm.tm_sec);
-  if (matched != 6 || matched == EOF)
-    SYNTAX_ERROR(parser, "Invalid %s in %s",
-                 field->name.data, type->name.data);
-  tm.tm_year -= 1900;
-  tm.tm_mon -= 1;
-  uint32_t time = htonl((uint32_t)mktime_from_utc(&tm));
+  // years before the epoch cannot be represented and would wrap the
+  // unsigned arithmetic below
+  if (year < 1970)
+    SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type));
+
+  const uint64_t leap = is_leap_year(year);
+  uint64_t days = 365 * (year - 1970) + leap_days(1970, year);
+
+  if (!mon || mon > 12)
+    SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type));
+  // February has 29 days in leap years
+  if (!mday || mday > (uint64_t)days_in_month[mon] + (leap && mon == 2))
+    SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type));
+  if (hour > 23 || min > 59 || sec > 59)
+    SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type));
+
+  // table is indexed by month, not by day of the month
+  days += days_to_month[mon];
+  // months are 1-based, the leap day only counts from March onwards
+  if (mon > 2 && leap)
+    days++;
+
+  days += mday - 1;
+
+  const uint64_t hours = days * 24 + hour;
+  const uint64_t minutes = hours * 60 + min;
+  const uint64_t seconds = minutes * 60 + sec;
+
+  // field is 32 bits wide, refuse timestamps that would be truncated
+  if (seconds > UINT32_MAX)
+    SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type));
+
+  uint32_t time = htonl((uint32_t)seconds);
   memcpy(&parser->rdata->octets[parser->rdata->length], &time, sizeof(time));
-  parser->rdata->length += sizeof(uint32_t);
+  parser->rdata->length += sizeof(time);
+  return ZONE_INT32;
 }
 
 #endif // TIME_H
diff --git
a/src/generic/ttl.h b/src/generic/ttl.h
index 4f33d43..0d34f11 100644
--- a/src/generic/ttl.h
+++ b/src/generic/ttl.h
@@ -9,121 +9,137 @@
 #ifndef TTL_H
 #define TTL_H
 
-static inline uint64_t is_unit(char c)
-{
-  static const uint32_t s = 1u, m = 60u*1u, h = 60u*60u, d = 24u*60u*60u, w = 7u*24u*60u*60u;
-
-  switch (c) {
-    case 's':
-    case 'S':
-      return s;
-    case 'm':
-    case 'M':
-      return m;
-    case 'h':
-    case 'H':
-      return h;
-    case 'd':
-    case 'D':
-      return d;
-    case 'w':
-    case 'W':
-      return w;
-  }
+// number of seconds per unit, indexed by character, zero for non-units
+// [sS] = 1, [mM] = 60, [hH] = 60*60, [dD] = 24*60*60, [wW] = 7*24*60*60
+static const uint32_t units[256] = {
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x00 - 0x0f
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x10 - 0x1f
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x20 - 0x2f
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x30 - 0x3f
+  0, 0, 0, 0, 86400, 0, 0, 0, 3600, 0, 0, 0, 0, 60, 0, 0,  // 0x40 - 0x4f
+  0, 0, 0, 1, 0, 0, 0, 604800, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x50 - 0x5f
+  0, 0, 0, 0, 86400, 0, 0, 0, 3600, 0, 0, 0, 0, 60, 0, 0,  // 0x60 - 0x6f
+  0, 0, 0, 1, 0, 0, 0, 604800, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x70 - 0x7f
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x80 - 0x8f
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x90 - 0x9f
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xa0 - 0xaf
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xb0 - 0xbf
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xc0 - 0xcf
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xd0 - 0xdf
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xe0 - 0xef
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0   // 0xf0 - 0xff
+};
 
-  return 0;
-}
-
-// FIXME: scan_ttl should fallback to recognizing units instead
-zone_always_inline()
-zone_nonnull_all()
-static inline zone_return_t scan_ttl(
+// Scan a TTL and store the number of seconds in *seconds.
+//
+// Accepts a plain decimal number or, if the pretty_ttls option is enabled,
+// a sequence of <number><unit> pairs in descending unit order (e.g. 1h30m),
+// optionally followed by a bare number of seconds (e.g. 1h30). The maximum
+// is INT32_MAX, or UINT32_MAX if the secondary option is set, in which case
+// values with the most significant bit set are reported through
+// SEMANTIC_ERROR. Returns ZONE_TTL on success, the (negative) token code if
+// the token itself carries an error. Malformed input is reported through
+// SYNTAX_ERROR.
+zone_nonnull_all
+static zone_really_inline int32_t scan_ttl(
   zone_parser_t *parser,
   const zone_type_info_t *type,
   const zone_field_info_t *field,
-  zone_token_t *token,
+  token_t *token,
   uint32_t *seconds)
 {
-  uint64_t value = 0, unit = 0, number, factor = 0;
-  enum { NUMBER, UNIT } state = NUMBER;
-
-  // ttls must start with a number
-  number = (unsigned char)token->data[0] - '0';
-  if (number > 9)
-    SYNTAX_ERROR(parser, "Invalid %s in %s",
-                 field->name.data, type->name.data);
-
-  for (size_t i=1; i < token->length; i++) {
-    const uint64_t digit = (unsigned char)token->data[i] - '0';
-
-    switch (state) {
-      case NUMBER:
-        if (digit <= 9) {
-          number = (number * 10) + digit;
-          if (value > INT32_MAX)
-            SEMANTIC_ERROR(parser, "Invalid %s in %s, value exceeds maximum",
-                           field->name.data, type->name.data);
-        } else if ((factor = is_unit(token->data[i]))) {
-          // units must not be repeated e.g. 1m1m
-          if (unit == factor)
-            SYNTAX_ERROR(parser, "Invalid %s in %s, reuse of unit %c",
-                         field->name.data, type->name.data, token->data[i]);
-          // greater units must precede smaller units. e.g. 1m1s, not 1s1m
-          if (unit && unit < factor)
-            SYNTAX_ERROR(parser, "Invalid %s in %s, unit %c follows smaller unit",
-                         field->name.data, type->name.data, token->data[i]);
-          unit = factor;
-          number = number * unit;
-          state = UNIT;
-        } else {
-          SYNTAX_ERROR(parser, "Invalid %s in %s, invalid unit",
-                       field->name.data, type->name.data);
-        }
-        break;
-      case UNIT:
-        // units must be followed by a number. e.g. 1h30m, not 1hh
-        if (digit > 9)
-          SYNTAX_ERROR(parser, "Invalid %s in %s, non-digit follows unit",
-                       field->name.data, type->name.data);
-        // units must not be followed by a number if smallest unit,
-        // i.e. seconds, was previously specified
-        if (unit == 1)
-          SYNTAX_ERROR(parser, "Invalid %s in %s, digit follows unit s",
-                       field->name.data, type->name.data);
-        value = value + number;
-        number = digit;
-        state = NUMBER;
+  int32_t r;
+  uint64_t t = 0, m = parser->options.secondary ? UINT32_MAX : INT32_MAX;
+
+  if ((r = have_contiguous(parser, type, field, token)) < 0)
+    return r;
+
+  // leading run of digits
+  const char *p = token->data;
+  for (;; p++) {
+    const uint64_t d = (uint8_t)*p - '0';
+    if (d > 9)
+      break;
+    t = t * 10 + d;
+  }
+
+  if (zone_likely(contiguous[ (uint8_t)*p ] != CONTIGUOUS)) {
+    // plain number, zero is a valid TTL
+    // FIXME: comment RFC2308 msb
+    // more than 10 digits cannot fit 32 bits (and may have wrapped t)
+    if (t > m || p - token->data > 10)
+      SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type));
+    if (t & (1llu << 31))
+      SEMANTIC_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type));
+    *seconds = (uint32_t)t;
+    return ZONE_TTL;
+  } else if (p == token->data || !parser->options.pretty_ttls) {
+    // ttls must start with a number, units are optional syntax
+    SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type));
+  }
+
+  // leading number may have wrapped if it is made up of more than 10 digits
+  if (p - token->data > 10)
+    SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type));
+
+  // n = number being scanned, u = unit just read, f = previous unit
+  // p points to the first non-digit, i.e. a number was scanned last
+  uint64_t n = t, u = 0, f = 0;
+  enum { NUMBER, UNIT } s = NUMBER;
+
+  for (t = 0; ; p++) {
+    const uint64_t d = (uint8_t)*p - '0';
+
+    if (s == NUMBER) {
+      if (d <= 9) {
+        n = n * 10 + d;
+      } else if (!(u = units[ (uint8_t)*p ])) {
         break;
+      // units must not be repeated e.g. 1m1m
+      } else if (u == f) {
+        SYNTAX_ERROR(parser, "Invalid %s in %s, reuse of unit %c",
+                     NAME(field), NAME(type), *p);
+      // greater units must precede smaller units. e.g. 1m1s, not 1s1m
+      } else if (f && u > f) {
+        SYNTAX_ERROR(parser, "Invalid %s in %s, unit %c follows smaller unit",
+                     NAME(field), NAME(type), *p);
+      } else {
+        f = u;
+        n = n * u;
+        s = UNIT;
+      }
+
+      if (n > m)
+        SYNTAX_ERROR(parser, "Invalid %s in %s",
+                     NAME(field), NAME(type));
+    } else if (s == UNIT) {
+      // unit may be the last character in the token, e.g. 1h
+      if (contiguous[ (uint8_t)*p ] != CONTIGUOUS)
+        break;
+      // units must be followed by a number. e.g. 1h30m, not 1hh
+      if (d > 9)
+        SYNTAX_ERROR(parser, "Invalid %s in %s, non-digit follows unit",
+                     NAME(field), NAME(type));
+      // units must not be followed by a number if smallest unit,
+      // i.e. seconds, was previously specified
+      if (f == 1)
+        SYNTAX_ERROR(parser, "Invalid %s in %s, digit follows unit s",
+                     NAME(field), NAME(type));
+      t = t + n;
+      n = d;
+      s = NUMBER;
+
+      if (t > m)
+        SYNTAX_ERROR(parser, "Invalid %s in %s",
+                     NAME(field), NAME(type));
     }
   }
 
-  value = value + number;
-  // FIXME: comment RFC2308 msb
-  if (value > INT32_MAX)
-    SEMANTIC_ERROR(parser, "Invalid %s in %s, value exceeds maximum",
-                   field->name.data, type->name.data);
-  *seconds = (uint32_t)value;
+  // anything but a delimiter is trailing garbage, e.g. 1h30x
+  if (zone_unlikely(contiguous[ (uint8_t)*p ] == CONTIGUOUS))
+    SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type));
+
+  t = t + n;
+  if (t > m)
+    SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type));
+  if (t & (1llu << 31))
+    SEMANTIC_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type));
+
+  *seconds = (uint32_t)t;
   return ZONE_TTL;
 }
 
-zone_always_inline()
-zone_nonnull_all()
-static inline zone_return_t parse_ttl(
+// Scan a TTL (see scan_ttl) and append it to the rdata as a 32-bit integer
+// in network byte order. Returns ZONE_TTL on success, the negative result
+// of scan_ttl otherwise.
+zone_nonnull_all
+static zone_really_inline int32_t parse_ttl(
   zone_parser_t *parser,
   const zone_type_info_t *type,
   const zone_field_info_t *field,
-  zone_token_t *token)
+  token_t *token)
 {
-  uint32_t seconds = 0;
-  zone_return_t result;
-
-  if ((result = scan_ttl(parser, type, field, token, &seconds)) < 0)
-    return result;
-  assert(seconds <= INT32_MAX);
-  seconds = htonl(seconds);
-  memcpy(&parser->rdata->octets[parser->rdata->length], &seconds, sizeof(seconds));
-  parser->rdata->length += sizeof(uint32_t);
-  return 0;
+  int32_t r;
+  uint32_t t = 0;
+
+  if ((r = scan_ttl(parser, type, field, token, &t)) < 0)
+    return r;
+  t = htonl(t);
+  memcpy(&parser->rdata->octets[parser->rdata->length], &t, sizeof(t));
+  parser->rdata->length += sizeof(t);
+  return ZONE_TTL;
 }
 
 #endif // TTL_H
diff --git a/src/generic/type.h b/src/generic/type.h
index 436304c..c6f9591 100644
--- a/src/generic/type.h
+++ b/src/generic/type.h
@@ -14,162 +14,138 @@
 #include
 #endif
 
-#include "zone.h"
-
 extern const zone_table_t *zone_identifiers;
 extern const
zone_fast_table_t *zone_fast_identifiers;
 
-zone_always_inline()
-static inline uint8_t subs(uint8_t x, uint8_t y)
+// Look up the mnemonic at the start of the token in zone_fast_identifiers.
+//
+// The table is selected by the (case folded) first character, the slot by
+// a hash over the last character and the length of the mnemonic. Returns
+// the symbol if the token matches its key (case insensitive) and the key
+// is not followed by more contiguous characters, NULL otherwise.
+zone_nonnull_all
+static zone_really_inline const zone_symbol_t *lookup_type_or_class(
+  zone_parser_t *parser, const token_t *token)
 {
-  uint8_t res = x - y;
-  res &= -(res <= x);
-  return res;
-}
+  delimited_t delimited;
 
-zone_always_inline()
-zone_nonnull_all()
-static inline zone_return_t scan_type_or_class(
-  zone_parser_t *parser,
-  const zone_type_info_t *type,
-  const zone_field_info_t *field,
-  const zone_token_t *token,
-  uint16_t *code)
-{
-  const uint8_t n = subs(token->length & 0xdf, 0x01);
-  uint8_t k = ((uint8_t)(token->data[0] & 0xdf) - 0x41) & 0x1f;
-  uint8_t h = (token->data[n] & 0xdf);
-  h *= 0x07;
-  h += (uint8_t)token->length;
+  (void)parser;
+  // FIXME: Not explicitly specified, but RRTYPE names (so far) consist of
+  //        [0-9a-zA-Z-]. A simple range check (as described on #66) may
+  //        outperform scanning for delimiters.
+  scan_delimited(&delimited, non_contiguous, blank, token->data);
 
-  const zone_fast_table_t *table = &zone_fast_identifiers[k];
+  // sentinel bit caps the length if no delimiter is found in the block
+  const size_t length = trailing_zeroes(delimited.delimiter | (1llu << 63));
+  uint8_t key = ((uint8_t)(token->data[0] & 0xdf) - 0x41) & 0x1f;
+  uint8_t hash = token->data[length - 1] & 0xdf;
+  hash *= 0x07; // better distribution (A + 1 != B)
+  hash += (uint8_t)length;
+
+  const zone_fast_table_t *table = &zone_fast_identifiers[key];
 
   simd_8x16_t keys;
   simd_loadu_8x16(&keys, table->keys);
-  const uint64_t bits = simd_find_8x16(&keys, (char)h) | (1u << 15);
-  const uint64_t index = trailing_zeroes(bits);
-  const zone_symbol_t *symbol = table->symbols[index];
-
-  if (symbol &&
-      token->length == symbol->key.length &&
-      strncasecmp(token->data, symbol->key.data, symbol->key.length) == 0)
-  {
-    *code = symbol->value & 0xffffu;
-    return symbol->value >> 16;
-  }
+  const uint64_t bits = simd_find_8x16(&keys, (char)hash);
+  // sentinel bit selects the last slot if the hash is not found
+  const uint64_t index = trailing_zeroes(bits | (1llu << 15));
 
-  if (token->length > 4 &&
-      strncasecmp(token->data, "TYPE", 4) == 0)
-  {
-    uint64_t v = 0;
-    for (size_t i=4; i < token->length; i++) {
-      const uint64_t x = (uint8_t)token->data[i] - '0';
-      if (x > 9)
-        goto bad_type;
-      v = v * 10 + x;
-      if (v > UINT16_MAX)
-        goto bad_type;
-    }
-
-    *code = (uint16_t)v;
-    return ZONE_TYPE;
-bad_type:
-    SEMANTIC_ERROR(parser, "Invalid %s in %s",
-                   field->name.data, type->name.data);
-  }
+  const zone_symbol_t *symbol = table->symbols[index];
 
-  if (token->length > 5 &&
-      strncasecmp(token->data, "CLASS", 5) == 0)
-  {
-    uint64_t v = 0;
-    for (size_t i=5; i < token->length; i++) {
-      const uint64_t x = (uint8_t)token->data[i] - '0';
-      if (x > 9)
-        goto bad_class;
-      v = v * 10 + x;
-      if (v > UINT16_MAX)
-        goto bad_class;
-    }
-
-    *code = (uint16_t)v;
-    return ZONE_CLASS;
-bad_class:
-    SEMANTIC_ERROR(parser, "Invalid %s in %s",
-                   field->name.data, type->name.data);
-  }
+  if (!symbol || strncasecmp(token->data, symbol->key.data, symbol->key.length))
+    return NULL;
+  if (contiguous[ (uint8_t)token->data[symbol->key.length] ] == CONTIGUOUS)
+    return NULL;
 
-  SEMANTIC_ERROR(parser, "Invalid %s in %s",
-                 field->name.data, type->name.data);
+  return symbol;
 }
 
-zone_always_inline()
-zone_nonnull_all()
-static inline zone_return_t scan_type(
+// Scan a type or class, either a known mnemonic or generic notation, i.e.
+// TYPEnnn or CLASSnnn with nnn a decimal number between 1 and 65535. The
+// code is stored in *code. Returns the upper 16 bits of the symbol value
+// for mnemonics, ZONE_TYPE or ZONE_CLASS for generic notation, or the
+// (negative) token code if the token itself carries an error.
+zone_nonnull_all
+static zone_really_inline int32_t scan_type_or_class(
   zone_parser_t *parser,
   const zone_type_info_t *type,
   const zone_field_info_t *field,
-  const zone_token_t *token,
+  const token_t *token,
   uint16_t *code)
 {
-  const uint8_t n = subs(token->length & 0xdf, 0x01);
-  uint8_t k = ((uint8_t)(token->data[0] & 0xdf) - 0x41) & 0x1f;
-  uint8_t h = (token->data[n] & 0xdf);
-  h *= 0x07;
-  h += (uint8_t)token->length;
+  int32_t r;
+  const zone_symbol_t *s;
+
+  if ((r = have_contiguous(parser, type, field, token)) < 0)
+    return r;
+
+  if ((s = lookup_type_or_class(parser, token)))
+    return (void)(*code = s->value & 0xffffu), s->value >> 16;
+
+  if (strncasecmp(token->data, "TYPE", 4) == 0)
+    r = ZONE_TYPE;
+  else if (strncasecmp(token->data, "CLASS", 5) == 0)
+    r = ZONE_CLASS;
+  else
+    SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type));
+
+  uint64_t n = 0;
+  // digits directly follow the prefix, "CLASS" is one longer than "TYPE"
+  const char *q = token->data + 4 + (r == ZONE_CLASS);
+  const char *p = q;
+  for (;; p++) {
+    const uint64_t d = (uint8_t)*p - '0';
+    if (d > 9)
+      break;
+    n = n * 10 + d;
+  }
 
-  const zone_fast_table_t *table = &zone_fast_identifiers[k];
+  // at most five digits (65535), counted from the end of the prefix rather
+  // than the start of the token, more digits may have wrapped n
+  if (!n || n > UINT16_MAX || p - q > 5 || is_contiguous((uint8_t)*p))
+    SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type));
 
-  simd_8x16_t keys;
-  simd_loadu_8x16(&keys, table->keys);
-  const uint64_t bits = simd_find_8x16(&keys, (char)h) | (1u << 15);
-  const uint64_t index = trailing_zeroes(bits);
-  const zone_symbol_t *symbol = table->symbols[index];
+  *code = (uint16_t)n;
+  return r;
+}
 
-  if (symbol &&
-      token->length == symbol->key.length &&
-      strncasecmp(token->data, symbol->key.data, symbol->key.length) == 0)
-  {
-    *code = symbol->value & 0xffff;
-    //return symbol->value >> 16;
-    return ZONE_TYPE;
+// Scan a type, either a known mnemonic or generic notation, i.e. TYPEnnn
+// with nnn a decimal number between 1 and 65535. The code is stored in
+// *code. Returns the upper 16 bits of the symbol value for mnemonics,
+// ZONE_TYPE for generic notation, or the (negative) token code if the
+// token itself carries an error.
+//
+// NOTE(review): the lookup is shared with scan_type_or_class, so a class
+//               mnemonic presumably matches here too -- confirm whether
+//               symbols that are not types must be rejected.
+zone_nonnull_all
+static zone_really_inline int32_t scan_type(
+  zone_parser_t *parser,
+  const zone_type_info_t *type,
+  const zone_field_info_t *field,
+  const token_t *token,
+  uint16_t *code)
+{
+  int32_t r;
+  const zone_symbol_t *s;
+
+  if ((r = have_contiguous(parser, type, field, token)) < 0)
+    return r;
+
+  if ((s = lookup_type_or_class(parser, token)))
+    return (void)(*code = s->value & 0xffffu), s->value >> 16;
+
+  if (strncasecmp(token->data, "TYPE", 4) == 0)
+    r = ZONE_TYPE;
+  else
+    SEMANTIC_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type));
+
+  uint64_t n = 0;
+  // digits directly follow the "TYPE" prefix
+  const char *q = token->data + 4;
+  const char *p = q;
+  for (;; p++) {
+    const uint64_t d = (uint8_t)*p - '0';
+    if (d > 9)
+      break;
+    n = n * 10 + d;
   }
 
-  if (token->length > 4 &&
-      strncasecmp(token->data, "TYPE", 4) == 0)
-  {
-    uint64_t v = 0;
-    for (size_t i=4; i < token->length; i++) {
-      const uint64_t x = (uint8_t)token->data[i] - '0';
-      if (x > 9)
-        goto bad_type;
-      v = v * 10 + x;
-      if (v > UINT16_MAX)
-        goto bad_type;
-    }
-
-    *code = (uint16_t)v;
-    return ZONE_TYPE;
-  }
+  // at most five digits (65535), counted from the end of the prefix rather
+  // than the start of the token, more digits may have wrapped n
+  if (!n || n > UINT16_MAX || p - q > 5 || is_contiguous((uint8_t)*p))
+    SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type));
 
-bad_type:
-  SEMANTIC_ERROR(parser, "Invalid %s in %s",
-                 field->name.data, type->name.data);
+  *code = (uint16_t)n;
+  return r;
 }
 
-zone_always_inline()
-zone_nonnull_all()
-static inline void parse_type(
+// Scan a type (see scan_type) and append its code to the rdata as a 16-bit
+// integer in network byte order. Returns ZONE_TYPE on success, the negative
+// result of scan_type otherwise.
+zone_nonnull_all
+static zone_really_inline int32_t parse_type(
   zone_parser_t *parser,
   const zone_type_info_t *type,
   const zone_field_info_t *field,
-  zone_token_t *token)
+  const token_t *token)
 {
-  uint16_t code;
-
-  scan_type(parser, type, field, token, &code);
-  code = htons(code);
-  memcpy(&parser->rdata->octets[parser->rdata->length], &code, sizeof(code));
-  parser->rdata->length += sizeof(uint16_t);
+  int32_t r;
+  uint16_t c;
+
+  if ((r = scan_type(parser, type, field, token, &c)) < 0)
+    return r;
+  c = htons(c);
+  memcpy(&parser->rdata->octets[parser->rdata->length], &c, sizeof(c));
+  parser->rdata->length += sizeof(c);
+  return ZONE_TYPE;
 }
 
 #endif // TYPE_H
diff --git a/src/haswell/bench.c b/src/haswell/bench.c
index c185592..c53a13a 100644
--- a/src/haswell/bench.c
+++ b/src/haswell/bench.c
@@ -17,16 +17,18 @@
 diagnostic_push()
 clang_diagnostic_ignored(missing-prototypes)
 
-zone_return_t zone_bench_haswell_lex(zone_parser_t *parser, size_t *tokens)
+int32_t zone_bench_haswell_lex(zone_parser_t *parser, size_t *tokens)
 {
-  zone_token_t token;
-  zone_return_t result;
+  token_t token;
 
   (*tokens) = 0;
-  while ((result = lex(parser, &token)) >= 0 && token.data != zone_end_of_file)
+  lex(parser, &token);
+  while (token.code > 0) {
     (*tokens)++;
+    lex(parser, &token);
+  }
 
-  return result;
+  return token.code ?
-1 : 0; } diagnostic_pop() diff --git a/src/haswell/delimited.h b/src/haswell/delimited.h new file mode 100644 index 0000000..d899597 --- /dev/null +++ b/src/haswell/delimited.h @@ -0,0 +1,50 @@ +/* + * string.h -- some useful comment + * + * Copyright (c) 2023, NLnet Labs. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + * + */ +#ifndef STRING_H +#define STRING_H + +zone_nonnull_all +static zone_really_inline void copy_and_scan_delimited( + delimited_t *block, + const simd_table_t delimiter, + const simd_table_t space, + const char *source, + uint8_t *destination) +{ + __m256i b = _mm256_loadu_si256((const __m256i *)space); + __m256i d = _mm256_loadu_si256((const __m256i *)delimiter); + + simd_loadu_8x(&block->input, (const uint8_t *)source); + b = _mm256_shuffle_epi8(b, block->input.chunks[0]); + d = _mm256_shuffle_epi8(d, block->input.chunks[0]); + simd_storeu_8x(destination, &block->input); + b = _mm256_cmpeq_epi8(block->input.chunks[0], b); + d = _mm256_cmpeq_epi8(block->input.chunks[0], d); + block->delimiter = (uint32_t)_mm256_movemask_epi8(_mm256_or_si256(b, d)); +} + +zone_nonnull_all +static zone_really_inline void scan_delimited( + delimited_t *block, + const simd_table_t delimiter, + const simd_table_t space, + const char *source) +{ + __m256i b = _mm256_loadu_si256((const __m256i *)space); + __m256i d = _mm256_loadu_si256((const __m256i *)delimiter); + + simd_loadu_8x(&block->input, (const uint8_t *)source); + b = _mm256_shuffle_epi8(b, block->input.chunks[0]); + d = _mm256_shuffle_epi8(d, block->input.chunks[0]); + b = _mm256_cmpeq_epi8(block->input.chunks[0], b); + d = _mm256_cmpeq_epi8(block->input.chunks[0], d); + block->delimiter = (uint32_t)_mm256_movemask_epi8(_mm256_or_si256(b, d)); +} + +#endif // STRING_H diff --git a/src/haswell/parser.c b/src/haswell/parser.c index 092a7c1..330271e 100644 --- a/src/haswell/parser.c +++ b/src/haswell/parser.c @@ -6,17 +6,15 @@ * See LICENSE for the license. 
* */ -#define _XOPEN_SOURCE -#include -#undef _XOPEN_SOURCE - #include "zone.h" #include "diagnostic.h" #include "log.h" #include "haswell/simd.h" #include "haswell/bits.h" #include "lexer.h" +#include "table.h" #include "generic/scanner.h" +#include "haswell/delimited.h" #include "generic/number.h" #include "generic/ttl.h" #include "generic/time.h" @@ -35,9 +33,9 @@ diagnostic_push() clang_diagnostic_ignored(missing-prototypes) -zone_return_t zone_haswell_parse(zone_parser_t *parser, void *user_data) +zone_return_t zone_haswell_parse(zone_parser_t *parser) { - return parse(parser, user_data); + return parse(parser); } diagnostic_pop() diff --git a/src/haswell/simd.h b/src/haswell/simd.h index c566fdb..be94f9e 100644 --- a/src/haswell/simd.h +++ b/src/haswell/simd.h @@ -24,7 +24,6 @@ typedef uint8_t simd_table_t[SIMD_8X_SIZE]; v08, v09, v0a, v0b, v0c, v0d, v0e, v0f \ } - typedef struct { __m256i chunks[1]; } simd_8x_t; typedef struct { __m128i chunks[1]; } simd_8x16_t; @@ -32,32 +31,28 @@ typedef struct { __m128i chunks[1]; } simd_8x16_t; typedef struct { __m256i chunks[2]; } simd_8x64_t; -zone_always_inline() -zone_nonnull_all() -static inline void simd_loadu_8x(simd_8x_t *simd, const void *address) +zone_nonnull_all +static zone_really_inline void simd_loadu_8x(simd_8x_t *simd, const void *address) { simd->chunks[0] = _mm256_loadu_si256((const __m256i *)(address)); } -zone_always_inline() -zone_nonnull_all() -static inline void simd_storeu_8x(void *address, simd_8x_t *simd) +zone_nonnull_all +static zone_really_inline void simd_storeu_8x(void *address, simd_8x_t *simd) { _mm256_storeu_si256((__m256i *)address, simd->chunks[0]); } -zone_always_inline() -zone_nonnull_all() -static inline uint64_t simd_find_8x(const simd_8x_t *simd, char key) +zone_nonnull_all +static zone_really_inline uint64_t simd_find_8x(const simd_8x_t *simd, char key) { const __m256i k = _mm256_set1_epi8(key); const __m256i r = _mm256_cmpeq_epi8(simd->chunks[0], k); return 
(uint32_t)_mm256_movemask_epi8(r); } -zone_always_inline() -zone_nonnull_all() -static inline uint64_t simd_find_any_8x( +zone_nonnull_all +static zone_really_inline uint64_t simd_find_any_8x( const simd_8x_t *simd, const simd_table_t table) { const __m256i t = _mm256_loadu_si256((const __m256i *)table); @@ -66,16 +61,14 @@ static inline uint64_t simd_find_any_8x( return (uint32_t)_mm256_movemask_epi8(r); } -zone_always_inline() -zone_nonnull_all() -static inline void simd_loadu_8x16(simd_8x16_t *simd, const uint8_t *address) +zone_nonnull_all +static zone_really_inline void simd_loadu_8x16(simd_8x16_t *simd, const uint8_t *address) { simd->chunks[0] = _mm_loadu_si128((const __m128i *)address); } -zone_always_inline() -zone_nonnull_all() -static inline uint64_t simd_find_8x16(const simd_8x16_t *simd, char key) +zone_nonnull_all +static zone_really_inline uint64_t simd_find_8x16(const simd_8x16_t *simd, char key) { const __m128i k = _mm_set1_epi8(key); const __m128i r = _mm_cmpeq_epi8(simd->chunks[0], k); @@ -83,17 +76,15 @@ static inline uint64_t simd_find_8x16(const simd_8x16_t *simd, char key) return m; } -zone_always_inline() -zone_nonnull_all() -static inline void simd_loadu_8x64(simd_8x64_t *simd, const uint8_t *address) +zone_nonnull_all +static zone_really_inline void simd_loadu_8x64(simd_8x64_t *simd, const uint8_t *address) { simd->chunks[0] = _mm256_loadu_si256((const __m256i *)(address)); simd->chunks[1] = _mm256_loadu_si256((const __m256i *)(address+32)); } -zone_always_inline() -zone_nonnull_all() -static inline uint64_t simd_find_8x64(const simd_8x64_t *simd, char key) +zone_nonnull_all +static zone_really_inline uint64_t simd_find_8x64(const simd_8x64_t *simd, char key) { const __m256i k = _mm256_set1_epi8(key); @@ -106,9 +97,8 @@ static inline uint64_t simd_find_8x64(const simd_8x64_t *simd, char key) return m0 | (m1 << 32); } -zone_always_inline() -zone_nonnull_all() -static inline uint64_t simd_find_any_8x64( +zone_nonnull_all +static 
zone_really_inline uint64_t simd_find_any_8x64( const simd_8x64_t *simd, const simd_table_t table) { const __m256i t = _mm256_loadu_si256((const __m256i *)table); diff --git a/src/lexer.c b/src/lexer.c deleted file mode 100644 index bd83fff..0000000 --- a/src/lexer.c +++ /dev/null @@ -1,57 +0,0 @@ -/* - * scanner.c -- some useful comment - * - * Copyright (c) 2022, NLnet Labs. All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause - * - */ -#include - -static const uint8_t forward[256] = { - // "\t" = 0x09, "\r" = 0x0d, "\0" = 0x00 (if nothing follows contiguous) - 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, // 0x00 - 0x0f - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10 - 0x1f - // " " = 0x20 - 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20 - 0x2f - // ";" = 0x3b (if comment directly follows contiguous, e.g. "foo;bar") - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, // 0x30 - 0x3f - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 - 0x4f - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50 - 0x5f - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 - 0x6f - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 - 0x7f - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 - 0x8f - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90 - 0x9f - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0 - 0xaf - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0 - 0xbf - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xc0 - 0xcf - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xd0 - 0xdf - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xe0 - 0xef - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 - 0xff -}; - -static const uint8_t jump[256] = { - 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, // 0x00 - 0x0f - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10 - 0x1f - 0, 0, 1, 0, 0, 0, 0, 0, 4, 5, 0, 0, 0, 0, 0, 0, // 0x20 - 0x2f - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 
0x30 - 0x3f - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40 - 0x4f - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x50 - 0x5f - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60 - 0x6f - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70 - 0x7f - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80 - 0x8f - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90 - 0x9f - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xa0 - 0xaf - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xb0 - 0xbf - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xc0 - 0xcf - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xd0 - 0xdf - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xe0 - 0xef - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 // 0xf0 - 0xff -}; - -const uint8_t *zone_forward = forward; -const uint8_t *zone_jump = jump; - -static const char end_of_file[1] = { '\0' }; - -const char *zone_end_of_file = end_of_file; diff --git a/src/lexer.h b/src/lexer.h index 40d8d0d..694a409 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -11,96 +11,255 @@ #include -extern zone_return_t zone_open_file( +extern int32_t zone_open_file( zone_parser_t *, const zone_string_t *, zone_file_t **); extern void zone_close_file( zone_parser_t *, zone_file_t *); -extern const uint8_t *zone_forward; -extern const uint8_t *zone_jump; -extern const char *zone_end_of_file; +typedef struct token token_t; +struct token { + int32_t code; + const char *data; +}; -#define ZONE_DELIMITER (0u) -#define ZONE_CONTIGUOUS (1u<<1) -#define ZONE_QUOTED (1u<<2) +// sorted so that errors, end of file and line feeds are less than contiguous +#define END_OF_FILE (0) +#define CONTIGUOUS (1<<0) +#define QUOTED (1<<1) +#define LINE_FEED (1<<2) +#define LEFT_PAREN (1<<4) +#define RIGHT_PAREN (1<<5) +#define BLANK (1<<6) +#define COMMENT (1<<7) -typedef zone_string_t zone_token_t; +static const uint8_t contiguous[256] = { + // 0x00 = "\0" + 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 
0x01, // 0x00 - 0x07 + // 0x09 = "\t", 0x0a = "\n", 0x0d = "\r" + 0x01, 0x40, 0x04, 0x01, 0x01, 0x40, 0x01, 0x01, // 0x08 - 0x0f + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x10 - 0x17 + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x18 - 0x1f + // 0x20 = " ", 0x22 = "\"" + 0x40, 0x01, 0x02, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x20 - 0x27 + // 0x28 = "(", 0x29 = ")" + 0x10, 0x20, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x28 - 0x2f + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x30 - 0x37 + // 0x3b = ";" + 0x01, 0x01, 0x01, 0x80, 0x01, 0x01, 0x01, 0x01, // 0x38 - 0x3f + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x40 - 0x47 + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x48 - 0x4f + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x50 - 0x57 + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x58 - 0x5f + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x60 - 0x67 + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x68 - 0x6f + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x70 - 0x77 + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x78 - 0x7f + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x80 - 0x87 + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x88 - 0x8f + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x90 - 0x97 + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x98 - 0x9f + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xa0 - 0xa7 + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xa8 - 0xaf + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xb0 - 0xb7 + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xb8 - 0xbf + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xc0 - 0xc7 + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xc8 - 0xcf + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xd0 - 0xd7 + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xd8 - 0xdf + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xe0 - 0xe7 + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 
0x01, 0x01, // 0xe8 - 0xef + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xf8 - 0xf7 + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 // 0xf8 - 0xff +}; -zone_never_inline() -zone_nonnull_all() -static zone_return_t step(zone_parser_t *parser, zone_token_t *token); +static const uint8_t quoted[256] = { + // 0x00 = "\0" + 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0x00 - 0x07 + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0x08 - 0x0f + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0x10 - 0x17 + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0x18 - 0x1f + // 0x22 = "\"" + 0x02, 0x02, 0x01, 0x02, 0x02, 0x02, 0x02, 0x02, // 0x20 - 0x27 + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0x28 - 0x2f + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0x30 - 0x37 + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0x30 - 0x3f + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0x40 - 0x47 + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0x40 - 0x4f + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0x50 - 0x57 + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0x50 - 0x5f + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0x60 - 0x67 + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0x60 - 0x6f + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0x70 - 0x77 + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0x70 - 0x7f + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0x80 - 0x87 + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0x80 - 0x8f + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0x90 - 0x97 + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0x90 - 0x9f + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0xa0 - 0xa7 + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0xa0 - 0xaf + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0xb0 - 0xb7 + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0xb0 - 0xbf + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0xc0 - 0xc7 + 0x02, 0x02, 0x02, 0x02, 
0x02, 0x02, 0x02, 0x02, // 0xc0 - 0xcf + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0xd0 - 0xd7 + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0xd0 - 0xdf + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0xe0 - 0xe7 + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0xe0 - 0xef + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0xf0 - 0xf7 + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02 // 0xf0 - 0xff +}; -zone_always_inline() -zone_nonnull_all() -static inline zone_return_t lex(zone_parser_t *parser, zone_token_t *token) +// +// special buffer used to mark newlines with additional embedded newline count +// +static const char line_feed[ZONE_BLOCK_SIZE] = { '\n', '\0' }; + +zone_nonnull_all +static zone_never_inline void step(zone_parser_t *parser, token_t *token); + +zone_nonnull_all +zone_warn_unused_result +static zone_really_inline int32_t have_contiguous( + zone_parser_t *parser, + const zone_type_info_t *type, + const zone_field_info_t *field, + const token_t *token) { - do { - // safe, as tape is doubly terminated - const char *start = parser->file->indexer.head[0].data; - const char *end = parser->file->indexer.head[1].data; - assert(start < end || (start == end && *start == '\0')); - - switch (zone_jump[ (unsigned char)*start ]) { - case 0: // contiguous - *token = (zone_token_t){ (size_t)(end - start), start }; - // discard index for blank or semicolon - parser->file->indexer.head += zone_forward[ (unsigned char)*end ]; - return ZONE_CONTIGUOUS; - case 1: // quoted - *token = (zone_token_t){ (size_t)(end - start), start + 1 }; - // discard index for closing quote - parser->file->indexer.head += 2; - return ZONE_QUOTED; - case 2: // newline - parser->file->line += parser->file->indexer.head[0].newlines + 1; - parser->file->indexer.head++; - if (parser->file->grouped) - break; - parser->file->start_of_line = (end - start) == 1; - *token = (zone_token_t){ 1, start }; - return ZONE_DELIMITER; - case 3: // end of file - return 
step(parser, token); - case 4: // left parenthesis - if (parser->file->grouped) - SYNTAX_ERROR(parser, "Nested opening brace"); - parser->file->indexer.head++; - parser->file->grouped = true; - break; - case 5: // right parenthesis - if (!parser->file->grouped) - SYNTAX_ERROR(parser, "Closing brace without opening brace"); - parser->file->indexer.head++; - parser->file->grouped = false; - break; - } - } while (1); + if (zone_likely(token->code == CONTIGUOUS)) + return token->code; + else if (token->code < 0) + return token->code; + else if (token->code == QUOTED) + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); + assert(token->code == END_OF_FILE || token->code == LINE_FEED); + SYNTAX_ERROR(parser, "Missing %s in %s", NAME(field), NAME(type)); } -zone_always_inline() -zone_nonnull_all() -static inline void lex_field( +zone_nonnull_all +zone_warn_unused_result +static zone_really_inline int32_t have_string( zone_parser_t *parser, const zone_type_info_t *type, const zone_field_info_t *field, - zone_token_t *token) + const token_t *token) { - if (!lex(parser, token)) - SYNTAX_ERROR(parser, "Missing %s in %s record", - type->name.data, field->name.data); + if (zone_likely(token->code & (CONTIGUOUS | QUOTED))) + return token->code; + else if (token->code < 0) + return token->code; + assert(token->code == END_OF_FILE || token->code == LINE_FEED); + SYNTAX_ERROR(parser, "Missing %s in %s", NAME(field), NAME(type)); } -zone_always_inline() -zone_nonnull_all() -static inline void lex_delimiter( +zone_nonnull_all +zone_warn_unused_result +static zone_really_inline int32_t have_delimiter( zone_parser_t *parser, const zone_type_info_t *type, - zone_token_t *token) + const token_t *token) +{ + if (zone_likely(!(token->code & (CONTIGUOUS | QUOTED)))) + return token->code; + else if (token->code < 0) + return token->code; + assert(token->code == CONTIGUOUS || token->code == QUOTED); + SYNTAX_ERROR(parser, "Trailing data in %s", NAME(type)); +} + +static 
zone_really_inline bool is_quoted(uint8_t octet) +{ + return quoted[octet] == QUOTED; +} + +static zone_really_inline bool is_contiguous(uint8_t octet) +{ + return contiguous[octet] == CONTIGUOUS; +} + +static zone_really_inline bool is_blank(uint8_t octet) +{ + return contiguous[octet] == BLANK; +} + +zone_nonnull_all +static zone_really_inline int32_t refill(zone_parser_t *parser) +{ + if (parser->file->buffer.length == parser->file->buffer.size) { + size_t size = parser->file->buffer.size + ZONE_WINDOW_SIZE; + char *data = parser->file->buffer.data; + if (!(data = realloc(data, size + 1))) + OUT_OF_MEMORY(parser, "Cannot increase buffer size to %zu", size); + parser->file->buffer.size = size; + parser->file->buffer.data = data; + } + + size_t count = fread(parser->file->buffer.data + parser->file->buffer.length, + sizeof(parser->file->buffer.data[0]), + parser->file->buffer.size - parser->file->buffer.length, + parser->file->handle); + + if (count == 0 && ferror(parser->file->handle)) + SYNTAX_ERROR(parser, "actually a read error"); + + // always null-terminate so terminating token can point to something + parser->file->buffer.length += (size_t)count; + parser->file->buffer.data[parser->file->buffer.length] = '\0'; + parser->file->end_of_file = feof(parser->file->handle) != 0; + return 0; +} + +#define DEFER_ERROR(parser, token, error) \ + do { \ + token->data = NULL; \ + token->code = error; \ + return; \ + } while (0) + +#define DEFER_SYNTAX_ERROR(parser, token, ...) 
\ + do { \ + ZONE_LOG(parser, ZONE_ERROR, __VA_ARGS__); \ + token->data = NULL; \ + token->code = ZONE_SYNTAX_ERROR; \ + return; \ + } while (0) + +zone_nonnull_all +static zone_really_inline void lex(zone_parser_t *parser, token_t *token) { - if (lex(parser, token)) - SYNTAX_ERROR(parser, "Trailing data in %s record", - type->name.data); + for (;;) { + token->data = *parser->file->fields.head++; + token->code = (int32_t)contiguous[ (uint8_t)*token->data ]; + if (zone_likely(token->code == CONTIGUOUS)) { + return; + } else if (token->code == LINE_FEED) { + if (zone_unlikely(token->data == line_feed)) + parser->file->span += *parser->file->lines.head++; + parser->file->span++; + if (parser->file->grouped) + continue; + parser->file->line += parser->file->span; + parser->file->span = 0; + parser->file->start_of_line = !is_blank((uint8_t)*(token->data+1)); + return; + } else if (token->code == QUOTED) { + token->data++; + return; + } else if (token->code == END_OF_FILE) { + break; + } else if (token->code == LEFT_PAREN) { + if (parser->file->grouped) + DEFER_SYNTAX_ERROR(parser, token, "Nested opening brace"); + parser->file->grouped = true; + } else { + assert(token->code == RIGHT_PAREN); + if (!parser->file->grouped) + DEFER_SYNTAX_ERROR(parser, token, "Missing opening brace"); + parser->file->grouped = false; + } + } + + step(parser, token); } #endif // LEXER_H diff --git a/src/log.c b/src/log.c index 985eb21..0d4d655 100644 --- a/src/log.c +++ b/src/log.c @@ -10,7 +10,6 @@ #include #include #include -#include #include "zone.h" #include "log.h" @@ -75,20 +74,3 @@ void zone_log( log_message(parser, file, line, function, category, format, ap); va_end(ap); } - -void zone_raise( - zone_parser_t *parser, - const char *file, - size_t line, - const char *function, - zone_return_t code, - const char *format, - ...) 
-{ - va_list ap; - - va_start(ap, format); - log_message(parser, file, line, function, ZONE_ERROR, format, ap); - va_end(ap); - longjmp((void *)parser->environment, code); -} diff --git a/src/log.h b/src/log.h index e5bd805..10bf0df 100644 --- a/src/log.h +++ b/src/log.h @@ -11,30 +11,33 @@ #include #include +#include #include "zone.h" -ZONE_EXPORT -zone_noreturn() -void zone_raise( - zone_parser_t *parser, - const char *file, - size_t line, - const char *function, - zone_return_t code, - const char *format, - ...) -zone_nonnull((1,2,4,6)) -zone_format_printf(6,7); +#define NAME(info) ((info)->name.data) #define RAISE(parser, code, ...) \ - zone_raise(parser, __FILE__, __LINE__, __func__, code, __VA_ARGS__) + do { \ + ZONE_LOG(parser, ZONE_ERROR, __VA_ARGS__); \ + return code; \ + } while (0) #define SYNTAX_ERROR(parser, ...) \ RAISE(parser, ZONE_SYNTAX_ERROR, __VA_ARGS__) +// semantic errors in the zone format are special as a secondary may choose +// to report, but otherwise ignore them. e.g. a TTL with the MSB set. cases +// where the data can be presented in wire format but is otherwise considered +// invalid. e.g. a TTL is limited to 32-bits, values that require more bits +// are invalid without exception, but secondaries may choose to accept values +// with the MSB set in order to update the zone #define SEMANTIC_ERROR(parser, ...) \ - RAISE(parser, ZONE_SEMANTIC_ERROR, __VA_ARGS__) + do { \ + ZONE_LOG(parser, ZONE_ERROR, __VA_ARGS__); \ + if (!parser->options.secondary) \ + return ZONE_SEMANTIC_ERROR; \ + } while (0) #define NOT_IMPLEMENTED(parser, ...) 
\ RAISE(parser, ZONE_NOT_IMPLEMENTED, __VA_ARGS__) diff --git a/src/parser.c b/src/parser.c index 1d4f17d..11f46df 100644 --- a/src/parser.c +++ b/src/parser.c @@ -20,9 +20,12 @@ #include "log.h" #include "visit.h" +#if _WIN32 +typedef SSIZE_T ssize_t; +#endif + zone_nonnull((1,2,3,4)) -zone_always_inline() -static inline size_t check_bytes( +static zone_really_inline ssize_t check_bytes( zone_parser_t *parser, const zone_type_info_t *type, const zone_field_info_t *field, @@ -32,9 +35,8 @@ static inline size_t check_bytes( { (void)data; if (length < size) - SEMANTIC_ERROR(parser, "Missing %s in %s record", - field->name.data, type->name.data); - return size; + SYNTAX_ERROR(parser, "Missing %s in %s", NAME(field), NAME(type)); + return (ssize_t)size; } #define check_int8(...) check_bytes(__VA_ARGS__, sizeof(uint8_t)) @@ -47,9 +49,9 @@ static inline size_t check_bytes( #define check_ip6(...) check_bytes(__VA_ARGS__, sizeof(struct in6_addr)) -zone_always_inline() + zone_nonnull((1,2,3,4)) -static inline size_t check_ttl( +static zone_really_inline ssize_t check_ttl( zone_parser_t *parser, const zone_type_info_t *type, const zone_field_info_t *field, @@ -59,22 +61,19 @@ static inline size_t check_ttl( uint32_t number; if (length < sizeof(number)) - SEMANTIC_ERROR(parser, "Missing %s in %s record", - field->name.data, type->name.data); + SYNTAX_ERROR(parser, "Missing %s in %s", NAME(field), NAME(type)); memcpy(&number, data, sizeof(number)); number = ntohl(number); if (number > INT32_MAX) - SEMANTIC_ERROR(parser, "Invalid %s in %s record", - field->name.data, type->name.data); + SEMANTIC_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); return 4; } -zone_always_inline() zone_nonnull((1,2,3,4)) -static inline size_t check_type( +static zone_really_inline ssize_t check_type( zone_parser_t *parser, const zone_type_info_t *type, const zone_field_info_t *field, @@ -84,20 +83,18 @@ static inline size_t check_type( uint16_t number; if (length < sizeof(number)) - 
SEMANTIC_ERROR(parser, "Missing %s in %s record", - field->name.data, type->name.data); + SYNTAX_ERROR(parser, "Missing %s in %s", NAME(field), NAME(type)); memcpy(&number, data, sizeof(number)); if (!number) - SEMANTIC_ERROR(parser, "Invalid %s in %s record", - field->name.data, type->name.data); + SEMANTIC_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); + return 2; } -zone_always_inline() zone_nonnull((1,2,3,4)) -static inline size_t check_name( +static zone_really_inline ssize_t check_name( zone_parser_t *parser, const zone_type_info_t *type, const zone_field_info_t *field, @@ -111,14 +108,13 @@ static inline size_t check_name( } if (!count || count > length) - SEMANTIC_ERROR(parser, "Invalid %s in %s record", - field->name.data, type->name.data); - return count; + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); + + return (ssize_t)count; } -zone_always_inline() zone_nonnull((1,2,3,4)) -static inline size_t check_string( +static zone_really_inline ssize_t check_string( zone_parser_t *parser, const zone_type_info_t *type, const zone_field_info_t *field, @@ -128,14 +124,13 @@ static inline size_t check_string( size_t count; if (!length || (count = 1 + (size_t)data[0]) > length) - SEMANTIC_ERROR(parser, "Invalid %s in %s record", - field->name.data, type->name.data); - return count; + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); + + return (ssize_t)count; } -zone_always_inline() zone_nonnull((1,2,3,4)) -static inline size_t check_nsec( +static zone_really_inline ssize_t check_nsec( zone_parser_t *parser, const zone_type_info_t *type, const zone_field_info_t *field, @@ -149,19 +144,28 @@ static inline size_t check_nsec( const size_t window = (size_t)data[0]; const size_t blocks = 1 + (size_t)data[1]; if (window < last_window || !window != !last_window) - SEMANTIC_ERROR(parser, "Invalid %s in %s, windows are out-of-order", - field->name.data, type->name.data); + SYNTAX_ERROR(parser, "Invalid %s in %s, windows are 
out-of-order", + NAME(field), NAME(type)); if (blocks > 32) - SEMANTIC_ERROR(parser, "Invalid %s in %s, blocks are out-of-bounds", - field->name.data, type->name.data); + SYNTAX_ERROR(parser, "Invalid %s in %s, blocks are out-of-bounds", + NAME(field), NAME(type)); count += 2 + blocks; last_window = window; } if (count != length) - SEMANTIC_ERROR(parser, "Invalid %s in %s", - field->name.data, type->name.data); - return count; + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); + + return (ssize_t)count; +} + +zone_nonnull((1)) +static int32_t add(size_t *length, ssize_t count) +{ + if (count < 0) + return (int32_t)count; + *length = (size_t)count; + return 0; } diagnostic_push() @@ -169,269 +173,301 @@ clang_diagnostic_ignored(implicit-function-declaration) clang_diagnostic_ignored(missing-prototypes) zone_nonnull((1,2)) -void zone_check_a_rdata( - zone_parser_t *parser, const zone_type_info_t *type, void *user_data) +int32_t zone_check_a_rdata( + zone_parser_t *parser, const zone_type_info_t *type) { - size_t count = 0; - const size_t length = parser->rdata->length; - const uint8_t *data = parser->rdata->octets; - const zone_field_info_t *fields = type->rdata.fields; - - count += check_ip4(parser, type, &fields[0], data, length); - if (count != length) - SEMANTIC_ERROR(parser, "Invalid %s record", type->name.data); - - accept_rr(parser, user_data); + int32_t r; + size_t c; + const size_t n = parser->rdata->length; + const uint8_t *o = parser->rdata->octets; + const zone_field_info_t *f = type->rdata.fields; + + if ((r = add(&c, check_ip4(parser, type, &f[0], o, n)))) + return r; + + if (c != n) + SYNTAX_ERROR(parser, "Invalid %s", NAME(type)); + return accept_rr(parser); } zone_nonnull((1,2)) -void zone_check_ns_rdata( - zone_parser_t *parser, const zone_type_info_t *type, void *user_data) +int32_t zone_check_ns_rdata( + zone_parser_t *parser, const zone_type_info_t *type) { - size_t count = 0; - const size_t length = parser->rdata->length; - 
const uint8_t *data = parser->rdata->octets; - const zone_field_info_t *fields = type->rdata.fields; - - count += check_name(parser, type, &fields[0], data, length); - if (count != length) - SEMANTIC_ERROR(parser, "Invalid %s record", type->name.data); - - accept_rr(parser, user_data); + int32_t r; + size_t c; + const size_t n = parser->rdata->length; + const uint8_t *o = parser->rdata->octets; + const zone_field_info_t *f = type->rdata.fields; + + if ((r = add(&c, check_name(parser, type, &f[0], o, n))) < 0) + return r; + + if (c != n) + SYNTAX_ERROR(parser, "Invalid %s", NAME(type)); + return accept_rr(parser); } zone_nonnull((1,2)) -void zone_check_cname_rdata( - zone_parser_t *parser, const zone_type_info_t *type, void *user_data) +int32_t zone_check_cname_rdata( + zone_parser_t *parser, const zone_type_info_t *type) { - size_t count = 0; - const size_t length = parser->rdata->length; - const uint8_t *data = parser->rdata->octets; - const zone_field_info_t *fields = type->rdata.fields; - - count += check_name(parser, type, &fields[0], data, length); - if (count != length) - SEMANTIC_ERROR(parser, "Invalid %s record", type->name.data); - accept_rr(parser, user_data); + int32_t r; + size_t c = 0; + const size_t n = parser->rdata->length; + const uint8_t *o = parser->rdata->octets; + const zone_field_info_t *f = type->rdata.fields; + + if ((r = add(&c, check_name(parser, type, &f[0], o, n)))) + return r; + + if (c != n) + SYNTAX_ERROR(parser, "Invalid %s", NAME(type)); + return accept_rr(parser); } zone_nonnull((1,2)) -void zone_check_soa_rdata( - zone_parser_t *parser, const zone_type_info_t *type, void *user_data) +int32_t zone_check_soa_rdata( + zone_parser_t *parser, const zone_type_info_t *type) { - size_t count = 0; - const size_t length = parser->rdata->length; - const uint8_t *data = parser->rdata->octets; - const zone_field_info_t *fields = type->rdata.fields; - - count += check_name(parser, type, &fields[0], data, length); - count += check_name(parser, 
type, &fields[1], data + count, length - count); - count += check_int32(parser, type, &fields[2], data + count, length - count); - count += check_ttl(parser, type, &fields[3], data + count, length - count); - count += check_ttl(parser, type, &fields[4], data + count, length - count); - count += check_ttl(parser, type, &fields[5], data + count, length - count); - count += check_ttl(parser, type, &fields[6], data + count, length - count); - - if (count != length) - SEMANTIC_ERROR(parser, "Invalid %s record", type->name.data); - accept_rr(parser, user_data); + int32_t r; + size_t c = 0; + const size_t n = parser->rdata->length; + const uint8_t *o = parser->rdata->octets; + const zone_field_info_t *f = type->rdata.fields; + + if ((r = add(&c, check_name(parser, type, &f[0], o, n))) || + (r = add(&c, check_name(parser, type, &f[1], o+c, n-c))) || + (r = add(&c, check_int32(parser, type, &f[2], o+c, n-c))) || + (r = add(&c, check_ttl(parser, type, &f[3], o+c, n-c))) || + (r = add(&c, check_ttl(parser, type, &f[4], o+c, n-c))) || + (r = add(&c, check_ttl(parser, type, &f[5], o+c, n-c))) || + (r = add(&c, check_ttl(parser, type, &f[6], o+c, n-c)))) + return r; + + if (c != n) + SYNTAX_ERROR(parser, "Invalid %s", NAME(type)); + return accept_rr(parser); } zone_nonnull((1,2)) -void zone_check_mx_rdata( - zone_parser_t *parser, const zone_type_info_t *type, void *user_data) +int32_t zone_check_mx_rdata( + zone_parser_t *parser, const zone_type_info_t *type) { - size_t count = 0; - const size_t length = parser->rdata->length; - const uint8_t *data = parser->rdata->octets; - const zone_field_info_t *fields = type->rdata.fields; - - count += check_int16(parser, type, &fields[0], data, length); - count += check_name(parser, type, &fields[1], data + count, length - count); - - if (count != length) - SEMANTIC_ERROR(parser, "Invalid %s record", type->name.data); - accept_rr(parser, user_data); + int32_t r; + size_t c = 0; + const size_t n = parser->rdata->length; + const uint8_t *o 
= parser->rdata->octets; + const zone_field_info_t *f = type->rdata.fields; + + if ((r = add(&c, check_int16(parser, type, &f[0], o, n))) || + (r = add(&c, check_name(parser, type, &f[1], o+c, n-c)))) + return r; + + if (c != n) + SYNTAX_ERROR(parser, "Invalid %s", NAME(type)); + return accept_rr(parser); } zone_nonnull((1,2)) -void zone_check_txt_rdata( - zone_parser_t *parser, const zone_type_info_t *type, void *user_data) +int32_t zone_check_txt_rdata( + zone_parser_t *parser, const zone_type_info_t *type) { - size_t count = 0; - const size_t length = parser->rdata->length; - const uint8_t *data = parser->rdata->octets; - const zone_field_info_t *fields = type->rdata.fields; - - count += check_string(parser, type, &fields[0], data, length); - while (count < length) - count += check_string(parser, type, &fields[0], data+count, length-count); - - if (count != length) - SEMANTIC_ERROR(parser, "Invalid %s record", type->name.data); - accept_rr(parser, user_data); + int32_t r; + size_t c = 0; + const size_t n = parser->rdata->length; + const uint8_t *o = parser->rdata->octets; + const zone_field_info_t *f = type->rdata.fields; + + if ((r = add(&c, check_string(parser, type, &f[0], o, n)))) + return r; + + while (c < n) + if ((r = add(&c, check_string(parser, type, &f[0], o+c, n-c)))) + return r; + + if (c != n) + SYNTAX_ERROR(parser, "Invalid %s", NAME(type)); + return accept_rr(parser); } zone_nonnull((1,2)) -void zone_check_aaaa_rdata( - zone_parser_t *parser, const zone_type_info_t *type, void *user_data) +int32_t zone_check_aaaa_rdata( + zone_parser_t *parser, const zone_type_info_t *type) { - size_t count = 0; - const size_t length = parser->rdata->length; - const uint8_t *data = parser->rdata->octets; - const zone_field_info_t *fields = type->rdata.fields; - - count += check_ip6(parser, type, &fields[0], data, length); - - if (count != length) - SEMANTIC_ERROR(parser, "Invalid %s record", type->name.data); - accept_rr(parser, user_data); + int32_t r; + size_t c 
= 0; + const size_t n = parser->rdata->length; + const uint8_t *o = parser->rdata->octets; + const zone_field_info_t *f = type->rdata.fields; + + if ((r = add(&c, check_ip6(parser, type, &f[0], o, n)))) + return r; + + if (c != n) + SYNTAX_ERROR(parser, "Invalid %s record", NAME(type)); + return accept_rr(parser); } zone_nonnull((1,2)) -void zone_check_srv_rdata( - zone_parser_t *parser, const zone_type_info_t *type, void *user_data) +int32_t zone_check_srv_rdata( + zone_parser_t *parser, const zone_type_info_t *type) { - size_t count = 0; - const size_t length = parser->rdata->length; - const uint8_t *data = parser->rdata->octets; - const zone_field_info_t *fields = type->rdata.fields; - - count += check_int16(parser, type, &fields[0], data, length); - count += check_int16(parser, type, &fields[1], data+count, length-count); - count += check_int16(parser, type, &fields[2], data+count, length-count); - count += check_name(parser, type, &fields[3], data+count, length-count); - - if (count != length) - SEMANTIC_ERROR(parser, "Invalid %s record", type->name.data); - accept_rr(parser, user_data); + int32_t r; + size_t c = 0; + const size_t n = parser->rdata->length; + const uint8_t *o = parser->rdata->octets; + const zone_field_info_t *f = type->rdata.fields; + + if ((r = add(&c, check_int16(parser, type, &f[0], o, n))) || + (r = add(&c, check_int16(parser, type, &f[1], o+c, n-c))) || + (r = add(&c, check_int16(parser, type, &f[2], o+c, n-c))) || + (r = add(&c, check_name(parser, type, &f[3], o+c, n-c)))) + return r; + + if (c != n) + SYNTAX_ERROR(parser, "Invalid %s", NAME(type)); + return accept_rr(parser); } zone_nonnull((1,2)) -void zone_check_ds_rdata( - zone_parser_t *parser, const zone_type_info_t *type, void *user_data) +int32_t zone_check_ds_rdata( + zone_parser_t *parser, const zone_type_info_t *type) { - size_t count = 0; - const size_t length = parser->rdata->length; - const uint8_t *data = parser->rdata->octets; - const zone_field_info_t *fields = 
type->rdata.fields; - - count += check_int16(parser, type, &fields[0], data, length); - count += check_int8(parser, type, &fields[1], data+count, length-count); - count += check_int8(parser, type, &fields[2], data+count, length-count); - - if (count <= length) - SEMANTIC_ERROR(parser, "Invalid %s record", type->name.data); - accept_rr(parser, user_data); + int32_t r; + size_t c = 0; + const size_t n = parser->rdata->length; + const uint8_t *o = parser->rdata->octets; + const zone_field_info_t *f = type->rdata.fields; + + if ((r = add(&c, check_int16(parser, type, &f[0], o, n))) || + (r = add(&c, check_int8(parser, type, &f[1], o+c, n-c))) || + (r = add(&c, check_int8(parser, type, &f[2], o+c, n-c)))) + return r; + + if (c != n) + SYNTAX_ERROR(parser, "Invalid %s", NAME(type)); + return accept_rr(parser); } zone_nonnull((1,2)) -void zone_check_rrsig_rdata( - zone_parser_t *parser, const zone_type_info_t *type, void *user_data) +int32_t zone_check_rrsig_rdata( + zone_parser_t *parser, const zone_type_info_t *type) { - size_t count = 0; - const size_t length = parser->rdata->length; - const uint8_t *data = parser->rdata->octets; - const zone_field_info_t *fields = type->rdata.fields; - - count += check_type(parser, type, &fields[0], data, length); - count += check_int8(parser, type, &fields[1], data, length); - count += check_int8(parser, type, &fields[2], data, length); - count += check_ttl(parser, type, &fields[3], data, length); - count += check_int32(parser, type, &fields[4], data, length); - count += check_int32(parser, type, &fields[5], data, length); - count += check_int16(parser, type, &fields[6], data, length); - count += check_name(parser, type, &fields[7], data, length); - - if (count <= length) - SEMANTIC_ERROR(parser, "Invalid %s record", type->name.data); - accept_rr(parser, user_data); + int32_t r; + size_t c = 0; + const size_t n = parser->rdata->length; + const uint8_t *o = parser->rdata->octets; + const zone_field_info_t *f = type->rdata.fields; + + 
if ((r = add(&c, check_type(parser, type, &f[0], o, n))) || + (r = add(&c, check_int8(parser, type, &f[1], o+c, n-c))) || + (r = add(&c, check_int8(parser, type, &f[2], o+c, n-c))) || + (r = add(&c, check_ttl(parser, type, &f[3], o+c, n-c))) || + (r = add(&c, check_int32(parser, type, &f[4], o+c, n-c))) || + (r = add(&c, check_int32(parser, type, &f[5], o+c, n-c))) || + (r = add(&c, check_int16(parser, type, &f[6], o+c, n-c))) || + (r = add(&c, check_name(parser, type, &f[7], o+c, n-c)))) + return r; + + if (c != n) + SYNTAX_ERROR(parser, "Invalid %s", NAME(type)); + return accept_rr(parser); } zone_nonnull((1,2)) -void zone_check_nsec_rdata( - zone_parser_t *parser, const zone_type_info_t *type, void *user_data) +int32_t zone_check_nsec_rdata( + zone_parser_t *parser, const zone_type_info_t *type) { - size_t count = 0; - const size_t length = parser->rdata->length; - const uint8_t *data = parser->rdata->octets; - const zone_field_info_t *fields = type->rdata.fields; - - count += check_name(parser, type, &fields[0], data, length); - count += check_nsec(parser, type, &fields[1], data, length); - - if (count <= length) - SEMANTIC_ERROR(parser, "Invalid %s record", type->name.data); - accept_rr(parser, user_data); + int32_t r; + size_t c = 0; + const size_t n = parser->rdata->length; + const uint8_t *o = parser->rdata->octets; + const zone_field_info_t *f = type->rdata.fields; + + if ((r = add(&c, check_name(parser, type, &f[0], o, n))) || + (r = add(&c, check_nsec(parser, type, &f[1], o+c, n-c)))) + return r; + + if (c != n) + SYNTAX_ERROR(parser, "Invalid %s", NAME(type)); + return accept_rr(parser); } zone_nonnull((1,2)) -void zone_check_dnskey_rdata( - zone_parser_t *parser, const zone_type_info_t *type, void *user_data) +int32_t zone_check_dnskey_rdata( + zone_parser_t *parser, const zone_type_info_t *type) { - size_t count = 0; - const size_t length = parser->rdata->length; - const uint8_t *data = parser->rdata->octets; - const zone_field_info_t *fields = 
type->rdata.fields; - - count += check_int16(parser, type, &fields[0], data, length); - count += check_int8(parser, type, &fields[1], data+count, length-count); - count += check_int8(parser, type, &fields[2], data+count, length-count); - - if (count <= length) - SEMANTIC_ERROR(parser, "Invalid %s record", type->name.data); - - accept_rr(parser, user_data); + int32_t r; + size_t c = 0; + const size_t n = parser->rdata->length; + const uint8_t *o = parser->rdata->octets; + const zone_field_info_t *f = type->rdata.fields; + + if ((r = add(&c, check_int16(parser, type, &f[0], o, n))) || + (r = add(&c, check_int8(parser, type, &f[1], o+c, n-c))) || + (r = add(&c, check_int8(parser, type, &f[2], o+c, n-c)))) + return r; + + if (c != n) + SYNTAX_ERROR(parser, "Invalid %s", NAME(type)); + return accept_rr(parser); } zone_nonnull((1,2)) -void zone_check_nsec3_rdata( - zone_parser_t *parser, const zone_type_info_t *type, void *user_data) +int32_t zone_check_nsec3_rdata( + zone_parser_t *parser, const zone_type_info_t *type) { - size_t count = 0; - const size_t length = parser->rdata->length; - const uint8_t *data = parser->rdata->octets; - const zone_field_info_t *fields = type->rdata.fields; - - count += check_int8(parser, type, &fields[0], data, length); - count += check_int8(parser, type, &fields[1], data+count, length-count); - count += check_int16(parser, type, &fields[2], data+count, length-count); - count += check_string(parser, type, &fields[3], data+count, length-count); - count += check_string(parser, type, &fields[4], data+count, length-count); - count += check_nsec(parser, type, &fields[5], data+count, length-count); - - accept_rr(parser, user_data); + int32_t r; + size_t c = 0; + const size_t n = parser->rdata->length; + const uint8_t *o = parser->rdata->octets; + const zone_field_info_t *f = type->rdata.fields; + + if ((r = add(&c, check_int8(parser, type, &f[0], o, n))) || + (r = add(&c, check_int8(parser, type, &f[1], o+c, n-c))) || + (r = add(&c, 
check_int16(parser, type, &f[2], o+c, n-c))) || + (r = add(&c, check_string(parser, type, &f[3], o+c, n-c))) || + (r = add(&c, check_string(parser, type, &f[4], o+c, n-c))) || + (r = add(&c, check_nsec(parser, type, &f[5], o+c, n-c)))) + return r; + + if (c != n) + SYNTAX_ERROR(parser, "Invalid %s", NAME(type)); + return accept_rr(parser); } zone_nonnull((1,2)) -void zone_check_nsec3param_rdata( - zone_parser_t *parser, const zone_type_info_t *type, void *user_data) +int32_t zone_check_nsec3param_rdata( + zone_parser_t *parser, const zone_type_info_t *type) { - size_t count = 0; - const size_t length = parser->rdata->length; - const uint8_t *data = parser->rdata->octets; - const zone_field_info_t *fields = type->rdata.fields; - - count += check_int8(parser, type, &fields[0], data, length); - count += check_int8(parser, type, &fields[1], data+count, length-count); - count += check_int16(parser, type, &fields[2], data+count, length-count); - count += check_string(parser, type, &fields[3], data+count, length-count); - - if (count <= length) - SEMANTIC_ERROR(parser, "Invalid %s record", type->name.data); - - accept_rr(parser, user_data); + int32_t r; + size_t c = 0; + const size_t n = parser->rdata->length; + const uint8_t *o = parser->rdata->octets; + const zone_field_info_t *f = type->rdata.fields; + + if ((r = add(&c, check_int8(parser, type, &f[0], o, n))) || + (r = add(&c, check_int8(parser, type, &f[1], o+c, n-c))) || + (r = add(&c, check_int16(parser, type, &f[2], o+c, n-c))) || + (r = add(&c, check_string(parser, type, &f[3], o+c, n-c)))) + return r; + + if (c != n) + SYNTAX_ERROR(parser, "Invalid %s", NAME(type)); + return accept_rr(parser); } zone_nonnull((1,2)) -void zone_check_unknown_rdata( - zone_parser_t *parser, const zone_type_info_t *type, void *user_data) +int32_t zone_check_unknown_rdata( + zone_parser_t *parser, const zone_type_info_t *type) { (void)parser; (void)type; - (void)user_data; - // implement + // FIXME: implement + + return 0; } 
diagnostic_pop() diff --git a/src/parser.h b/src/parser.h index 4956c6c..0128569 100644 --- a/src/parser.h +++ b/src/parser.h @@ -9,353 +9,375 @@ #ifndef PARSER_H #define PARSER_H -zone_nonnull((1,2)) -extern void zone_check_a_rdata( - zone_parser_t *parser, const zone_type_info_t *type, void *user_data); +zone_nonnull_all +extern int32_t zone_check_a_rdata( + zone_parser_t *parser, const zone_type_info_t *type); -zone_nonnull((1,2,3)) -static void parse_a_rdata( - zone_parser_t *parser, - const zone_type_info_t *type, - zone_token_t *token, - void *user_data) +zone_nonnull_all +static int32_t parse_a_rdata( + zone_parser_t *parser, const zone_type_info_t *type, token_t *token) { - parse_ip4(parser, type, &type->rdata.fields[0], token); - - lex_delimiter(parser, type, token); - accept_rr(parser, user_data); -} + int32_t r; -zone_nonnull((1,2)) -extern void zone_check_ns_rdata( - zone_parser_t *parser, const zone_type_info_t *type, void *user_data); - -zone_nonnull((1,2,3)) -static void parse_ns_rdata( - zone_parser_t *parser, - const zone_type_info_t *type, - zone_token_t *token, - void *user_data) -{ - parse_name(parser, type, &type->rdata.fields[0], token); - - lex_delimiter(parser, type, token); - accept_rr(parser, user_data); -} - -zone_nonnull((1,2)) -extern void zone_check_cname_rdata( - zone_parser_t *parser, const zone_type_info_t *type, void *user_data); - -zone_nonnull((1,2,3)) -static void parse_cname_rdata( - zone_parser_t *parser, - const zone_type_info_t *type, - zone_token_t *token, - void *user_data) -{ - parse_name(parser, type, &type->rdata.fields[0], token); + if ((r = parse_ip4(parser, type, &type->rdata.fields[0], token)) < 0) + return r; + lex(parser, token); + if ((r = have_delimiter(parser, type, token)) < 0) + return r; - lex_delimiter(parser, type, token); - accept_rr(parser, user_data); + return accept_rr(parser); } -zone_nonnull((1,2)) -extern void zone_check_soa_rdata( - zone_parser_t *parser, const zone_type_info_t *type, void 
*user_data); +zone_nonnull_all +extern int32_t zone_check_ns_rdata( + zone_parser_t *parser, const zone_type_info_t *type); -zone_nonnull((1,2,3)) -static void parse_soa_rdata( - zone_parser_t *parser, - const zone_type_info_t *type, - zone_token_t *token, - void *user_data) +zone_nonnull_all +static int32_t parse_ns_rdata( + zone_parser_t *parser, const zone_type_info_t *type, token_t *token) { - parse_name(parser, type, &type->rdata.fields[0], token); - - lex_field(parser, type, &type->rdata.fields[1], token); - parse_name(parser, type, &type->rdata.fields[1], token); - - lex_field(parser, type, &type->rdata.fields[2], token); - parse_int32(parser, type, &type->rdata.fields[2], token); - - lex_field(parser, type, &type->rdata.fields[3], token); - parse_ttl(parser, type, &type->rdata.fields[3], token); - - lex_field(parser, type, &type->rdata.fields[4], token); - parse_ttl(parser, type, &type->rdata.fields[4], token); - - lex_field(parser, type, &type->rdata.fields[5], token); - parse_ttl(parser, type, &type->rdata.fields[5], token); + int32_t r; - lex_field(parser, type, &type->rdata.fields[6], token); - parse_ttl(parser, type, &type->rdata.fields[6], token); + if ((r = parse_name(parser, type, &type->rdata.fields[0], token)) < 0) + return r; + lex(parser, token); + if ((r = have_delimiter(parser, type, token)) < 0) + return r; - lex_delimiter(parser, type, token); - accept_rr(parser, user_data); + return accept_rr(parser); } -zone_nonnull((1,2)) -extern void zone_check_mx_rdata( - zone_parser_t *parser, const zone_type_info_t *type, void *user_data); +zone_nonnull_all +extern int32_t zone_check_cname_rdata( + zone_parser_t *parser, const zone_type_info_t *type); -zone_nonnull((1,2,3)) -static void parse_mx_rdata( - zone_parser_t *parser, - const zone_type_info_t *type, - zone_token_t *token, - void *user_data) +zone_nonnull_all +static int32_t parse_cname_rdata( + zone_parser_t *parser, const zone_type_info_t *type, token_t *token) { - parse_int16(parser, type, 
&type->rdata.fields[0], token); + int32_t r; - lex_field(parser, type, &type->rdata.fields[1], token); - parse_name(parser, type, &type->rdata.fields[1], token); + if ((r = parse_name(parser, type, &type->rdata.fields[0], token)) < 0) + return r; + lex(parser, token); + if ((r = have_delimiter(parser, type, token)) < 0) + return r; - lex_delimiter(parser, type, token); - accept_rr(parser, user_data); + return accept_rr(parser); } -zone_nonnull((1,2)) -extern void zone_check_txt_rdata( - zone_parser_t *parser, const zone_type_info_t *type, void *user_data); +zone_nonnull_all +extern int32_t zone_check_soa_rdata( + zone_parser_t *parser, const zone_type_info_t *type); -zone_nonnull((1,2,3)) -static void parse_txt_rdata( - zone_parser_t *parser, - const zone_type_info_t *type, - zone_token_t *token, - void *user_data) +zone_nonnull_all +static int32_t parse_soa_rdata( + zone_parser_t *parser, const zone_type_info_t *type, token_t *token) { - parse_string(parser, type, &type->rdata.fields[0], token); - - while (lex(parser, token)) - parse_string(parser, type, &type->rdata.fields[0], token); - - accept_rr(parser, user_data); + int32_t r; + + if ((r = parse_name(parser, type, &type->rdata.fields[0], token)) < 0) + return r; + lex(parser, token); + if ((r = parse_name(parser, type, &type->rdata.fields[1], token)) < 0) + return r; + lex(parser, token); + if ((r = parse_int32(parser, type, &type->rdata.fields[2], token)) < 0) + return r; + lex(parser, token); + if ((r = parse_ttl(parser, type, &type->rdata.fields[3], token)) < 0) + return r; + lex(parser, token); + if ((r = parse_ttl(parser, type, &type->rdata.fields[4], token)) < 0) + return r; + lex(parser, token); + if ((r = parse_ttl(parser, type, &type->rdata.fields[5], token)) < 0) + return r; + lex(parser, token); + if ((r = parse_ttl(parser, type, &type->rdata.fields[6], token)) < 0) + return r; + lex(parser, token); + if ((r = have_delimiter(parser, type, token)) < 0) + return r; + + return accept_rr(parser); } 
-zone_nonnull((1,2)) -extern void zone_check_aaaa_rdata( - zone_parser_t *parser, const zone_type_info_t *type, void *user_data); +zone_nonnull_all +extern int32_t zone_check_mx_rdata( + zone_parser_t *parser, const zone_type_info_t *type); -zone_nonnull((1,2,3)) -static void parse_aaaa_rdata( - zone_parser_t *parser, - const zone_type_info_t *type, - zone_token_t *token, - void *user_data) +zone_nonnull_all +static int32_t parse_mx_rdata( + zone_parser_t *parser, const zone_type_info_t *type, token_t *token) { - parse_ip6(parser, type, &type->rdata.fields[0], token); - - lex_delimiter(parser, type, token); - accept_rr(parser, user_data); + int32_t r; + + if ((r = parse_int16(parser, type, &type->rdata.fields[0], token)) < 0) + return r; + lex(parser, token); + if ((r = parse_name(parser, type, &type->rdata.fields[1], token)) < 0) + return r; + lex(parser, token); + if ((r = have_delimiter(parser, type, token)) < 0) + return r; + + return accept_rr(parser); } -zone_nonnull((1,2)) -extern void zone_check_srv_rdata( - zone_parser_t *parser, const zone_type_info_t *type, void *user_data); +zone_nonnull_all +extern int32_t zone_check_txt_rdata( + zone_parser_t *parser, const zone_type_info_t *type); -zone_nonnull((1,2,3)) -static void parse_srv_rdata( - zone_parser_t *parser, - const zone_type_info_t *type, - zone_token_t *token, - void *user_data) +zone_nonnull_all +static int32_t parse_txt_rdata( + zone_parser_t *parser, const zone_type_info_t *type, token_t *token) { - parse_int16(parser, type, &type->rdata.fields[0], token); + int32_t r; - lex_field(parser, type, &type->rdata.fields[1], token); - parse_int16(parser, type, &type->rdata.fields[1], token); - - lex_field(parser, type, &type->rdata.fields[2], token); - parse_int16(parser, type, &type->rdata.fields[2], token); + do { + if ((r = parse_string(parser, type, &type->rdata.fields[0], token)) < 0) + return r; + lex(parser, token); + } while (token->code & (CONTIGUOUS | QUOTED)); - lex_field(parser, type, 
&type->rdata.fields[3], token); - parse_name(parser, type, &type->rdata.fields[3], token); + if ((r = have_delimiter(parser, type, token)) < 0) + return r; - lex_delimiter(parser, type, token); - accept_rr(parser, user_data); + return accept_rr(parser); } -zone_nonnull((1,2)) -extern void zone_check_ds_rdata( - zone_parser_t *parser, const zone_type_info_t *type, void *user_data); +zone_nonnull_all +extern int32_t zone_check_aaaa_rdata( + zone_parser_t *parser, const zone_type_info_t *type); -zone_nonnull((1,2,3)) -static void parse_ds_rdata( - zone_parser_t *parser, - const zone_type_info_t *type, - zone_token_t *token, - void *user_data) +zone_nonnull_all +static int32_t parse_aaaa_rdata( + zone_parser_t *parser, const zone_type_info_t *type, token_t *token) { - parse_int16(parser, type, &type->rdata.fields[0], token); - - lex_field(parser, type, &type->rdata.fields[1], token); - parse_int8(parser, type, &type->rdata.fields[1], token); + int32_t r; - lex_field(parser, type, &type->rdata.fields[2], token); - parse_int8(parser, type, &type->rdata.fields[2], token); + if ((r = parse_ip6(parser, type, &type->rdata.fields[0], token)) < 0) + return r; + lex(parser, token); + if ((r = have_delimiter(parser, type, token)) < 0) + return r; - lex_field(parser, type, &type->rdata.fields[3], token); - parse_base16(parser, type, &type->rdata.fields[3], token); - - accept_rr(parser, user_data); + return accept_rr(parser); } -zone_nonnull((1,2)) -extern void zone_check_rrsig_rdata( - zone_parser_t *parser, const zone_type_info_t *type, void *user_data); +zone_nonnull_all +extern int32_t zone_check_srv_rdata( + zone_parser_t *parser, const zone_type_info_t *type); -zone_nonnull((1,2,3)) -static void parse_rrsig_rdata( - zone_parser_t *parser, - const zone_type_info_t *type, - zone_token_t *token, - void *user_data) +zone_nonnull_all +static int32_t parse_srv_rdata( + zone_parser_t *parser, const zone_type_info_t *type, token_t *token) { - parse_type(parser, type, 
&type->rdata.fields[0], token); - - lex_field(parser, type, &type->rdata.fields[1], token); - parse_int8(parser, type, &type->rdata.fields[1], token); - - lex_field(parser, type, &type->rdata.fields[2], token); - parse_int8(parser, type, &type->rdata.fields[2], token); - - lex_field(parser, type, &type->rdata.fields[3], token); - parse_ttl(parser, type, &type->rdata.fields[3], token); - - lex_field(parser, type, &type->rdata.fields[4], token); - parse_time(parser, type, &type->rdata.fields[4], token); - - lex_field(parser, type, &type->rdata.fields[5], token); - parse_time(parser, type, &type->rdata.fields[5], token); - - lex_field(parser, type, &type->rdata.fields[6], token); - parse_int16(parser, type, &type->rdata.fields[6], token); - - lex_field(parser, type, &type->rdata.fields[7], token); - parse_name(parser, type, &type->rdata.fields[7], token); - - lex_field(parser, type, &type->rdata.fields[8], token); - parse_base64(parser, type, &type->rdata.fields[8], token); - - accept_rr(parser, user_data); + int32_t r; + + if ((r = parse_int16(parser, type, &type->rdata.fields[0], token)) < 0) + return r; + lex(parser, token); + if ((r = parse_int16(parser, type, &type->rdata.fields[1], token)) < 0) + return r; + lex(parser, token); + if ((r = parse_int16(parser, type, &type->rdata.fields[2], token)) < 0) + return r; + lex(parser, token); + if ((r = parse_name(parser, type, &type->rdata.fields[3], token)) < 0) + return r; + lex(parser, token); + if ((r = have_delimiter(parser, type, token)) < 0) + return r; + + return accept_rr(parser); } -zone_nonnull((1,2)) -extern void zone_check_nsec_rdata( - zone_parser_t *parser, const zone_type_info_t *type, void *user_data); +zone_nonnull_all +extern int32_t zone_check_ds_rdata( + zone_parser_t *parser, const zone_type_info_t *type); -zone_nonnull((1,2,3)) -static void parse_nsec_rdata( - zone_parser_t *parser, - const zone_type_info_t *type, - zone_token_t *token, - void *user_data) +zone_nonnull_all +static int32_t 
parse_ds_rdata( + zone_parser_t *parser, const zone_type_info_t *type, token_t *token) { - parse_name(parser, type, &type->rdata.fields[0], token); - - lex_field(parser, type, &type->rdata.fields[1], token); - parse_nsec(parser, type, &type->rdata.fields[1], token); - - accept_rr(parser, user_data); + int32_t r; + + if ((r = parse_int16(parser, type, &type->rdata.fields[0], token)) < 0) + return r; + lex(parser, token); + if ((r = parse_symbol(parser, type, &type->rdata.fields[1], token)) < 0) + return r; + lex(parser, token); + if ((r = parse_symbol(parser, type, &type->rdata.fields[2], token)) < 0) + return r; + lex(parser, token); + if ((r = parse_base16(parser, type, &type->rdata.fields[3], token)) < 0) + return r; + + return accept_rr(parser); } -zone_nonnull((1,2)) -extern void zone_check_dnskey_rdata( - zone_parser_t *parser, const zone_type_info_t *type, void *user_data); +zone_nonnull_all +extern int32_t zone_check_rrsig_rdata( + zone_parser_t *parser, const zone_type_info_t *type); -zone_nonnull((1,2,3)) -static void parse_dnskey_rdata( - zone_parser_t *parser, - const zone_type_info_t *type, - zone_token_t *token, - void *user_data) +zone_nonnull_all +static int32_t parse_rrsig_rdata( + zone_parser_t *parser, const zone_type_info_t *type, token_t *token) { - parse_int16(parser, type, &type->rdata.fields[0], token); - - lex_field(parser, type, &type->rdata.fields[1], token); - parse_int8(parser, type, &type->rdata.fields[1], token); - - lex_field(parser, type, &type->rdata.fields[2], token); - parse_int8(parser, type, &type->rdata.fields[2], token); - - lex_field(parser, type, &type->rdata.fields[3], token); - parse_base64(parser, type, &type->rdata.fields[3], token); - - accept_rr(parser, user_data); + int32_t r; + + if ((r = parse_type(parser, type, &type->rdata.fields[0], token)) < 0) + return r; + lex(parser, token); + if ((r = parse_symbol(parser, type, &type->rdata.fields[1], token)) < 0) + return r; + lex(parser, token); + if ((r = 
parse_int8(parser, type, &type->rdata.fields[2], token)) < 0) + return r; + lex(parser, token); + if ((r = parse_ttl(parser, type, &type->rdata.fields[3], token)) < 0) + return r; + lex(parser, token); + if ((r = parse_time(parser, type, &type->rdata.fields[4], token)) < 0) + return r; + lex(parser, token); + if ((r = parse_time(parser, type, &type->rdata.fields[5], token)) < 0) + return r; + lex(parser, token); + if ((r = parse_int16(parser, type, &type->rdata.fields[6], token)) < 0) + return r; + lex(parser, token); + if ((r = parse_name(parser, type, &type->rdata.fields[7], token)) < 0) + return r; + lex(parser, token); + if ((r = parse_base64(parser, type, &type->rdata.fields[8], token)) < 0) + return r; + + return accept_rr(parser); } -zone_nonnull((1,2)) -extern void zone_check_nsec3_rdata( - zone_parser_t *parser, const zone_type_info_t *type, void *user_data); +zone_nonnull_all +extern int32_t zone_check_nsec_rdata( + zone_parser_t *parser, const zone_type_info_t *type); -zone_nonnull((1,2,3)) -static void parse_nsec3_rdata( - zone_parser_t *parser, - const zone_type_info_t *type, - zone_token_t *token, - void *user_data) +zone_nonnull_all +static int32_t parse_nsec_rdata( + zone_parser_t *parser, const zone_type_info_t *type, token_t *token) { - parse_int8(parser, type, &type->rdata.fields[0], token); - - lex_field(parser, type, &type->rdata.fields[1], token); - parse_int8(parser, type, &type->rdata.fields[1], token); + int32_t r; - lex_field(parser, type, &type->rdata.fields[2], token); - parse_int16(parser, type, &type->rdata.fields[2], token); + if ((r = parse_name(parser, type, &type->rdata.fields[0], token)) < 0) + return r; + lex(parser, token); + if ((r = parse_nsec(parser, type, &type->rdata.fields[1], token)) < 0) + return r; - lex_field(parser, type, &type->rdata.fields[3], token); - parse_salt(parser, type, &type->rdata.fields[3], token); - - lex_field(parser, type, &type->rdata.fields[4], token); - parse_base32(parser, type, 
&type->rdata.fields[4], token); - - lex_field(parser, type, &type->rdata.fields[5], token); - parse_nsec(parser, type, &type->rdata.fields[5], token); - - accept_rr(parser, user_data); + return accept_rr(parser); } -zone_nonnull((1,2)) -extern void zone_check_nsec3param_rdata( - zone_parser_t *parser, const zone_type_info_t *type, void *user_data); +zone_nonnull_all +extern int32_t zone_check_dnskey_rdata( + zone_parser_t *parser, const zone_type_info_t *type); -zone_nonnull((1,2,3)) -static void parse_nsec3param_rdata( - zone_parser_t *parser, - const zone_type_info_t *type, - zone_token_t *token, - void *user_data) +zone_nonnull_all +static int32_t parse_dnskey_rdata( + zone_parser_t *parser, const zone_type_info_t *type, token_t *token) { - parse_int8(parser, type, &type->rdata.fields[0], token); + int32_t r; + + if ((r = parse_int16(parser, type, &type->rdata.fields[0], token)) < 0) + return r; + lex(parser, token); + if ((r = parse_int8(parser, type, &type->rdata.fields[1], token)) < 0) + return r; + lex(parser, token); + if ((r = parse_symbol(parser, type, &type->rdata.fields[2], token)) < 0) + return r; + lex(parser, token); + if ((r = parse_base64(parser, type, &type->rdata.fields[3], token)) < 0) + return r; + + return accept_rr(parser); +} - lex_field(parser, type, &type->rdata.fields[1], token); - parse_int8(parser, type, &type->rdata.fields[1], token); +zone_nonnull_all +extern int32_t zone_check_nsec3_rdata( + zone_parser_t *parser, const zone_type_info_t *type); - lex_field(parser, type, &type->rdata.fields[2], token); - parse_int16(parser, type, &type->rdata.fields[2], token); +zone_nonnull_all +static int32_t parse_nsec3_rdata( + zone_parser_t *parser, const zone_type_info_t *type, token_t *token) +{ + int32_t r; + + if ((r = parse_symbol(parser, type, &type->rdata.fields[0], token)) < 0) + return r; + lex(parser, token); + if ((r = parse_symbol(parser, type, &type->rdata.fields[1], token)) < 0) + return r; + lex(parser, token); + if ((r = 
parse_int16(parser, type, &type->rdata.fields[2], token)) < 0) + return r; + lex(parser, token); + if ((r = parse_salt(parser, type, &type->rdata.fields[3], token)) < 0) + return r; + lex(parser, token); + if ((r = parse_base32(parser, type, &type->rdata.fields[4], token)) < 0) + return r; + lex(parser, token); + if ((r = parse_nsec(parser, type, &type->rdata.fields[5], token)) < 0) + return r; + + return accept_rr(parser); +} - lex_field(parser, type, &type->rdata.fields[3], token); - parse_salt(parser, type, &type->rdata.fields[3], token); +zone_nonnull_all +extern int32_t zone_check_nsec3param_rdata( + zone_parser_t *parser, const zone_type_info_t *type); - lex_delimiter(parser, type, token); - accept_rr(parser, user_data); +zone_nonnull_all +static int32_t parse_nsec3param_rdata( + zone_parser_t *parser, const zone_type_info_t *type, token_t *token) +{ + int32_t r; + + if ((r = parse_symbol(parser, type, &type->rdata.fields[0], token)) < 0) + return r; + lex(parser, token); + if ((r = parse_symbol(parser, type, &type->rdata.fields[1], token)) < 0) + return r; + lex(parser, token); + if ((r = parse_int16(parser, type, &type->rdata.fields[2], token)) < 0) + return r; + lex(parser, token); + if ((r = parse_salt(parser, type, &type->rdata.fields[3], token)) < 0) + return r; + lex(parser, token); + if ((r = have_delimiter(parser, type, token)) < 0) + return r; + + return accept_rr(parser); } -zone_nonnull((1,2)) +zone_nonnull_all extern void zone_check_unknown_rdata( - zone_parser_t *parser, const zone_type_info_t *type, void *user_data); + zone_parser_t *parser, const zone_type_info_t *type); -zone_nonnull((1,2,3)) -static void parse_unknown_rdata( - zone_parser_t *parser, - const zone_type_info_t *type, - zone_token_t *token, - void *user_data) +zone_nonnull_all +static int32_t parse_unknown_rdata( + zone_parser_t *parser, const zone_type_info_t *type, token_t *token) { - (void)user_data; - parse_base16(parser, type, &type->rdata.fields[0], token); + int32_t r; + 
+ if ((r = parse_base16(parser, type, &type->rdata.fields[0], token)) < 0) + return r; + + // FIXME: verify date using the corresponding check_xxx_rdata function + + return accept_rr(parser); } #define SYMBOLS(symbols) \ @@ -379,8 +401,8 @@ static void parse_unknown_rdata( typedef struct zone_type_descriptor zone_type_descriptor_t; struct zone_type_descriptor { zone_type_info_t info; - void (*check)(zone_parser_t *, const zone_type_info_t *, void *); - void (*parse)(zone_parser_t *, const zone_type_info_t *, zone_token_t *, void *); + int32_t (*check)(zone_parser_t *, const zone_type_info_t *); + int32_t (*parse)(zone_parser_t *, const zone_type_info_t *, token_t *); }; diagnostic_push() @@ -683,43 +705,47 @@ static const zone_type_descriptor_t types[] = { diagnostic_pop() -zone_always_inline() -zone_nonnull_all() -static inline void parse_owner( +zone_nonnull_all +static zone_really_inline int32_t parse_owner( zone_parser_t *parser, const zone_type_info_t *type, const zone_field_info_t *field, - zone_token_t *token) + const token_t *token) { - // a freestanding "@" denotes the origin - if (token->length == 1 && token->data[0] == '@') { - parser->owner = &parser->file->origin; - return; + int32_t r; + size_t n = 0; + uint8_t *o = parser->file->owner.octets; + + if (zone_likely(token->code == CONTIGUOUS)) { + // a freestanding "@" denotes the origin + if (token->data[0] == '@' && !is_contiguous((uint8_t)token->data[1])) + goto relative; + r = scan_contiguous_name(parser, type, field, token, o, &n); + if (r == 0) + return (void)(parser->owner->length = n), ZONE_NAME; + if (r < 0) + return r; + } else if (token->code == QUOTED) { + r = scan_quoted_name(parser, type, field, token, o, &n); + if (r == 0) + return (void)(parser->owner->length = n), ZONE_NAME; + if (r < 0) + return r; + } else { + return have_string(parser, type, field, token); } - parser->cache.owner.serial++; - if (parser->cache.owner.serial == parser->cache.size) - parser->cache.owner.serial = 0; - - 
parser->owner = &parser->cache.owner.blocks[parser->cache.owner.serial]; - scan_name(parser, type, field, token, - parser->owner->octets, - &parser->owner->length); - - if (parser->owner->octets[parser->owner->length - 1] == 0) - return; - if (parser->owner->length > 255 - parser->file->origin.length) - SEMANTIC_ERROR(parser, "Invalid name in owner"); - - memcpy(&parser->owner->octets[parser->owner->length], - parser->file->origin.octets, - parser->file->origin.length); - parser->owner->length += parser->file->origin.length; +relative: + if (n > 255 - parser->file->origin.length) + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); + memcpy(o+n, parser->file->origin.octets, parser->file->origin.length); + parser->owner->length = n + parser->file->origin.length; + return ZONE_NAME; } -zone_always_inline() -static inline void parse_rr( - zone_parser_t *parser, zone_token_t *token, void *user_data) +zone_nonnull_all +static zone_really_inline int32_t parse_rr( + zone_parser_t *parser, token_t *token) { static const zone_type_info_t unknown = { { 6, "record" }, 0, 0, { 0, NULL } }; @@ -729,112 +755,144 @@ static inline void parse_rr( { { 3, "ttl" }, ZONE_INT32, 0, { 0 } }; static const zone_field_info_t type = { { 4, "type" }, ZONE_INT16, 0, { 0 } }; + static const zone_string_t backslash_hash = { 2, "\\#" }; + int32_t r; const zone_type_descriptor_t *descriptor; uint16_t code; uint32_t epoch; if (parser->file->start_of_line) { parse_owner(parser, &unknown, &owner, token); - lex_field(parser, &unknown, &type, token); + lex(parser, token); } if ((uint8_t)token->data[0] - '0' <= 9) { - scan_ttl(parser, &unknown, &ttl, token, &epoch); + if ((r = scan_ttl(parser, &unknown, &ttl, token, &epoch)) < 0) + return r; goto class_or_type; - } - - switch (scan_type_or_class(parser, &unknown, &type, token, &code)) { - case ZONE_CLASS: - parser->file->last_class = code; - goto ttl_or_type; - default: + } else { + r = scan_type_or_class(parser, &unknown, &type, token, 
&code); + if (zone_likely(r == ZONE_TYPE)) { parser->file->last_type = code; goto rdata; + } else if (r == ZONE_CLASS) { + parser->file->last_class = code; + goto ttl_or_type; + } else { + assert(r < 0); + return r; + } } ttl_or_type: - lex_field(parser, &unknown, &type, token); - if ((uint8_t)token->data[0] - '0' <= 9) { - scan_ttl(parser, &unknown, &ttl, token, &epoch); + lex(parser, token); + if ((uint8_t)token->data[0] - '0' <= 9) { // << this is illegal before checking the code + if ((r = scan_ttl(parser, &unknown, &ttl, token, &epoch)) < 0) + return r; goto type; } else { - scan_type(parser, &unknown, &type, token, &code); + if ((r = scan_type(parser, &unknown, &type, token, &code)) < 0) + return r; parser->file->last_type = code; goto rdata; } class_or_type: - lex_field(parser, &unknown, &type, token); - switch (scan_type_or_class(parser, &unknown, &type, token, &code)) { - case ZONE_CLASS: - parser->file->last_class = code; - goto type; - default: - parser->file->last_type = code; - goto rdata; + lex(parser, token); + r = scan_type_or_class(parser, &unknown, &type, token, &code); + if (zone_likely(r == ZONE_TYPE)) { + parser->file->last_type = code; + goto rdata; + } else if (r == ZONE_CLASS) { + parser->file->last_class = code; + goto type; + } else { + assert(r < 0); + return r; } type: - lex_field(parser, &unknown, &type, token); - scan_type(parser, &unknown, &type, token, &code); + lex(parser, token); + if ((r = scan_type(parser, &unknown, &type, token, &code)) < 0) + return r; parser->file->last_type = code; rdata: // FIXME: check if type is directly indexable descriptor = &types[code]; - // check if rdata starts with "\#" and, if so, parse generic rdata - lex_field(parser, &descriptor->info, &descriptor->info.rdata.fields[0], token); - parser->rdata->length = 0; - if (token->length == 2 && strncmp(token->data, "\\#", 2) == 0) { - parse_unknown_rdata(parser, &descriptor->info, token, user_data); - descriptor->check(parser, &descriptor->info, 
user_data); + // check if rdata starts with "\#" and, if so, parse generic rdata + lex(parser, token); + if (token->code == CONTIGUOUS && compare(token, &backslash_hash) == 0) { + parse_unknown_rdata(parser, &descriptor->info, token); + return descriptor->check(parser, &descriptor->info); } else if (descriptor->parse) { - descriptor->parse(parser, &descriptor->info, token, user_data); - } else { - SEMANTIC_ERROR(parser, "Unknown record type in record"); + return descriptor->parse(parser, &descriptor->info, token); } + + SYNTAX_ERROR(parser, "Unknown record type in record"); } // RFC1035 section 5.1 // $INCLUDE [] [] -zone_nonnull((1,2)) -static inline void parse_dollar_include( - zone_parser_t *parser, zone_token_t *token, void *user_data) +zone_nonnull_all +static zone_really_inline int32_t parse_dollar_include( + zone_parser_t *parser, token_t *token) { - static const zone_field_info_t field = - { { 11, "domain-name" }, ZONE_NAME, 0, { 0 } }; + static const zone_field_info_t fields[] = { + { { 9, "file-name" }, ZONE_STRING, 0, { 0 } }, + { { 11, "domain-name" }, ZONE_NAME, 0, { 0 } } + }; static const zone_type_info_t type = - { { 8, "$INCLUDE" }, 0, 0, { 1, &field } }; + { { 8, "$INCLUDE" }, 0, 0, { 1, fields } }; + int32_t r; zone_file_t *includer, *file; zone_name_block_t name; const zone_name_block_t *origin = &parser->file->origin; - zone_return_t result; - - (void)user_data; + const uint8_t *delimiters; if (parser->options.no_includes) NOT_PERMITTED(parser, "$INCLUDE directive is disabled"); - if (!lex(parser, token)) - SYNTAX_ERROR(parser, "$INCLUDE directive takes a file-name argument"); - if ((result = zone_open_file(parser, token, &file)) < 0) - RAISE(parser, result, "Cannot open file specified in $INCLUDE directive"); - - if (lex(parser, token)) { - scan_name(parser, &type, &field, token, name.octets, &name.length); - if (name.octets[name.length - 1] != 0) { - zone_close_file(parser, file); - SYNTAX_ERROR(parser, "$INCLUDE directive requires an 
absolute domain"); - } - if (lex(parser, token)) { - zone_close_file(parser, file); - SYNTAX_ERROR(parser, "$INCLUDE directive takes at most two arguments"); - } + lex(parser, token); + if (token->code == CONTIGUOUS) + delimiters = contiguous; + else if (token->code == QUOTED) + delimiters = quoted; + else + return have_string(parser, &type, &fields[0], token); + + // FIXME: a more elegant solution probably exists + const char *p = token->data; + for (; delimiters[(uint8_t)*p] == token->code; p++) ; + const size_t n = (size_t)(p - token->data); + + if ((r = zone_open_file(parser, &(zone_string_t){ n, token->data }, &file)) < 0) + return r; + + // $INCLUDE directive may specify an origin + lex(parser, token); + if (token->code == CONTIGUOUS) { + r = scan_contiguous_name( + parser, &type, &fields[1], token, name.octets, &name.length); + if (r < 0) + goto invalid_name; + if (r != 0) + goto relative_name; + origin = &name; + lex(parser, token); + } else if (token->code == QUOTED) { + r = scan_quoted_name( + parser, &type, &fields[1], token, name.octets, &name.length); + if (r < 0) + goto invalid_name; + if (r != 0) + goto relative_name; origin = &name; + lex(parser, token); } // store the current owner to restore later if necessary @@ -848,7 +906,10 @@ static inline void parse_dollar_include( file->last_ttl = includer->last_ttl; file->line = 1; - // check for circular includes + if ((r = have_delimiter(parser, &type, token)) < 0) + return r; + + // check for recursive includes do { if (strcmp(includer->path, file->path) != 0) continue; @@ -857,90 +918,105 @@ static inline void parse_dollar_include( } while ((includer = includer->includer)); parser->file = file; + return 0; +relative_name: + zone_close_file(parser, file); + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(&type), NAME(&fields[1])); +invalid_name: + zone_close_file(parser, file); + return r; } // RFC1035 section 5.1 // $ORIGIN [] zone_nonnull((1,2)) -static inline void parse_dollar_origin( - zone_parser_t 
*parser, zone_token_t *token, void *user_data) +static inline int32_t parse_dollar_origin( + zone_parser_t *parser, token_t *token) { static const zone_field_info_t field = { { 4, "name" }, ZONE_NAME, 0, { 0 } }; static const zone_type_info_t type = { { 7, "$ORIGIN" }, 0, 0, { 1, &field } }; - (void)user_data; - - if (!lex(parser, token)) - scan_name(parser, &type, &field, token, - parser->file->origin.octets, - &parser->file->origin.length); - if (parser->file->origin.octets[parser->file->origin.length - 1] != 0) - SYNTAX_ERROR(parser, "Invalid name in $ORIGIN, not fully qualified"); - if (lex(parser, token)) - SYNTAX_ERROR(parser, "$ORIGIN takes just a single argument"); + int32_t r; + + lex(parser, token); + if (zone_likely(token->code == CONTIGUOUS)) + r = scan_contiguous_name(parser, &type, &field, token, + parser->file->origin.octets, + &parser->file->origin.length); + else if (token->code == QUOTED) + r = scan_quoted_name(parser, &type, &field, token, + parser->file->origin.octets, + &parser->file->origin.length); + else + return have_string(parser, &type, &field, token); + + if (r < 0) + return r; + if (r > 0) + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(&field), NAME(&type)); + + lex(parser, token); + return have_delimiter(parser, &type, token); } // RFC2308 section 4 // $TTL [] zone_nonnull((1,2)) -static inline void parse_dollar_ttl( - zone_parser_t *parser, zone_token_t *token, void *user_data) +static inline int32_t parse_dollar_ttl( + zone_parser_t *parser, token_t *token) { static const zone_field_info_t field = { { 3, "ttl" }, ZONE_INT32, 0, { 0 } }; static const zone_type_info_t type = { { 4, "$TTL" }, 0, 0, { 1, &field } }; - (void)user_data; - - if (!lex(parser, token)) - SYNTAX_ERROR(parser, "$TTL directive takes a ttl argument"); + int32_t r; - scan_ttl(parser, &type, &field, token, &parser->file->default_ttl); + lex(parser, token); + if ((r = scan_ttl(parser, &type, &field, token, + &parser->file->last_ttl)) < 0) + return r; + lex(parser, 
token); + if ((r = have_delimiter(parser, &type, token)) < 0) + return r; - if (lex(parser, token)) - SYNTAX_ERROR(parser, "$TTL directive takes only a ttl argument"); - - parser->file->last_ttl = parser->file->default_ttl; + parser->file->default_ttl = parser->file->last_ttl; + return 0; } -static inline zone_return_t parse(zone_parser_t *parser, void *user_data) +static inline int32_t parse(zone_parser_t *parser) { static const zone_string_t ttl = { 4, "$TTL" }; static const zone_string_t origin = { 7, "$ORIGIN" }; static const zone_string_t include = { 8, "$INCLUDE" }; - zone_token_t token; - - for (;;) { - switch (lex(parser, &token)) { - case ZONE_CONTIGUOUS: // contiguous - if (!parser->file->start_of_line || token.data[0] != '$') - parse_rr(parser, &token, user_data); - else if (zone_compare(&token, &ttl) == 0) - parse_dollar_ttl(parser, &token, user_data); - else if (zone_compare(&token, &origin) == 0) - parse_dollar_origin(parser, &token, user_data); - else if (zone_compare(&token, &include) == 0) - parse_dollar_include(parser, &token, user_data); - else - parse_rr(parser, &token, user_data); - break; - case ZONE_QUOTED: // quoted (never a directive) - parse_rr(parser, &token, user_data); - break; - case ZONE_DELIMITER: - if (token.data == zone_end_of_file && parser->file->end_of_file == ZONE_NO_MORE_DATA) - return 0; - break; - default: + int32_t r = 0; + token_t token; + + while (r >= 0) { + lex(parser, &token); + if (zone_likely(token.code == CONTIGUOUS)) { + if (!parser->file->start_of_line || token.data[0] != '$') + r = parse_rr(parser, &token); + else if (compare(&token, &ttl) == 0) + r = parse_dollar_ttl(parser, &token); + else if (compare(&token, &origin) == 0) + r = parse_dollar_origin(parser, &token); + else if (compare(&token, &include) == 0) + r = parse_dollar_include(parser, &token); + else + r = parse_rr(parser, &token); + } else if (token.code == QUOTED) { + r = parse_rr(parser, &token); + } else if (token.code == END_OF_FILE) { + if 
(parser->file->end_of_file == ZONE_NO_MORE_DATA) break; } } - return 0; + return r; } #endif // PARSER_H diff --git a/src/table.c b/src/table.c deleted file mode 100644 index 1040f0d..0000000 --- a/src/table.c +++ /dev/null @@ -1,31 +0,0 @@ -/* - * table.h -- some useful comment - * - * Copyright (c) 2022-2023, NLnet Labs. All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause - * - */ -#include -#include - -#if _WIN32 -#define strncasecmp(s1, s2, n) _strnicmp(s1, s2, n) -#endif - -#include "zone.h" - -int zone_compare(const void *p1, const void *p2) -{ - const zone_string_t *s1 = p1, *s2 = p2; - assert(s1 && s1->data && s1->length); - assert(s2 && s2->data && s2->length); - int eq; - const size_t n = s1->length < s2->length ? s1->length : s2->length; - if ((eq = strncasecmp(s1->data, s2->data, n)) != 0) - return eq; - return s1->length < s2->length ? -1 : (s1->length > s2->length ? +1 : 0); -} - -extern inline zone_symbol_t * -zone_lookup(const zone_table_t *table, const zone_string_t *string); diff --git a/src/table.h b/src/table.h new file mode 100644 index 0000000..cf94f62 --- /dev/null +++ b/src/table.h @@ -0,0 +1,31 @@ +#ifndef TABLE_H +#define TABLE_H + +#include + +#if _WIN32 +#define strncasecmp(s1, s2, n) _strnicmp(s1, s2, n) +#else +#include +#endif + +static int compare(const void *p1, const void *p2) +{ + int r; + const token_t *t = p1; + const zone_string_t *s = p2; + assert(s->length <= ZONE_BLOCK_SIZE); + if ((r = strncasecmp(t->data, s->data, s->length)) != 0) + return r; + // make sure symbol is followed by non-contiguous to avoid matching wrong + // symbol based on prefix. e.g. NSEC3 vs. 
NSEC3PARAM + return contiguous[ (uint8_t)t->data[s->length] ] == CONTIGUOUS; +} + +static const zone_symbol_t *lookup_symbol( + const zone_table_t *table, const token_t *token) +{ + return bsearch(token, table->symbols, table->length, sizeof(zone_symbol_t), compare); +} + +#endif // TABLE_H diff --git a/src/visit.h b/src/visit.h index d7f145a..cbd4c8a 100644 --- a/src/visit.h +++ b/src/visit.h @@ -9,10 +9,7 @@ #ifndef VISIT_H #define VISIT_H -#include - -zone_always_inline() -static inline void accept_rr(zone_parser_t *parser, void *user_data) +static zone_really_inline int32_t accept_rr(zone_parser_t *parser) { zone_return_t result; @@ -26,12 +23,13 @@ static inline void accept_rr(zone_parser_t *parser, void *user_data) parser->file->last_ttl, (uint16_t)parser->rdata->length, parser->rdata->octets, - user_data); + parser->user_data); - if (result < 0) - longjmp((void*)parser->environment, result); assert((size_t)result < parser->cache.size); + if (result < 0) + return result; parser->rdata = &parser->cache.rdata.blocks[result]; + return 0; } #endif // VISIT_H diff --git a/src/westmere/bench.c b/src/westmere/bench.c index 1c52a55..fe71166 100644 --- a/src/westmere/bench.c +++ b/src/westmere/bench.c @@ -17,16 +17,18 @@ diagnostic_push() clang_diagnostic_ignored(missing-prototypes) -zone_return_t zone_bench_westmere_lex(zone_parser_t *parser, size_t *tokens) +int32_t zone_bench_westmere_lex(zone_parser_t *parser, size_t *tokens) { - zone_token_t token; - zone_return_t result; + token_t token; (*tokens) = 0; - while ((result = lex(parser, &token)) >= 0 && token.data != zone_end_of_file) + lex(parser, &token); + while (token.code > 0) { (*tokens)++; + lex(parser, &token); + } - return result; + return token.code ? 
-1 : 0; } diagnostic_pop() diff --git a/src/westmere/delimited.h b/src/westmere/delimited.h new file mode 100644 index 0000000..4326aa3 --- /dev/null +++ b/src/westmere/delimited.h @@ -0,0 +1,50 @@ +/* + * delimited.h -- scan a block of input for delimiter and space bytes + * + * Copyright (c) 2023, NLnet Labs. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + * + */ +#ifndef DELIMITED_H +#define DELIMITED_H + +zone_nonnull_all +static zone_really_inline void copy_and_scan_delimited( + delimited_t *block, + const simd_table_t delimiter, + const simd_table_t space, + const char *source, + uint8_t *destination) +{ + __m128i b = _mm_loadu_si128((const __m128i *)space); + __m128i d = _mm_loadu_si128((const __m128i *)delimiter); + + simd_loadu_8x(&block->input, (const uint8_t *)source); + b = _mm_shuffle_epi8(b, block->input.chunks[0]); + d = _mm_shuffle_epi8(d, block->input.chunks[0]); + simd_storeu_8x(destination, &block->input); + b = _mm_cmpeq_epi8(block->input.chunks[0], b); + d = _mm_cmpeq_epi8(block->input.chunks[0], d); + block->delimiter = (uint16_t)_mm_movemask_epi8(_mm_or_si128(b, d)); +} + +zone_nonnull_all +static zone_really_inline void scan_delimited( + delimited_t *block, + const simd_table_t delimiter, + const simd_table_t space, + const char *source) +{ + __m128i b = _mm_loadu_si128((const __m128i *)space); + __m128i d = _mm_loadu_si128((const __m128i *)delimiter); + + simd_loadu_8x(&block->input, (const uint8_t *)source); + b = _mm_shuffle_epi8(b, block->input.chunks[0]); + d = _mm_shuffle_epi8(d, block->input.chunks[0]); + b = _mm_cmpeq_epi8(block->input.chunks[0], b); + d = _mm_cmpeq_epi8(block->input.chunks[0], d); + block->delimiter = (uint16_t)_mm_movemask_epi8(_mm_or_si128(b, d)); +} + +#endif // DELIMITED_H diff --git a/src/westmere/ip4.h b/src/westmere/ip4.h index 00a55c8..0b533f2 100644 --- a/src/westmere/ip4.h +++ b/src/westmere/ip4.h @@ -115,100 +115,6 @@ static const uint8_t patterns[81][16] = { {2, 1, 6, 5, 10, 9, 14, 13, 0, 0, 4, 4, 8, 8, 12, 12}, }; - 
-#if 0 - -// convert IPv4 from text to binary form. -// -// ipv4_string points to a character string containing an IPv4 network address in dotted-decimal format -// "ddd.ddd.ddd.ddd" of length ipv4_string_length (the string does not have to be null terminated), -// where ddd is a decimal number of up to three digits in the range 0 to 255. -// The address is converted to a 32-bit integer (destination) (in network byte order). -// -// Important: the function will systematically read 16 bytes at the provided address (ipv4_string). However, -// only the first ipv4_string_length bytes are processed. -// -// returns 1 on success (network address was successfully converted). -// -// This function assumes that the processor supports SSE 4.1 instructions or better. That's true of most -// processors in operation today (June 2023). -// -// See also sse_inet_aton_16 for a version that does not take a string length. -static inline int sse_inet_aton(const char* ipv4_string, const size_t ipv4_string_length, uint8_t * destination) { - // This function always reads 16 bytes. With AVX-512 we can do a mask - // load, but it is not generally available with SSE 4.1. 
- const __m128i input = _mm_loadu_si128((const __m128i *)ipv4_string); - if (ipv4_string_length > 15) { - return 0; - } - // locate dots - uint16_t dotmask; - { - const __m128i dot = _mm_set1_epi8('.'); - const __m128i t0 = _mm_cmpeq_epi8(input, dot); - dotmask = (uint16_t)_mm_movemask_epi8(t0); - uint16_t mask = (uint16_t)(1 << ipv4_string_length); - dotmask &= mask - 1; - dotmask |= mask; - } - - // build a hashcode - const uint8_t hashcode = ((6639 * dotmask) >> 13); - - // grab the index of the shuffle mask - const uint8_t id = patterns_id[hashcode]; - if (id >= 81) { - return 0; - } - const uint8_t *pat = &patterns[id][0]; - const __m128i pattern = _mm_loadu_si128((const __m128i *)pat); - // The value of the shuffle mask at a specific index points at the last digit, - // we check that it matches the length of the input. - const __m128i ascii0 = _mm_set1_epi8('0'); - const __m128i t0 = input; - __m128i t1 = _mm_shuffle_epi8(t0, pattern); - // check that leading digits of 2- 3- numbers are not zeros. - { - const __m128i eq0 = _mm_cmpeq_epi8(t1, ascii0); - if (!_mm_testz_si128(eq0, _mm_set_epi8(-1, 0, -1, 0, -1, 0, -1, 0, - 0, 0, 0, 0, 0, 0, 0, 0))) { - return 0; - } - } - // replace null values with '0' - __m128i t1b = _mm_blendv_epi8(t1, ascii0, pattern); - - // subtract '0' - const __m128i t2 = _mm_sub_epi8(t1b, ascii0); - // check that everything was in the range '0' to '9' - { - const __m128i c9 = _mm_set1_epi8('9' - '0'); - const __m128i t2m = _mm_max_epu8(t2, c9); - const __m128i t2me = _mm_cmpeq_epi8(t2m, c9); - if (!_mm_test_all_ones(t2me)) { - return 0; - } - } - // We do the computation, the Mula way. 
- const __m128i weights = - _mm_setr_epi8(1, 10, 1, 10, 1, 10, 1, 10, 100, 0, 100, 0, 100, 0, 100, 0); - const __m128i t3 = _mm_maddubs_epi16(t2, weights); - const __m128i t4 = _mm_alignr_epi8(t3, t3, 8); - const __m128i t5 = _mm_add_epi16(t4, t3); - // Test that we don't overflow (over 255) - if (!_mm_testz_si128(t5, _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, 0, -1, - 0, -1, 0, -1, 0))) { - return 0; - } - // pack and we are done! - const __m128i t6 = _mm_packus_epi16(t5, t5); - uint32_t address = (uint32_t)_mm_cvtsi128_si32(t6); - memcpy(destination, &address, 4); - return (int)(ipv4_string_length - (size_t)pat[6]); -} - -#endif - // convert IPv4 from text to binary form. // // ipv4_string points to a character string containing an IPv4 network address in dotted-decimal format @@ -223,9 +129,7 @@ static inline int sse_inet_aton(const char* ipv4_string, const size_t ipv4_strin // // This function assumes that the processor supports SSE 4.1 instructions or better. That's true of most // processors in operation today (June 2023). 
-// -// See also sse_inet_aton for a version that takes a string length -static inline int sse_inet_aton_16(const char* ipv4_string, uint8_t* destination, size_t* restrict ipv4_string_length) { +static inline int sse_inet_aton(const char* ipv4_string, uint8_t* destination, size_t* restrict ipv4_string_length) { const __m128i input = _mm_loadu_si128((const __m128i *)ipv4_string); const __m128i dot = _mm_set1_epi8('.'); // locate dots @@ -298,24 +202,26 @@ static inline int sse_inet_aton_16(const char* ipv4_string, uint8_t* destination return (int)(*ipv4_string_length - (size_t)pat[6]); } -zone_always_inline() -zone_nonnull_all() -static inline void parse_ip4( +zone_nonnull_all +static zone_really_inline int32_t parse_ip4( zone_parser_t *parser, const zone_type_info_t *type, const zone_field_info_t *field, - zone_token_t *token) + const token_t *token) { - if (token->length > INET_ADDRSTRLEN) - SEMANTIC_ERROR(parser, "Invalid %s in %s", - field->name.data, type->name.data); - size_t computed_length; + int32_t r; + size_t n; + uint8_t *o = &parser->rdata->octets[parser->rdata->length]; + + if ((r = have_contiguous(parser, type, field, token)) < 0) + return r; + // Note that this assumes that reading up to token->data + 16 is safe (i.e., we do not cross a page). 
- if ((sse_inet_aton_16(token->data, &parser->rdata->octets[parser->rdata->length], &computed_length) != 1) - || (computed_length != token->length)) - SEMANTIC_ERROR(parser, "Invalid %s in %s", - field->name.data, type->name.data); + if (sse_inet_aton(token->data, o, &n) != 1 || + is_contiguous((uint8_t)token->data[n])) + SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); parser->rdata->length += sizeof(struct in_addr); + return ZONE_IP4; } #endif // IP4_H diff --git a/src/westmere/name.h b/src/westmere/name.h new file mode 100644 index 0000000..e69de29 diff --git a/src/westmere/parser.c b/src/westmere/parser.c index 50d152a..d7195c6 100644 --- a/src/westmere/parser.c +++ b/src/westmere/parser.c @@ -6,17 +6,15 @@ * See LICENSE for the license. * */ -#define _XOPEN_SOURCE -#include -#undef _XOPEN_SOURCE - #include "zone.h" #include "diagnostic.h" #include "log.h" #include "westmere/simd.h" #include "westmere/bits.h" #include "lexer.h" +#include "table.h" #include "generic/scanner.h" +#include "westmere/delimited.h" #include "generic/number.h" #include "generic/ttl.h" #include "generic/time.h" @@ -35,9 +33,9 @@ diagnostic_push() clang_diagnostic_ignored(missing-prototypes) -zone_return_t zone_westmere_parse(zone_parser_t *parser, void *user_data) +int32_t zone_westmere_parse(zone_parser_t *parser) { - return parse(parser, user_data); + return parse(parser); } diagnostic_pop() diff --git a/src/westmere/simd.h b/src/westmere/simd.h index 16d4fff..90795f2 100644 --- a/src/westmere/simd.h +++ b/src/westmere/simd.h @@ -29,32 +29,28 @@ typedef simd_8x_t simd_8x16_t; typedef struct { __m128i chunks[4]; } simd_8x64_t; -zone_always_inline() -zone_nonnull_all() -static inline void simd_loadu_8x(simd_8x_t *simd, const uint8_t *address) +zone_nonnull_all +static zone_really_inline void simd_loadu_8x(simd_8x_t *simd, const uint8_t *address) { simd->chunks[0] = _mm_loadu_si128((const __m128i *)address); } -zone_always_inline() -zone_nonnull_all() -static inline 
void simd_storeu_8x(uint8_t *address, const simd_8x_t *simd) +zone_nonnull_all +static zone_really_inline void simd_storeu_8x(uint8_t *address, const simd_8x_t *simd) { _mm_storeu_si128((__m128i *)address, simd->chunks[0]); } -zone_always_inline() -zone_nonnull_all() -static inline uint64_t simd_find_8x(const simd_8x_t *simd, char key) +zone_nonnull_all +static zone_really_inline uint64_t simd_find_8x(const simd_8x_t *simd, char key) { const __m128i k = _mm_set1_epi8(key); const __m128i r = _mm_cmpeq_epi8(simd->chunks[0], k); return (uint16_t)_mm_movemask_epi8(r); } -zone_always_inline() -zone_nonnull_all() -static inline uint64_t simd_find_any_8x( +zone_nonnull_all +static zone_really_inline uint64_t simd_find_any_8x( const simd_8x_t *simd, const simd_table_t table) { const __m128i t = _mm_loadu_si128((const __m128i *)table); @@ -66,9 +62,8 @@ static inline uint64_t simd_find_any_8x( #define simd_loadu_8x16(simd, address) simd_loadu_8x(simd, address) #define simd_find_8x16(simd, key) simd_find_8x(simd, key) -zone_always_inline() -zone_nonnull_all() -static inline void simd_loadu_8x64(simd_8x64_t *simd, const uint8_t *address) +zone_nonnull_all +static zone_really_inline void simd_loadu_8x64(simd_8x64_t *simd, const uint8_t *address) { simd->chunks[0] = _mm_loadu_si128((const __m128i *)(address)); simd->chunks[1] = _mm_loadu_si128((const __m128i *)(address+16)); @@ -76,9 +71,8 @@ static inline void simd_loadu_8x64(simd_8x64_t *simd, const uint8_t *address) simd->chunks[3] = _mm_loadu_si128((const __m128i *)(address+48)); } -zone_always_inline() -zone_nonnull_all() -static inline uint64_t simd_find_8x64(const simd_8x64_t *simd, char key) +zone_nonnull_all +static zone_really_inline uint64_t simd_find_8x64(const simd_8x64_t *simd, char key) { const __m128i k = _mm_set1_epi8(key); @@ -95,9 +89,8 @@ static inline uint64_t simd_find_8x64(const simd_8x64_t *simd, char key) return m0 | (m1 << 16) | (m2 << 32) | (m3 << 48); } -zone_always_inline() -zone_nonnull_all() 
-static inline uint64_t simd_find_any_8x64( +zone_nonnull_all +static zone_really_inline uint64_t simd_find_any_8x64( const simd_8x64_t *simd, const simd_table_t table) { const __m128i t = _mm_loadu_si128((const __m128i *)table); diff --git a/src/zone.c b/src/zone.c index 1a6041c..a03a800 100644 --- a/src/zone.c +++ b/src/zone.c @@ -14,7 +14,6 @@ #include #include #include -#include #if _WIN32 # include #endif @@ -40,7 +39,7 @@ static char *strndup(const char *s, size_t n) static const char not_a_file[] = ""; -static zone_return_t check_options(const zone_options_t *options) +static int32_t check_options(const zone_options_t *options) { if (!options->accept.add) return ZONE_BAD_PARAMETER; @@ -100,11 +99,11 @@ static int parse_origin(const char *origin, uint8_t str[255], size_t *len) #include "isadetection.h" #if HAVE_HASWELL -extern zone_return_t zone_haswell_parse(zone_parser_t *, void *); +extern int32_t zone_haswell_parse(zone_parser_t *, void *); #endif #if HAVE_WESTMERE -extern zone_return_t zone_westmere_parse(zone_parser_t *, void *); +extern int32_t zone_westmere_parse(zone_parser_t *, void *); #endif extern zone_return_t zone_fallback_parse(zone_parser_t *, void *); @@ -113,7 +112,7 @@ typedef struct target target_t; struct target { const char *name; uint32_t instruction_set; - zone_return_t (*parse)(zone_parser_t *, void *); + int32_t (*parse)(zone_parser_t *, void *); }; static const target_t targets[] = { @@ -126,6 +125,9 @@ static const target_t targets[] = { { "fallback", 0, &zone_fallback_parse } }; +diagnostic_push() +msvc_diagnostic_ignored(4996) + static inline const target_t * select_target(void) { @@ -149,29 +151,23 @@ select_target(void) return &targets[length - 1]; } -static zone_return_t parse(zone_parser_t *parser, void *user_data) +diagnostic_pop() + +static int32_t parse(zone_parser_t *parser, void *user_data) { const target_t *target; - zone_return_t result; target = select_target(); assert(target); - - switch ((result = setjmp((void 
*)parser->environment))) { - case 0: - result = target->parse(parser, user_data); - assert(result == ZONE_SUCCESS); - break; - default: - assert(result < 0); - break; - } - - return result; + parser->user_data = user_data; + return target->parse(parser, user_data); } -zone_nonnull_all() -static zone_return_t open_file( +diagnostic_push() +msvc_diagnostic_ignored(4996) + +zone_nonnull_all +static int32_t open_file( zone_parser_t *parser, zone_file_t *file, const zone_string_t *path) { (void)parser; @@ -181,7 +177,7 @@ static zone_return_t open_file( #if _WIN32 char buf[1]; - size_t length, size = GetFullPathName(file->name, sizeof(buf), buf, NULL); + DWORD length, size = GetFullPathName(file->name, sizeof(buf), buf, NULL); if (!size) return ZONE_IO_ERROR; if (!(file->path = malloc(size))) @@ -215,13 +211,18 @@ static zone_return_t open_file( file->buffer.index = 0; file->start_of_line = true; file->end_of_file = ZONE_HAVE_DATA; - file->indexer.tape[0] = (zone_index_t){ file->buffer.data, 0 }; - file->indexer.tape[1] = (zone_index_t){ file->buffer.data, 0 }; - file->indexer.head = file->indexer.tape; - file->indexer.tail = file->indexer.tape; + file->fields.tape[0] = file->buffer.data; + file->fields.tape[1] = NULL; + file->fields.head = file->fields.tape; + file->fields.tail = file->fields.tape; + file->lines.tape[0] = 0; + file->lines.head = file->lines.tape; + file->lines.tail = file->lines.tape; return 0; } +diagnostic_pop() + static void set_defaults(zone_parser_t *parser) { if (!parser->options.log.write && !parser->options.log.categories) @@ -233,7 +234,7 @@ static void set_defaults(zone_parser_t *parser) diagnostic_push() clang_diagnostic_ignored(missing-prototypes) -zone_nonnull_all() +zone_nonnull_all void zone_close_file( zone_parser_t *parser, zone_file_t *file) { @@ -246,10 +247,10 @@ void zone_close_file( if (file->buffer.data) free(file->buffer.data); file->buffer.data = NULL; - if (file->name) + if (file->name && file->name != not_a_file) free((char 
*)file->name); file->name = NULL; - if (file->path) + if (file->path && file->path != not_a_file) free((char *)file->path); file->path = NULL; (void)fclose(file->handle); @@ -258,16 +259,16 @@ void zone_close_file( free(file); } -zone_nonnull_all() -zone_return_t zone_open_file( +zone_nonnull_all +int32_t zone_open_file( zone_parser_t *parser, const zone_string_t *path, zone_file_t **fileptr) { zone_file_t *file; - zone_return_t result; + int32_t result; if (!(file = malloc(sizeof(*file)))) return ZONE_OUT_OF_MEMORY; - memset(file, 0, sizeof(*file) - sizeof(file->indexer.tape)); + memset(file, 0, sizeof(*file)); if ((result = open_file(parser, file, path)) < 0) goto err_open; @@ -290,7 +291,7 @@ void zone_close(zone_parser_t *parser) } } -zone_return_t zone_open( +int32_t zone_open( zone_parser_t *parser, const zone_options_t *options, zone_cache_t *cache, @@ -298,7 +299,7 @@ zone_return_t zone_open( void *user_data) { zone_file_t *file; - zone_return_t result; + int32_t result; if ((result = check_options(options)) < 0) return result; @@ -332,25 +333,23 @@ zone_return_t zone_open( diagnostic_pop() -zone_return_t zone_parse( +int32_t zone_parse( zone_parser_t *parser, const zone_options_t *options, zone_cache_t *cache, const char *path, void *user_data) { - zone_return_t result; - volatile jmp_buf environment; + int32_t result; if ((result = zone_open(parser, options, cache, path, user_data)) < 0) return result; - parser->environment = &environment; result = parse(parser, user_data); zone_close(parser); return result; } -zone_return_t zone_parse_string( +int32_t zone_parse_string( zone_parser_t *parser, const zone_options_t *options, zone_cache_t *cache, @@ -359,8 +358,7 @@ zone_return_t zone_parse_string( void *user_data) { zone_file_t *file; - zone_return_t result; - volatile jmp_buf environment; + int32_t result; if ((result = check_options(options)) < 0) return result; @@ -372,8 +370,8 @@ zone_return_t zone_parse_string( if 
((result = parse_origin(options->origin, file->origin.octets, &file->origin.length)) < 0) return result; - file->name = not_a_file; - file->path = not_a_file; + file->name = (char *)not_a_file; + file->path = (char *)not_a_file; file->handle = NULL; file->buffer.index = 0; file->buffer.length = length; @@ -381,10 +379,13 @@ zone_return_t zone_parse_string( file->buffer.data = (char *)string; file->start_of_line = true; file->end_of_file = ZONE_READ_ALL_DATA; - file->indexer.tape[0] = (zone_index_t){ "\0", 0 }; - file->indexer.tape[1] = (zone_index_t){ "\0", 0 }; - file->indexer.head = file->indexer.tape; - file->indexer.tail = file->indexer.tape; + file->fields.tape[0] = "\0"; + file->fields.tape[1] = NULL; + file->fields.head = file->fields.tape; + file->fields.tail = file->fields.tape; + file->lines.tape[0] = 0; + file->lines.head = file->lines.tape; + file->lines.tail = file->lines.tape; parser->cache.size = cache->size; parser->cache.owner.serial = 0; @@ -397,7 +398,6 @@ zone_return_t zone_parse_string( file->line = 1; set_defaults(parser); - parser->environment = &environment; result = parse(parser, user_data); zone_close(parser); return result; diff --git a/tests/include.c b/tests/include.c index 1b55d9e..f4b33a8 100644 --- a/tests/include.c +++ b/tests/include.c @@ -17,6 +17,7 @@ #endif #include "zone.h" +#include "diagnostic.h" typedef struct input input_t; struct input { @@ -27,6 +28,9 @@ struct input { } includer, include; }; +diagnostic_push() +msvc_diagnostic_ignored(4996) + /*!cmocka */ int teardown(void **state) { @@ -119,6 +123,8 @@ int setup(void **state) return -1; } +diagnostic_pop() + static zone_return_t add_rr( zone_parser_t *parser, const zone_name_t *owner, diff --git a/tests/types.c b/tests/types.c index 86758af..0f25aae 100644 --- a/tests/types.c +++ b/tests/types.c @@ -19,7 +19,7 @@ #include "zone.h" // automatically pad string literal -#define TEXT(literal) \ +#define PAD(literal) \ literal \ "\0\0\0\0\0\0\0\0" /* 0 - 7 */ \ 
"\0\0\0\0\0\0\0\0" /* 8 - 15 */ \ @@ -47,14 +47,14 @@ struct field { #define RDATA(x) x, sizeof(x)/sizeof(x[0]) -static const char a_text[] = TEXT("host.example.com. 1 IN A 192.0.2.1"); +static const char a_text[] = PAD("host.example.com. 1 IN A 192.0.2.1"); static const field_t a[] = { IP4(0, 16908480) }; -static const char ns_text[] = TEXT("example.com. 1 IN NS host.example.com."); +static const char ns_text[] = PAD("example.com. 1 IN NS host.example.com."); static const field_t ns[] = { NAME(ZONE_COMPRESSED, From 18b0d24b788ab1c092d86b4b99d4805e63b3f57f Mon Sep 17 00:00:00 2001 From: Jeroen Koekkoek Date: Thu, 8 Jun 2023 16:47:32 +0200 Subject: [PATCH 2/3] Update numbers in README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 176937f..73095cb 100644 --- a/README.md +++ b/README.md @@ -31,9 +31,9 @@ $ time ./zone-bench parse ../../zones/com.zone Selected target haswell Parsed 341535548 records -real 0m20.629s -user 0m19.328s -sys 0m1.244s +real 0m19.834s +user 0m18.612s +sys 0m1.176s ``` There are bound to be bugs and quite possibly smarter ways of implementing From a3482aff134683670daf6b6ee8c518ba2415826a Mon Sep 17 00:00:00 2001 From: Jeroen Koekkoek Date: Wed, 14 Jun 2023 17:36:49 +0200 Subject: [PATCH 3/3] Update FORMAT.md to reflect recent changes --- FORMAT.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/FORMAT.md b/FORMAT.md index 96a34bd..b5a029c 100644 --- a/FORMAT.md +++ b/FORMAT.md @@ -62,9 +62,8 @@ characters that overlap with any structural characters and in practice, it really never happens. The same applies to base64 sequences, which was specifically designed to encode binary data in printable ASCII characters. To quote a field and include whitespace is more-or-less instructing the parser -to not ignore it. 
The current implementation is such that any field may be -quoted, but it MUST actually disallow fields that cannot contain structural -characters to be quoted. +to not ignore it. Fields that cannot contain structural characters, i.e. +anything other than domain names and text strings, MUST NOT be quoted. > BIND does not accept quoted fields for A or NS RDATA. TTL values in SOA > RDATA, base64 Signature in DNSKEY RDATA, as well as type, class and TTL