NLnetLabs · k0ekk0ek · Sep 15, 2023 · Aug 28, 2023 · Sep 4, 2023 · Sep 15, 2023
diff --git a/README.md b/README.md
@@ -23,17 +23,17 @@ similar performance boost for parsing zone data.
 
 ## Results
 Running `zone-bench` on my system (Intel Core i7-1065G7) against an older
-`.com` zone file of 12482791271 bytes under Linux (Fedora 37).
+`.com` zone file of 12482791271 bytes under Linux (Fedora 38).
 
-clang version 15.0.7, release mode:
+clang version 16.0.6, release mode:
 ```
 $ time ./zone-bench parse ../../zones/com.zone
 Selected target haswell
 Parsed 341535548 records
 
-real    0m16.344s
-user    0m15.125s
-sys     0m1.165s
+real    0m14.812s
+user    0m13.704s
+sys     0m1.088s
 ```
 
 There are bound to be bugs and quite possibly smarter ways of implementing

diff --git a/conanfile.txt b/conanfile.txt
@@ -5,4 +5,4 @@ cmocka/1.1.5
 CMakeDeps
 
 [options]
-cmocka/*:shared=True
+cmocka*:shared=False
diff --git a/include/zone.h b/include/zone.h
@@ -298,25 +298,21 @@ typedef enum {
 #define ZONE_CAA_TAG (1u << 12)
 /** @} */
 
-// FIXME: drop rdata_info, just use field_info
-typedef struct zone_rdata_info zone_rdata_info_t;
-struct zone_rdata_info {
+typedef struct zone_field_info zone_field_info_t;
+struct zone_field_info {
   zone_string_t name;
   uint32_t type;
   uint32_t qualifiers;
   zone_table_t symbols;
 };
 
-typedef struct zone_rdata_info zone_field_info_t;
-
 /**
  * @defgroup options Type options
  * @brief Options for record types
  *
  * @{
  */
-// type options
-// ZONE_IN goes here too!
+// ZONE_IN (1) can also be used as a type option
 #define ZONE_ANY (1<<2)
 #define ZONE_EXPERIMENTAL (1<<3)
 #define ZONE_OBSOLETE (1<<4)
@@ -328,7 +324,7 @@ struct zone_type_info {
   uint32_t options;
   struct {
     size_t length;
-    const zone_rdata_info_t *fields;
+    const zone_field_info_t *fields;
   } rdata;
 };
 
@@ -340,20 +336,23 @@ struct zone_type_info {
 // worst-case read (e.g. 64 consecutive line feeds). in practice a single
 // block will never contain 64 tokens, therefore, to optimize throughput,
 // allocate twice the size so consecutive index operations can be done
-#define ZONE_TAPE_SIZE (100 * (ZONE_BLOCK_SIZE + ZONE_BLOCK_SIZE))
+#define ZONE_TAPE_SIZE ((100 * ZONE_BLOCK_SIZE) + ZONE_BLOCK_SIZE)
+
+#define ZONE_RDATA_SIZE (65535)
 
-#define ZONE_RDATA_LIMIT (65535)
+#define ZONE_NAME_SIZE (255)
+#define ZONE_PADDING_SIZE (ZONE_BLOCK_SIZE)
 
-typedef struct zone_name_block zone_name_block_t;
-struct zone_name_block {
+typedef struct zone_name_buffer zone_name_buffer_t;
+struct zone_name_buffer {
   size_t length; /**< Length of domain name stored in block */
-  uint8_t octets[ 255 + ZONE_BLOCK_SIZE ];
+  uint8_t octets[ ZONE_NAME_SIZE + ZONE_PADDING_SIZE ];
 };
 
-typedef struct zone_rdata_block zone_rdata_block_t;
-struct zone_rdata_block {
+typedef struct zone_rdata_buffer zone_rdata_buffer_t;
+struct zone_rdata_buffer {
   size_t length; /**< Length of RDATA stored in block */
-  uint8_t octets[ 65535 + 4096 /* nsec padding */ ];
+  uint8_t octets[ ZONE_RDATA_SIZE + 4096 /* nsec padding */ ];
 };
 
 // @private
@@ -371,7 +370,7 @@ struct zone_rdata_block {
 typedef struct zone_file zone_file_t;
 struct zone_file {
   zone_file_t *includer;
-  zone_name_block_t origin, owner;
+  zone_name_buffer_t origin, owner;
   uint16_t last_type;
   uint32_t last_ttl, default_ttl;
   uint16_t last_class;
@@ -397,9 +396,10 @@ struct zone_file {
     uint64_t is_escaped;
     uint64_t follows_contiguous;
   } state;
-  // vector of tokens generated by the indexer. guaranteed to be large
+  // vector(s) of tokens generated by the indexer. guaranteed to be large
   // enough to hold every token for a single read + terminators
   struct { const char **head, **tail, *tape[ZONE_TAPE_SIZE + 2]; } fields;
+  struct { const char **head, **tail, *tape[ZONE_TAPE_SIZE + 1]; } delimiters;
   struct { uint16_t *head, *tail, tape[ZONE_TAPE_SIZE + 1]; } lines;
 };
 
@@ -534,29 +534,28 @@ typedef struct {
  * rdata buffer to use next. Rotation of name buffers is controlled by the
  * parser.
  */
-typedef struct zone_cache zone_cache_t;
-struct zone_cache {
+typedef struct zone_buffers zone_buffers_t;
+struct zone_buffers {
   size_t size; /**< Number of name and rdata storage blocks available */
-  zone_name_block_t *owner;
-  zone_rdata_block_t *rdata;
+  zone_name_buffer_t *owner;
+  zone_rdata_buffer_t *rdata;
 };
 
 struct zone_parser {
   zone_options_t options;
   void *user_data;
-  volatile void *environment; // FIXME: not sure about this yet
   struct {
     size_t size;
     struct {
       size_t serial;
-      zone_name_block_t *blocks;
+      zone_name_buffer_t *blocks;
     } owner;
     struct {
-      zone_rdata_block_t *blocks;
+      zone_rdata_buffer_t *blocks;
     } rdata;
-  } cache;
-  zone_name_block_t *owner;
-  zone_rdata_block_t *rdata;
+  } buffers;
+  zone_name_buffer_t *owner;
+  zone_rdata_buffer_t *rdata;
   zone_file_t *file, first;
 };
 
@@ -592,7 +591,7 @@ ZONE_EXPORT int32_t
 zone_parse(
   zone_parser_t *parser,
   const zone_options_t *options,
-  zone_cache_t *cache,
+  zone_buffers_t *buffers,
   const char *path,
   void *user_data)
 zone_nonnull((1,2,3,4));
@@ -604,7 +603,7 @@ ZONE_EXPORT int32_t
 zone_parse_string(
   zone_parser_t *parser,
   const zone_options_t *options,
-  zone_cache_t *cache,
+  zone_buffers_t *buffers,
   const char *string,
   size_t length,
   void *user_data)

diff --git a/src/bench.c b/src/bench.c
@@ -61,7 +61,7 @@ static const target_t targets[] = {
 extern int32_t zone_open(
   zone_parser_t *,
   const zone_options_t *,
-  zone_cache_t *,
+  zone_buffers_t *,
   const char *,
   void *user_data);
 
@@ -129,6 +129,7 @@ static const target_t *select_target(const char *name)
       if (targets[i].instruction_set & supported)
         target = &targets[i];
     }
+    assert(target != NULL);
   } else {
     for (size_t i=0; !target && i < n; i++) {
       if (strcasecmp(name, targets[i].name) == 0)
@@ -208,16 +209,16 @@ int main(int argc, char *argv[])
 
   zone_parser_t parser = { 0 };
   zone_options_t options = { 0 };
-  zone_name_block_t owner;
-  zone_rdata_block_t rdata;
-  zone_cache_t cache = { 1, &owner, &rdata };
+  zone_name_buffer_t owner;
+  zone_rdata_buffer_t rdata;
+  zone_buffers_t buffers = { 1, &owner, &rdata };
 
   options.accept.add = &bench_accept;
   options.origin = ".";
   options.default_ttl = 3600;
   options.default_class = ZONE_IN;
 
-  if (zone_open(&parser, &options, &cache, argv[argc-1], NULL) < 0)
+  if (zone_open(&parser, &options, &buffers, argv[argc-1], NULL) < 0)
     exit(EXIT_FAILURE);
   if (bench(&parser, target) < 0)
     exit(EXIT_FAILURE);

diff --git a/src/fallback/base16.h b/src/fallback/base16.h
@@ -77,7 +77,7 @@ static zone_really_inline int32_t parse_base16(
 
   uint8_t x0 = 0x80, x1 = 0x80;
   uint8_t *w = &parser->rdata->octets[parser->rdata->length];
-  const uint8_t *ws = w, *we = &parser->rdata->octets[ZONE_RDATA_LIMIT];
+  const uint8_t *ws = w, *we = &parser->rdata->octets[ZONE_RDATA_SIZE];
   const char *p;
 
   do {

diff --git a/src/fallback/name.h b/src/fallback/name.h
@@ -12,91 +12,49 @@
 zone_nonnull_all
 static zone_really_inline int32_t scan_name(
   zone_parser_t *parser,
-  const zone_type_info_t *type,
-  const zone_field_info_t *field,
-  const uint8_t delimiters[256],
   const token_t *token,
   uint8_t octets[255 + ZONE_BLOCK_SIZE],
-  size_t *length)
+  size_t *lengthp)
 {
-  uint8_t *l = octets, *b = octets + 1;
-  const uint8_t *bs = octets + 255;
-  const char *s = token->data;
+  uint8_t *l = octets, *w = octets + 1;
+  const uint8_t *we = octets + 255;
+  const char *t = token->data, *te = t + token->length;
 
-  l[0] = 0;
+  (void)parser;
 
-  if (s[0] == '.') {
-    if (delimiters[(uint8_t)s[1]] == token->code)
-      SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type));
-    *length = 1;
-    return 0;
-  }
+  l[0] = 0;
 
-  while (b < bs) {
-    const uint8_t c = (uint8_t)s[0];
-    if (c == '\\') {
-      uint8_t d[3];
-      d[0] = (uint8_t)s[1] - '0';
+  if (*t == '.')
+    return (*lengthp = token->length) == 1 ? 0 : -1;
 
-      if (d[0] > 2) {
-        b[0] = (uint8_t)s[1];
-        b += 1; s += 2;
-      } else {
-        uint8_t m = d[0] < 2 ? 9 : 5;
-        d[1] = (uint8_t)s[2] - '0';
-        d[2] = (uint8_t)s[3] - '0';
-        if (d[1] > m || d[2] > m)
-          SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type));
-        b[0] = d[0] * 100 + d[1] * 10 + d[0];
-        b += 1; s += 4;
-      }
-    } else if (c == '.') {
-      if ((b - 1) - l > 63 || (b - 1) - l == 0)
-        SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type));
-      l[0] = (uint8_t)((b - 1) - l);
-      l = b;
+  while ((t < te) & (w < we)) {
+    *w = (uint8_t)*t;
+    if (*t == '\\') {
+      uint32_t n;
+      if (!(n = unescape(t, w)))
+        return -1;
+      w += 1; t += n;
+    } else if (*t == '.') {
+      if ((w - 1) - l > 63 || (w - 1) - l == 0)
+        return -1;
+      l[0] = (uint8_t)((w - 1) - l);
+      l = w;
       l[0] = 0;
-      b += 1; s += 1;
-    } else if (delimiters[c] != token->code) {
-      if ((b - 1) - l > 63)
-        SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type));
-      l[0] = (uint8_t)((b - 1) - l);
-      break;
+      w += 1; t += 1;
     } else {
-      b[0] = c;
-      b += 1; s += 1;
+      w += 1; t += 1;
     }
   }
 
-  if (delimiters[(uint8_t)*s] == token->code)
-    SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type));
+  if ((w - 1) - l > 63)
+    return -1;
+  *l = (uint8_t)((w - 1) - l);
 
-  *length = (size_t)(b - octets);
-  return l[0] == 0 ? 0 : ZONE_NAME;
-}
+  if (t != te || w >= we)
+    return -1;
 
-zone_nonnull_all
-static zone_really_inline int32_t scan_contiguous_name(
-  zone_parser_t *parser,
-  const zone_type_info_t *type,
-  const zone_field_info_t *field,
-  const token_t *token,
-  uint8_t octets[255 + ZONE_BLOCK_SIZE],
-  size_t *length)
-{
-  return scan_name(parser, type, field, contiguous, token, octets, length);
-}
-
-zone_nonnull_all
-static zone_really_inline int32_t scan_quoted_name(
-  zone_parser_t *parser,
-  const zone_type_info_t *type,
-  const zone_field_info_t *field,
-  const token_t *token,
-  uint8_t octets[255 + ZONE_BLOCK_SIZE],
-  size_t *length)
-{
-  return scan_name(parser, type, field, quoted, token, octets, length);
+  *lengthp = (size_t)(w - octets);
+  return *l != 0;
 }
 
 zone_nonnull_all
@@ -112,32 +70,34 @@ static zone_really_inline int32_t parse_name(
 
   if (zone_likely(token->code == CONTIGUOUS)) {
     // a freestanding "@" denotes the current origin
-    if (token->data[0] == '@' && !is_contiguous((uint8_t)token->data[1]))
+    if (token->data[0] == '@' && token->length > 1)
       goto relative;
-    r = scan_contiguous_name(parser, type, field, token, o, &n);
+    r = scan_name(parser, token, o, &n);
     if (r == 0)
-      goto absolute;
-    if (r < 0)
-      return r;
+      return (void)(parser->rdata->length += n), ZONE_NAME;
+    if (r > 0)
+      goto relative;
   } else if (token->code == QUOTED) {
-    r = scan_quoted_name(parser, type, field, token, o, &n);
+    if (token->length == 0)
+      goto invalid;
+    r = scan_name(parser, token, o, &n);
     if (r == 0)
-      goto absolute;
-    if (r < 0)
-      return r;
+      return (void)(parser->rdata->length += n), ZONE_NAME;
+    if (r > 0)
+      goto relative;
   } else {
     return have_string(parser, type, field, token);
   }
 
+invalid:
+  SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type));
+
 relative:
   if (n > 255 - parser->file->origin.length)
     SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), TNAME(type));
   memcpy(o+n, parser->file->origin.octets, parser->file->origin.length);
   parser->rdata->length += n + parser->file->origin.length;
   return ZONE_NAME;
-absolute:
-  parser->rdata->length += n;
-  return ZONE_NAME;
 }
 
 #endif // NAME_H