Skip to content

Drop secondary index #64

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/workflows/build-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ jobs:
generator: "Visual Studio 17 2022"
build_type: Debug
build_tool_options: "-nologo -verbosity:minimal -maxcpucount:4 -p:CL_MPCount=4"
warnings_as_errors: off
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
Expand Down
14 changes: 6 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -128,22 +128,20 @@ generate_export_header(
zone BASE_NAME ZONE EXPORT_FILE_NAME include/zone/export.h)

target_include_directories(
zone PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/include>
PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src>
$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>)
zone PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src>
$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>)

target_sources(zone PRIVATE
src/zone.c
src/types.c
src/table.c
src/log.c
src/parser.c
src/lexer.c
src/fallback/parser.c
src/generic/base16.c
src/generic/base32.c
src/generic/base64.c
src/fallback/parser.c)
src/generic/base64.c)

add_executable(zone-bench src/bench.c src/fallback/bench.c)
target_include_directories(
Expand Down
5 changes: 2 additions & 3 deletions FORMAT.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,8 @@ characters that overlap with any structural characters and in practice, it
really never happens. The same applies to base64 sequences, as base64 was
specifically designed to encode binary data in printable ASCII characters. To
quote a field and include whitespace is more-or-less instructing the parser
to not ignore it. The current implementation is such that any field may be
quoted, but it MUST actually disallow fields that cannot contain structural
characters to be quoted.
to not ignore it. Fields that cannot contain structural characters, i.e.
anything other than domain names and text strings, MUST NOT be quoted.

> BIND does not accept quoted fields for A or NS RDATA. TTL values in SOA
> RDATA, base64 Signature in DNSKEY RDATA, as well as type, class and TTL
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ $ time ./zone-bench parse ../../zones/com.zone
Selected target haswell
Parsed 341535548 records

real 0m20.629s
user 0m19.328s
sys 0m1.244s
real 0m19.834s
user 0m18.612s
sys 0m1.176s
```

There are bound to be bugs and quite possibly smarter ways of implementing
Expand Down
59 changes: 23 additions & 36 deletions include/zone.h
Original file line number Diff line number Diff line change
Expand Up @@ -202,19 +202,6 @@ struct zone_table {
const zone_symbol_t *symbols; // sorted for use with bsearch
};

ZONE_EXPORT int
zone_compare(const void *s1, const void *s2)
zone_nonnull_all();

zone_always_inline()
zone_nonnull_all()
inline zone_symbol_t *zone_lookup(
const zone_table_t *table, const zone_string_t *string)
{
const zone_symbol_t key = { *string, 0 };
return bsearch(&key, table->symbols, table->length, sizeof(key), zone_compare);
}

// @private
//
// bsearch is quite slow compared to a hash table, but a hash table is either
Expand Down Expand Up @@ -371,21 +358,6 @@ struct zone_type_info {
#define ZONE_BLOCK_SIZE (64)
#define ZONE_WINDOW_SIZE (256 * ZONE_BLOCK_SIZE) // 16KB

// @private
// non-delimiting tokens may contain (escaped) newlines. tracking newlines
// within tokens by taping them makes the lex operation more complex, resulting
// in a significantly larger binary and slower operation, and may introduce an
// infinite loop if the tape may not be sufficiently large enough. tokens
// containing newlines is very much an edge case, therefore the scanner
// implements an unlikely slow path that tracks the number of escaped newlines
// during tokenization and registers them with each consecutive newline token.
// this mode of operation nicely isolates location tracking in the scanner and
// accommodates parallel processing should that ever be desired
typedef struct zone_index zone_index_t;
struct zone_index {
const char *data;
uint32_t newlines; // number of escaped newlines (stored per newline)
};

// tape capacity must be large enough to hold every token from a single
// worst-case read (e.g. 64 consecutive line feeds). in practice a single
Expand All @@ -405,6 +377,17 @@ struct zone_rdata_block {
uint8_t octets[ 65535 + 4096 /* nsec padding */ ];
};

// @private
// non-delimiting tokens may contain (escaped) newlines. tracking newlines
// within tokens by taping them makes the lex operation more complex, resulting
// in a significantly larger binary and slower operation, and may introduce an
// infinite loop if the tape is not sufficiently large. tokens containing
// newlines are very much an edge case, therefore the scanner implements an
// unlikely slow path that tracks the number of escaped newlines during
// tokenization and registers them with each consecutive newline token. this
// mode of operation nicely isolates location tracking in the scanner and
// accommodates parallel processing should that ever be desired

// @private
typedef struct zone_file zone_file_t;
struct zone_file {
Expand All @@ -413,9 +396,13 @@ struct zone_file {
uint16_t last_type;
uint32_t last_ttl, default_ttl;
uint16_t last_class;
size_t line;
const char *name;
const char *path;
// non-terminating line feeds, i.e. escaped line feeds, line feeds in quoted
// sections or within parentheses, are counted, but deferred for consistency
// in error reports
size_t span; /**< number of lines spanned by record */
size_t line; /**< starting line of record */
char *name;
char *path;
FILE *handle;
bool grouped;
bool start_of_line;
Expand All @@ -426,15 +413,15 @@ struct zone_file {
} buffer;
// indexer state is kept per-file
struct {
uint32_t newlines; // number of escaped newlines
uint64_t in_comment;
uint64_t in_quoted;
uint64_t is_escaped;
uint64_t follows_contiguous;
// vector of tokens generated by the indexer. guaranteed to be large
// enough to hold every token for a single read + terminators
zone_index_t *head, *tail, tape[ZONE_TAPE_SIZE + 2];
} indexer;
} state;
// vector of tokens generated by the indexer. guaranteed to be large
// enough to hold every token for a single read + terminators
struct { const char **head, **tail, *tape[ZONE_TAPE_SIZE + 2]; } fields;
struct { uint16_t *head, *tail, tape[ZONE_TAPE_SIZE + 1]; } lines;
};

typedef struct zone_parser zone_parser_t;
Expand Down
35 changes: 13 additions & 22 deletions include/zone/attributes.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,38 +40,29 @@
#endif

#define zone_nonnull(params) zone_attribute((__nonnull__ params))
#define zone_nonnull_all() zone_attribute((__nonnull__))
#define zone_nonnull_all zone_attribute((__nonnull__))

#if _MSC_VER
# define zone_always_inline() __forceinline
# define zone_never_inline() __declspec(noinline)
# define zone_noreturn() __declspec(noreturn)
# define zone_allocator(...)
# define zone_really_inline __forceinline
# define zone_never_inline __declspec(noinline)
# define zone_warn_unused_result

# define zone_unlikely(x)
# define zone_likely(params) (params)
# define zone_unlikely(params) (params)

# define zone_format(params)
# define zone_format_printf(string_index, first_to_check)
#else // _MSC_VER
# define zone_always_inline() zone_attribute((always_inline))
# define zone_never_inline() zone_attribute((noinline))
# if zone_has_attribute(noreturn)
# define zone_noreturn() zone_attribute((noreturn))
# define zone_really_inline inline zone_attribute((always_inline))
# define zone_never_inline zone_attribute((noinline))
# if zone_has_attribute(warn_unused_result)
# define zone_warn_unused_result zone_attribute((warn_unused_result))
# else
# define zone_noreturn()
# define zone_warn_unused_result
# endif

# if zone_has_attribute(malloc)
# if zone_gcc
# define zone_allocator(...) zone_attribute((malloc(__VA_ARGS__)))
# else
# define zone_allocator(...) zone_attribute((malloc))
# endif
# else
# define zone_allocator(...)
# endif

# define zone_unlikely(params) __builtin_expect((params), 0)
# define zone_likely(params) __builtin_expect(!!(params), 1)
# define zone_unlikely(params) __builtin_expect(!!(params), 0)

# if zone_has_attribute(format)
# define zone_format(params) zone_attribute((__format__ params))
Expand Down
71 changes: 28 additions & 43 deletions src/bench.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
*
*/
#include <assert.h>
#include <setjmp.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
Expand All @@ -21,31 +20,32 @@
#include "zone.h"
#include "config.h"
#include "isadetection.h"
#include "diagnostic.h"

#if _WIN32
#define strcasecmp(s1, s2) _stricmp(s1, s2)
#define strncasecmp(s1, s2, n) _strnicmp(s1, s2, n)
#endif

#if HAVE_HASWELL
extern zone_return_t zone_bench_haswell_lex(zone_parser_t *, size_t *);
extern zone_return_t zone_haswell_parse(zone_parser_t *, void *);
extern int32_t zone_bench_haswell_lex(zone_parser_t *, size_t *);
extern int32_t zone_haswell_parse(zone_parser_t *);
#endif

#if HAVE_WESTMERE
extern zone_return_t zone_bench_westmere_lex(zone_parser_t *, size_t *);
extern zone_return_t zone_westmere_parse(zone_parser_t *, void *);
extern int32_t zone_bench_westmere_lex(zone_parser_t *, size_t *);
extern int32_t zone_westmere_parse(zone_parser_t *);
#endif

extern zone_return_t zone_bench_fallback_lex(zone_parser_t *, size_t *);
extern zone_return_t zone_fallback_parse(zone_parser_t *, void *);
extern int32_t zone_bench_fallback_lex(zone_parser_t *, size_t *);
extern int32_t zone_fallback_parse(zone_parser_t *);

typedef struct target target_t;
struct target {
const char *name;
uint32_t instruction_set;
zone_return_t (*bench_lex)(zone_parser_t *, size_t *);
zone_return_t (*parse)(zone_parser_t *, void *);
int32_t (*bench_lex)(zone_parser_t *, size_t *);
int32_t (*parse)(zone_parser_t *);
};

static const target_t targets[] = {
Expand All @@ -58,7 +58,7 @@ static const target_t targets[] = {
{ "fallback", 0, &zone_bench_fallback_lex, &zone_fallback_parse }
};

extern zone_return_t zone_open(
extern int32_t zone_open(
zone_parser_t *,
const zone_options_t *,
zone_cache_t *,
Expand All @@ -68,29 +68,19 @@ extern zone_return_t zone_open(
extern void zone_close(
zone_parser_t *);

static zone_return_t bench_lex(zone_parser_t *parser, const target_t *target)
static int32_t bench_lex(zone_parser_t *parser, const target_t *target)
{
size_t tokens = 0;
zone_return_t result;
volatile jmp_buf environment;

switch ((result = setjmp((void *)environment))) {
case 0:
parser->environment = environment;
result = target->bench_lex(parser, &tokens);
assert(result == ZONE_SUCCESS);
break;
default:
assert(result < 0);
assert(parser->environment == environment);
break;
}
int32_t result;

if ((result = target->bench_lex(parser, &tokens)) < 0)
return result;

printf("Lexed %zu tokens\n", tokens);
return result;
return 0;
}

static zone_return_t bench_accept(
static int32_t bench_accept(
zone_parser_t *parser,
const zone_name_t *owner,
uint16_t type,
Expand All @@ -111,28 +101,21 @@ static zone_return_t bench_accept(
return ZONE_SUCCESS;
}

static zone_return_t bench_parse(zone_parser_t *parser, const target_t *target)
static int32_t bench_parse(zone_parser_t *parser, const target_t *target)
{
size_t records = 0;
zone_return_t result;
volatile jmp_buf environment;

switch ((result = setjmp((void *)environment))) {
case 0:
parser->environment = environment;
result = target->parse(parser, &records);
assert(result == ZONE_SUCCESS);
break;
default:
assert(result < 0);
assert(parser->environment == environment);
break;
}
int32_t result;

parser->user_data = &records;
result = target->parse(parser);

printf("Parsed %zu records\n", records);
return result;
}

diagnostic_push()
msvc_diagnostic_ignored(4996)

static const target_t *select_target(const char *name)
{
const size_t n = sizeof(targets)/sizeof(targets[0]);
Expand Down Expand Up @@ -160,6 +143,8 @@ static const target_t *select_target(const char *name)
return target;
}

diagnostic_pop()

static void help(const char *program)
{
const char *format =
Expand Down Expand Up @@ -207,7 +192,7 @@ int main(int argc, char *argv[])
if (optind > argc || argc - optind < 2)
usage(program);

zone_return_t (*bench)(zone_parser_t *, const target_t *) = 0;
int32_t (*bench)(zone_parser_t *, const target_t *) = 0;
if (strcasecmp(argv[optind], "lex") == 0)
bench = &bench_lex;
else if (strcasecmp(argv[optind], "parse") == 0)
Expand Down
4 changes: 2 additions & 2 deletions src/diagnostic.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
#if _MSC_VER
# define diagnostic_push() \
__pragma(warning(push))
# define msvc_diagnostic_ignored(warning) \
__pragma(warning(disable: ## warning))
# define msvc_diagnostic_ignored(warning_specifier) \
__pragma(warning(disable:warning_specifier))
# define diagnostic_pop() \
__pragma(warning(pop))
#elif __GNUC__
Expand Down
12 changes: 7 additions & 5 deletions src/fallback/bench.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,18 @@
diagnostic_push()
clang_diagnostic_ignored(missing-prototypes)

zone_return_t zone_bench_fallback_lex(zone_parser_t *parser, size_t *tokens)
int32_t zone_bench_fallback_lex(zone_parser_t *parser, size_t *tokens)
{
zone_token_t token;
zone_return_t result;
token_t token;

(*tokens) = 0;
while ((result = lex(parser, &token)) >= 0 && token.data != zone_end_of_file)
lex(parser, &token);
while (token.code > 0) {
(*tokens)++;
lex(parser, &token);
}

return result;
return token.code ? -1 : 0;
}

diagnostic_pop()
Loading