From 398a1c3859ee0f9cddefe99527b31945233a63d3 Mon Sep 17 00:00:00 2001 From: Colton Myers Date: Thu, 14 Jan 2021 17:21:18 -0700 Subject: [PATCH 1/5] Add spec --- tests/resources/spec.json | 139 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 tests/resources/spec.json diff --git a/tests/resources/spec.json b/tests/resources/spec.json new file mode 100644 index 0000000..14cca2e --- /dev/null +++ b/tests/resources/spec.json @@ -0,0 +1,139 @@ +{ + "version": 1.0, + "url": "https://www.elastic.co/guide/en/ecs/current/index.html", + "ecs": { + "version": "1.x" + }, + "fields": { + "@timestamp": { + "type": "datetime", + "required": true, + "index": 0, + "url": "https://www.elastic.co/guide/en/ecs/current/ecs-base.html" + }, + "log.level": { + "type": "string", + "required": true, + "index": 1, + "top_level_field": true, + "url": "https://www.elastic.co/guide/en/ecs/current/ecs-log.html", + "comment": [ + "This field SHOULD NOT be a nested object field but at the top level with a dot in the property name.", + "This is to make the JSON logs more human-readable.", + "Loggers MAY indent the log level so that the `message` field always starts at the exact same offset,", + "no matter the number of characters the log level has.", + "For example: `'DEBUG'` (5 chars) will not be indented, whereas ` 'WARN'` (4 chars) will be indented by one space character." + ] + }, + "message": { + "type": "string", + "required": true, + "index": 2, + "url": "https://www.elastic.co/guide/en/ecs/current/ecs-base.html" + }, + "ecs.version": { + "type": "string", + "required": true, + "url": "https://www.elastic.co/guide/en/ecs/current/ecs-ecs.html" + }, + "labels": { + "type": "object", + "required": false, + "url": "https://www.elastic.co/guide/en/ecs/current/ecs-base.html", + "sanitization": { + "key": { + "replacements": [ + ".", + "*", + "\\" + ], + "substitute": "_" + } + } + }, + "trace.id": { + "type": "string", + "required": false, + "url": "https://www.elastic.co/guide/en/ecs/current/ecs-tracing.html", + "comment": "When APM agents add this field to the context, ecs loggers should pick it up and add it to the log event." + }, + "transaction.id": { + "type": "string", + "required": false, + "url": "https://www.elastic.co/guide/en/ecs/current/ecs-tracing.html", + "comment": "When APM agents add this field to the context, ecs loggers should pick it up and add it to the log event." + }, + "service.name": { + "type": "string", + "required": false, + "url": "https://www.elastic.co/guide/en/ecs/current/ecs-service.html", + "comment": [ + "Configurable by users.", + "When an APM agent is active, they should auto-configure it if not already set." + ] + }, + "event.dataset": { + "type": "string", + "required": false, + "url": "https://www.elastic.co/guide/en/ecs/current/ecs-event.html", + "default": "${service.name}.log OR ${service.name}.${appender.name}", + "comment": [ + "Configurable by users.", + "If the user manually configures the service name,", + "the logging library should set `event.dataset=${service.name}.log` if not explicitly configured otherwise.", + "", + "When agents auto-configure the app to use an ECS logger,", + "they should set `event.dataset=${service.name}.${appender.name}` if the appender name is available in the logging library.", + "Otherwise, agents should also set `event.dataset=${service.name}.log`", + "", + "The field helps to filter for different log streams from the same pod, for example and is required for log anomaly detection." + ] + }, + "process.thread.name": { + "type": "string", + "required": false, + "url": "https://www.elastic.co/guide/en/ecs/current/ecs-process.html" + }, + "log.logger": { + "type": "string", + "required": false, + "url": "https://www.elastic.co/guide/en/ecs/current/ecs-log.html" + }, + "log.origin.file.line": { + "type": "integer", + "required": false, + "url": "https://www.elastic.co/guide/en/ecs/current/ecs-log.html", + "comment": "Should be opt-in as it requires the logging library to capture a stack trace for each log event." + }, + "log.origin.file.name": { + "type": "string", + "required": false, + "url": "https://www.elastic.co/guide/en/ecs/current/ecs-log.html", + "comment": "Should be opt-in as it requires the logging library to capture a stack trace for each log event." + }, + "log.origin.function": { + "type": "string", + "required": false, + "url": "https://www.elastic.co/guide/en/ecs/current/ecs-log.html", + "comment": "Should be opt-in as it requires the logging library to capture a stack trace for each log event." + }, + "error.type": { + "type": "string", + "required": false, + "url": "https://www.elastic.co/guide/en/ecs/current/ecs-error.html", + "comment": "The exception type or class, such as `java.lang.IllegalArgumentException`." + }, + "error.message": { + "type": "string", + "required": false, + "url": "https://www.elastic.co/guide/en/ecs/current/ecs-error.html", + "comment": "The message of the exception." + }, + "error.stack_trace": { + "type": "string", + "required": false, + "url": "https://www.elastic.co/guide/en/ecs/current/ecs-error.html", + "comment": "The stack trace of the exception as plain text." + } + } +} \ No newline at end of file From 3a05f00594312260400151c39c940019856d117e Mon Sep 17 00:00:00 2001 From: Colton Myers Date: Fri, 15 Jan 2021 16:37:21 -0700 Subject: [PATCH 2/5] Add spec validation --- .flake8 | 7 ++++ tests/conftest.py | 68 +++++++++++++++++++++++++++++++ tests/test_apm.py | 4 +- tests/test_stdlib_formatter.py | 8 ++-- tests/test_structlog_formatter.py | 8 ++-- 5 files changed, 85 insertions(+), 10 deletions(-) create mode 100644 .flake8 create mode 100644 tests/conftest.py diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..af70a91 --- /dev/null +++ b/.flake8 @@ -0,0 +1,7 @@ +[flake8] +exclude= + tests/**, + conftest.py, + setup.py +max-line-length=120 +ignore=E731,W503,E203,BLK100,B301 \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..0befe68 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,68 @@ +import datetime +import json +import os + +import pytest + + +class ValidationError(Exception): + pass + + +@pytest.fixture +def spec_validator(): + with open( + os.path.join(os.path.dirname(__file__), "resources", "spec.json"), "r" + ) as fh: + spec = json.load(fh) + + def validator(data_json): + """ + Throws a ValidationError if anything doesn't match the spec. + + Returns the original json (pass-through) + """ + fields = spec["fields"] + data = json.loads(data_json) + for k, v in fields.items(): + if v.get("required"): + found = False + if k in data: + found = True + elif "." in k: + # Dotted keys could be nested, like ecs.version + subkeys = k.split(".") + subval = data + for subkey in subkeys: + subval = subval.get(subkey, {}) + if subval: + found = True + if not found: + raise ValidationError("Missing required key {}".format(k)) + if k in data: + if v["type"] == "string" and not isinstance(data[k], str): + raise ValidationError( + "Value {0} for key {1} should be string".format(data[k], k) + ) + if v["type"] == "datetime": + try: + datetime.datetime.fromisoformat(data[k].rstrip("Z")) + except ValueError: + raise ValidationError( + "Value {0} for key {1} doesn't parse as an ISO datetime".format( + data[k], k + ) + ) + if v.get("index"): + index = v.get("index") + key = json.loads( + data_json.split(",")[index].split(":")[0].strip().lstrip("{") + ) + if key != k: + raise ValidationError( + "Key {0} is not at index {1}".format(k, index) + ) + + return data_json + + return validator diff --git a/tests/test_apm.py b/tests/test_apm.py index 2497d55..81d3e21 100644 --- a/tests/test_apm.py +++ b/tests/test_apm.py @@ -10,7 +10,7 @@ from .compat import StringIO -def test_elasticapm_structlog_log_correlation_ecs_fields(): +def test_elasticapm_structlog_log_correlation_ecs_fields(spec_validator): apm = elasticapm.Client({"SERVICE_NAME": "apm-service", "DISABLE_SEND": True}) stream = StringIO() logger = structlog.PrintLogger(stream) @@ -30,7 +30,7 @@ def test_elasticapm_structlog_log_correlation_ecs_fields(): finally: apm.end_transaction("test-transaction") - ecs = json.loads(stream.getvalue().rstrip()) + ecs = json.loads(spec_validator(stream.getvalue().rstrip())) ecs.pop("@timestamp") assert ecs == { "ecs": {"version": "1.6.0"}, diff --git a/tests/test_stdlib_formatter.py b/tests/test_stdlib_formatter.py index c125479..c2ed87a 100644 --- a/tests/test_stdlib_formatter.py +++ b/tests/test_stdlib_formatter.py @@ -35,17 +35,17 @@ def make_record(): return record -def test_record_formatted(): +def test_record_formatted(spec_validator): formatter = ecs_logging.StdlibFormatter(exclude_fields=["process"]) - assert formatter.format(make_record()) == ( + assert spec_validator(formatter.format(make_record())) == ( '{"@timestamp":"2020-03-20T14:12:46.123Z","log.level":"debug","message":"1: hello","ecs":{"version":"1.6.0"},' '"log":{"logger":"logger-name","origin":{"file":{"line":10,"name":"file.py"},"function":"test_function"},' '"original":"1: hello"}}' ) -def test_can_be_overridden(): +def test_can_be_overridden(spec_validator): class CustomFormatter(ecs_logging.StdlibFormatter): def format_to_ecs(self, record): ecs_dict = super(CustomFormatter, self).format_to_ecs(record) @@ -53,7 +53,7 @@ def format_to_ecs(self, record): return ecs_dict formatter = CustomFormatter(exclude_fields=["process"]) - assert formatter.format(make_record()) == ( + assert spec_validator(formatter.format(make_record())) == ( '{"@timestamp":"2020-03-20T14:12:46.123Z","log.level":"debug","message":"1: hello",' '"custom":"field","ecs":{"version":"1.6.0"},"log":{"logger":"logger-name","origin":' '{"file":{"line":10,"name":"file.py"},"function":"test_function"},"original":"1: hello"}}' diff --git a/tests/test_structlog_formatter.py b/tests/test_structlog_formatter.py index 4a48da5..9771d54 100644 --- a/tests/test_structlog_formatter.py +++ b/tests/test_structlog_formatter.py @@ -9,11 +9,11 @@ def make_event_dict(): @mock.patch("time.time") -def test_event_dict_formatted(time): +def test_event_dict_formatted(time, spec_validator): time.return_value = 1584720997.187709 formatter = ecs_logging.StructlogFormatter() - assert formatter(None, "debug", make_event_dict()) == ( + assert spec_validator(formatter(None, "debug", make_event_dict())) == ( '{"@timestamp":"2020-03-20T16:16:37.187Z","log.level":"debug",' '"message":"test message","ecs":{"version":"1.6.0"},' '"log":{"logger":"logger-name"}}' @@ -21,7 +21,7 @@ def test_event_dict_formatted(time): @mock.patch("time.time") -def test_can_be_set_as_processor(time): +def test_can_be_set_as_processor(time, spec_validator): time.return_value = 1584720997.187709 stream = StringIO() @@ -35,7 +35,7 @@ def test_can_be_set_as_processor(time): logger = structlog.get_logger("logger-name") logger.debug("test message", custom="key", **{"dot.ted": 1}) - assert stream.getvalue() == ( + assert spec_validator(stream.getvalue()) == ( '{"@timestamp":"2020-03-20T16:16:37.187Z","log.level":"debug",' '"message":"test message","custom":"key","dot":{"ted":1},' '"ecs":{"version":"1.6.0"}}\n' From 5ebf563c26bf8500b0bdd6dd13d0ee63e9ff0eb2 Mon Sep 17 00:00:00 2001 From: Colton Myers Date: Fri, 15 Jan 2021 16:50:46 -0700 Subject: [PATCH 3/5] Py2 compatible string types --- tests/conftest.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 0befe68..3a51a81 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -40,9 +40,13 @@ def validator(data_json): if not found: raise ValidationError("Missing required key {}".format(k)) if k in data: - if v["type"] == "string" and not isinstance(data[k], str): + if v["type"] == "string" and not ( + isinstance(data[k], str) or isinstance(data[k], basestring) + ): raise ValidationError( - "Value {0} for key {1} should be string".format(data[k], k) + "Value {0} for key {1} should be string, is {2}".format( + data[k], k, type(data[k]) + ) ) if v["type"] == "datetime": try: From ff23df2ed980b06e4ab6c6eb3efc9a4ab273ad5b Mon Sep 17 00:00:00 2001 From: Colton Myers Date: Tue, 19 Jan 2021 09:17:34 -0700 Subject: [PATCH 4/5] Use strptime for backwards compatibility --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 3a51a81..3db396c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -50,7 +50,7 @@ def validator(data_json): ) if v["type"] == "datetime": try: - datetime.datetime.fromisoformat(data[k].rstrip("Z")) + datetime.datetime.strptime(data[k], "%Y-%m-%dT%H:%M:%S.%fZ") except ValueError: raise ValidationError( "Value {0} for key {1} doesn't parse as an ISO datetime".format( From e3986bbc9512b3af3c339323614c57f00185cfff Mon Sep 17 00:00:00 2001 From: Colton Myers Date: Tue, 19 Jan 2021 09:21:13 -0700 Subject: [PATCH 5/5] Use ordereddict for easier index-validation --- tests/conftest.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 3db396c..7e4b9d3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,7 @@ import datetime import json import os +import collections import pytest @@ -23,7 +24,7 @@ def validator(data_json): Returns the original json (pass-through) """ fields = spec["fields"] - data = json.loads(data_json) + data = json.loads(data_json, object_pairs_hook=collections.OrderedDict) for k, v in fields.items(): if v.get("required"): found = False @@ -57,15 +58,8 @@ def validator(data_json): data[k], k ) ) - if v.get("index"): - index = v.get("index") - key = json.loads( - data_json.split(",")[index].split(":")[0].strip().lstrip("{") - ) - if key != k: - raise ValidationError( - "Key {0} is not at index {1}".format(k, index) - ) + if v.get("index") and list(data.keys())[v.get("index")] != k: + raise ValidationError("Key {0} is not at index {1}".format(k, index)) return data_json