diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000..af70a91
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,7 @@
+[flake8]
+exclude=
+    tests/**,
+    conftest.py,
+    setup.py
+max-line-length=120
+ignore=E731,W503,E203,BLK100,B301
\ No newline at end of file
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..7e4b9d3
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,66 @@
+import datetime
+import json
+import os
+import collections
+
+import pytest
+
+
+class ValidationError(Exception):
+    pass
+
+
+@pytest.fixture
+def spec_validator():
+    with open(
+        os.path.join(os.path.dirname(__file__), "resources", "spec.json"), "r"
+    ) as fh:
+        spec = json.load(fh)
+
+    def validator(data_json):
+        """
+        Throws a ValidationError if anything doesn't match the spec.
+
+        Returns the original json (pass-through)
+        """
+        fields = spec["fields"]
+        data = json.loads(data_json, object_pairs_hook=collections.OrderedDict)
+        for k, v in fields.items():
+            if v.get("required"):
+                found = False
+                if k in data:
+                    found = True
+                elif "." in k:
+                    # Dotted keys could be nested, like ecs.version
+                    subkeys = k.split(".")
+                    subval = data
+                    for subkey in subkeys:
+                        subval = subval.get(subkey, {})
+                    if subval:
+                        found = True
+                if not found:
+                    raise ValidationError("Missing required key {}".format(k))
+            if k in data:
+                if v["type"] == "string" and not (
+                    isinstance(data[k], str) or isinstance(data[k], type(u""))  # py2 unicode
+                ):
+                    raise ValidationError(
+                        "Value {0} for key {1} should be string, is {2}".format(
+                            data[k], k, type(data[k])
+                        )
+                    )
+                if v["type"] == "datetime":
+                    try:
+                        datetime.datetime.strptime(data[k], "%Y-%m-%dT%H:%M:%S.%fZ")
+                    except ValueError:
+                        raise ValidationError(
+                            "Value {0} for key {1} doesn't parse as an ISO datetime".format(
+                                data[k], k
+                            )
+                        )
+                if v.get("index") is not None and list(data.keys())[v["index"]] != k:
+                    raise ValidationError("Key {0} is not at index {1}".format(k, v["index"]))
+
+        return data_json
+
+    return validator
diff --git a/tests/resources/spec.json b/tests/resources/spec.json
new file mode 100644
index 0000000..14cca2e
--- /dev/null
+++ b/tests/resources/spec.json
@@ -0,0 +1,139 @@
+{
+  "version": 1.0,
+  "url": "https://www.elastic.co/guide/en/ecs/current/index.html",
+  "ecs": {
+    "version": "1.x"
+  },
+  "fields": {
+    "@timestamp": {
+      "type": "datetime",
+      "required": true,
+      "index": 0,
+      "url": "https://www.elastic.co/guide/en/ecs/current/ecs-base.html"
+    },
+    "log.level": {
+      "type": "string",
+      "required": true,
+      "index": 1,
+      "top_level_field": true,
+      "url": "https://www.elastic.co/guide/en/ecs/current/ecs-log.html",
+      "comment": [
+        "This field SHOULD NOT be a nested object field but at the top level with a dot in the property name.",
+        "This is to make the JSON logs more human-readable.",
+        "Loggers MAY indent the log level so that the `message` field always starts at the exact same offset,",
+        "no matter the number of characters the log level has.",
+        "For example: `'DEBUG'` (5 chars) will not be indented, whereas ` 'WARN'` (4 chars) will be indented by one space character."
+      ]
+    },
+    "message": {
+      "type": "string",
+      "required": true,
+      "index": 2,
+      "url": "https://www.elastic.co/guide/en/ecs/current/ecs-base.html"
+    },
+    "ecs.version": {
+      "type": "string",
+      "required": true,
+      "url": "https://www.elastic.co/guide/en/ecs/current/ecs-ecs.html"
+    },
+    "labels": {
+      "type": "object",
+      "required": false,
+      "url": "https://www.elastic.co/guide/en/ecs/current/ecs-base.html",
+      "sanitization": {
+        "key": {
+          "replacements": [
+            ".",
+            "*",
+            "\\"
+          ],
+          "substitute": "_"
+        }
+      }
+    },
+    "trace.id": {
+      "type": "string",
+      "required": false,
+      "url": "https://www.elastic.co/guide/en/ecs/current/ecs-tracing.html",
+      "comment": "When APM agents add this field to the context, ecs loggers should pick it up and add it to the log event."
+    },
+    "transaction.id": {
+      "type": "string",
+      "required": false,
+      "url": "https://www.elastic.co/guide/en/ecs/current/ecs-tracing.html",
+      "comment": "When APM agents add this field to the context, ecs loggers should pick it up and add it to the log event."
+    },
+    "service.name": {
+      "type": "string",
+      "required": false,
+      "url": "https://www.elastic.co/guide/en/ecs/current/ecs-service.html",
+      "comment": [
+        "Configurable by users.",
+        "When an APM agent is active, they should auto-configure it if not already set."
+      ]
+    },
+    "event.dataset": {
+      "type": "string",
+      "required": false,
+      "url": "https://www.elastic.co/guide/en/ecs/current/ecs-event.html",
+      "default": "${service.name}.log OR ${service.name}.${appender.name}",
+      "comment": [
+        "Configurable by users.",
+        "If the user manually configures the service name,",
+        "the logging library should set `event.dataset=${service.name}.log` if not explicitly configured otherwise.",
+        "",
+        "When agents auto-configure the app to use an ECS logger,",
+        "they should set `event.dataset=${service.name}.${appender.name}` if the appender name is available in the logging library.",
+        "Otherwise, agents should also set `event.dataset=${service.name}.log`",
+        "",
+        "The field helps to filter for different log streams from the same pod, for example and is required for log anomaly detection."
+      ]
+    },
+    "process.thread.name": {
+      "type": "string",
+      "required": false,
+      "url": "https://www.elastic.co/guide/en/ecs/current/ecs-process.html"
+    },
+    "log.logger": {
+      "type": "string",
+      "required": false,
+      "url": "https://www.elastic.co/guide/en/ecs/current/ecs-log.html"
+    },
+    "log.origin.file.line": {
+      "type": "integer",
+      "required": false,
+      "url": "https://www.elastic.co/guide/en/ecs/current/ecs-log.html",
+      "comment": "Should be opt-in as it requires the logging library to capture a stack trace for each log event."
+    },
+    "log.origin.file.name": {
+      "type": "string",
+      "required": false,
+      "url": "https://www.elastic.co/guide/en/ecs/current/ecs-log.html",
+      "comment": "Should be opt-in as it requires the logging library to capture a stack trace for each log event."
+    },
+    "log.origin.function": {
+      "type": "string",
+      "required": false,
+      "url": "https://www.elastic.co/guide/en/ecs/current/ecs-log.html",
+      "comment": "Should be opt-in as it requires the logging library to capture a stack trace for each log event."
+    },
+    "error.type": {
+      "type": "string",
+      "required": false,
+      "url": "https://www.elastic.co/guide/en/ecs/current/ecs-error.html",
+      "comment": "The exception type or class, such as `java.lang.IllegalArgumentException`."
+    },
+    "error.message": {
+      "type": "string",
+      "required": false,
+      "url": "https://www.elastic.co/guide/en/ecs/current/ecs-error.html",
+      "comment": "The message of the exception."
+    },
+    "error.stack_trace": {
+      "type": "string",
+      "required": false,
+      "url": "https://www.elastic.co/guide/en/ecs/current/ecs-error.html",
+      "comment": "The stack trace of the exception as plain text."
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/test_apm.py b/tests/test_apm.py
index 2497d55..81d3e21 100644
--- a/tests/test_apm.py
+++ b/tests/test_apm.py
@@ -10,7 +10,7 @@
 from .compat import StringIO
 
 
-def test_elasticapm_structlog_log_correlation_ecs_fields():
+def test_elasticapm_structlog_log_correlation_ecs_fields(spec_validator):
     apm = elasticapm.Client({"SERVICE_NAME": "apm-service", "DISABLE_SEND": True})
     stream = StringIO()
     logger = structlog.PrintLogger(stream)
@@ -30,7 +30,7 @@ def test_elasticapm_structlog_log_correlation_ecs_fields():
     finally:
         apm.end_transaction("test-transaction")
 
-    ecs = json.loads(stream.getvalue().rstrip())
+    ecs = json.loads(spec_validator(stream.getvalue().rstrip()))
     ecs.pop("@timestamp")
     assert ecs == {
         "ecs": {"version": "1.6.0"},
diff --git a/tests/test_stdlib_formatter.py b/tests/test_stdlib_formatter.py
index c125479..c2ed87a 100644
--- a/tests/test_stdlib_formatter.py
+++ b/tests/test_stdlib_formatter.py
@@ -35,17 +35,17 @@ def make_record():
     return record
 
 
-def test_record_formatted():
+def test_record_formatted(spec_validator):
     formatter = ecs_logging.StdlibFormatter(exclude_fields=["process"])
-    assert formatter.format(make_record()) == (
+    assert spec_validator(formatter.format(make_record())) == (
         '{"@timestamp":"2020-03-20T14:12:46.123Z","log.level":"debug","message":"1: hello","ecs":{"version":"1.6.0"},'
         '"log":{"logger":"logger-name","origin":{"file":{"line":10,"name":"file.py"},"function":"test_function"},'
         '"original":"1: hello"}}'
     )
 
 
-def test_can_be_overridden():
+def test_can_be_overridden(spec_validator):
     class CustomFormatter(ecs_logging.StdlibFormatter):
         def format_to_ecs(self, record):
             ecs_dict = super(CustomFormatter, self).format_to_ecs(record)
@@ -53,7 +53,7 @@ def format_to_ecs(self, record):
             return ecs_dict
 
     formatter = CustomFormatter(exclude_fields=["process"])
-    assert formatter.format(make_record()) == (
+    assert spec_validator(formatter.format(make_record())) == (
         '{"@timestamp":"2020-03-20T14:12:46.123Z","log.level":"debug","message":"1: hello",'
         '"custom":"field","ecs":{"version":"1.6.0"},"log":{"logger":"logger-name","origin":'
        '{"file":{"line":10,"name":"file.py"},"function":"test_function"},"original":"1: hello"}}'
diff --git a/tests/test_structlog_formatter.py b/tests/test_structlog_formatter.py
index 4a48da5..9771d54 100644
--- a/tests/test_structlog_formatter.py
+++ b/tests/test_structlog_formatter.py
@@ -9,11 +9,11 @@ def make_event_dict():
 
 
 @mock.patch("time.time")
-def test_event_dict_formatted(time):
+def test_event_dict_formatted(time, spec_validator):
     time.return_value = 1584720997.187709
     formatter = ecs_logging.StructlogFormatter()
-    assert formatter(None, "debug", make_event_dict()) == (
+    assert spec_validator(formatter(None, "debug", make_event_dict())) == (
         '{"@timestamp":"2020-03-20T16:16:37.187Z","log.level":"debug",'
         '"message":"test message","ecs":{"version":"1.6.0"},'
         '"log":{"logger":"logger-name"}}'
     )
@@ -21,7 +21,7 @@
 
 
 @mock.patch("time.time")
-def test_can_be_set_as_processor(time):
+def test_can_be_set_as_processor(time, spec_validator):
     time.return_value = 1584720997.187709
 
     stream = StringIO()
@@ -35,7 +35,7 @@ def test_can_be_set_as_processor(time):
     logger = structlog.get_logger("logger-name")
     logger.debug("test message", custom="key", **{"dot.ted": 1})
 
-    assert stream.getvalue() == (
+    assert spec_validator(stream.getvalue()) == (
         '{"@timestamp":"2020-03-20T16:16:37.187Z","log.level":"debug",'
         '"message":"test message","custom":"key","dot":{"ted":1},'
         '"ecs":{"version":"1.6.0"}}\n'
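
Any test in this suite can opt into spec validation by taking spec_validator as a parameter; pytest resolves the fixture from tests/conftest.py, raises ValidationError on a spec violation, and otherwise returns the JSON string unchanged. A minimal sketch of that usage follows; the test name and the LogRecord setup are illustrative only and are not part of this change:

    import logging

    import ecs_logging


    def test_formatter_output_matches_spec(spec_validator):
        # Build a throwaway stdlib record; any JSON string produced by a
        # formatter can be passed through the validator, which is a
        # pass-through when the document satisfies spec.json.
        record = logging.LogRecord(
            name="logger-name",
            level=logging.DEBUG,
            pathname="/path/file.py",
            lineno=10,
            msg="hello",
            args=(),
            exc_info=None,
        )
        formatted = ecs_logging.StdlibFormatter().format(record)
        assert spec_validator(formatted) == formatted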