From ad693acdde0dac452a9346013ce5c0b07e3c4ac5 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Thu, 7 Jan 2021 11:14:41 -0600 Subject: [PATCH] Ensure @timestamp, log.level, and message fields are ordered per spec --- ecs_logging/_utils.py | 46 ++++++++++++++++++++++++++++++- tests/test_apm.py | 8 +++--- tests/test_stdlib_formatter.py | 24 ++++++++-------- tests/test_structlog_formatter.py | 11 ++++---- tests/test_utils.py | 45 +++++++++++++++++++++++++++++- 5 files changed, 111 insertions(+), 23 deletions(-) diff --git a/ecs_logging/_utils.py b/ecs_logging/_utils.py index 8d2e800..d58e53d 100644 --- a/ecs_logging/_utils.py +++ b/ecs_logging/_utils.py @@ -103,4 +103,48 @@ def merge_dicts(from_, into): def json_dumps(value): # type: (Dict[str, Any]) -> str - return json.dumps(value, sort_keys=True, separators=(",", ":")) + + # Ensure that the first three fields are '@timestamp', + # 'log.level', and 'message' per ECS spec + ordered_fields = [] + try: + ordered_fields.append(("@timestamp", value.pop("@timestamp"))) + except KeyError: + pass + + # log.level can either be nested or not nested so we have to try both + try: + ordered_fields.append(("log.level", value["log"].pop("level"))) + if not value["log"]: # Remove the 'log' dictionary if it's now empty + value.pop("log", None) + except KeyError: + try: + ordered_fields.append(("log.level", value.pop("log.level"))) + except KeyError: + pass + try: + ordered_fields.append(("message", value.pop("message"))) + except KeyError: + pass + + # Because we want to use 'sorted_keys=True' we manually build + # the first three keys and then build the rest with json.dumps() + if ordered_fields: + # Need to call json.dumps() on values just in + # case the given values aren't strings (even though + # they should be according to the spec) + ordered_json = ",".join( + '"%s":%s' % (k, json.dumps(v, sort_keys=True, separators=(",", ":"))) + for k, v in ordered_fields + ) + if value: + return "{%s,%s" % ( + ordered_json, + json.dumps(value, sort_keys=True, separators=(",", ":"))[1:], + ) + else: + return "{%s}" % ordered_json + # If there are no fields with ordering requirements we + # pass everything into json.dumps() + else: + return json.dumps(value, sort_keys=True, separators=(",", ":")) diff --git a/tests/test_apm.py b/tests/test_apm.py index f5b9202..2497d55 100644 --- a/tests/test_apm.py +++ b/tests/test_apm.py @@ -34,7 +34,7 @@ def test_elasticapm_structlog_log_correlation_ecs_fields(): ecs.pop("@timestamp") assert ecs == { "ecs": {"version": "1.6.0"}, - "log": {"level": "info"}, + "log.level": "info", "message": "test message", "span": {"id": span_id}, "trace": {"id": trace_id}, @@ -72,8 +72,8 @@ def test_elastic_apm_stdlib_no_filter_log_correlation_ecs_fields(): ecs = json.loads(stream.getvalue().rstrip()) assert ecs == { "ecs": {"version": "1.6.0"}, + "log.level": "info", "log": { - "level": "info", "logger": "apm-logger", "origin": { "file": {"name": "test_apm.py"}, @@ -116,8 +116,8 @@ def test_elastic_apm_stdlib_with_filter_log_correlation_ecs_fields(): ecs = json.loads(stream.getvalue().rstrip()) assert ecs == { "ecs": {"version": "1.6.0"}, + "log.level": "info", "log": { - "level": "info", "logger": "apm-logger", "origin": { "file": {"name": "test_apm.py"}, @@ -163,8 +163,8 @@ def test_elastic_apm_stdlib_exclude_fields(): ecs = json.loads(stream.getvalue().rstrip()) assert ecs == { "ecs": {"version": "1.6.0"}, + "log.level": "info", "log": { - "level": "info", "logger": "apm-logger", "origin": { "file": {"name": "test_apm.py"}, diff --git a/tests/test_stdlib_formatter.py b/tests/test_stdlib_formatter.py index ca15541..c125479 100644 --- a/tests/test_stdlib_formatter.py +++ b/tests/test_stdlib_formatter.py @@ -39,9 +39,9 @@ def test_record_formatted(): formatter = ecs_logging.StdlibFormatter(exclude_fields=["process"]) assert formatter.format(make_record()) == ( - '{"@timestamp":"2020-03-20T14:12:46.123Z","ecs":{"version":"1.6.0"},' - '"log":{"level":"debug","logger":"logger-name","origin":{"file":{"line":10,"name":"file.py"},' - '"function":"test_function"},"original":"1: hello"},"message":"1: hello"}' + '{"@timestamp":"2020-03-20T14:12:46.123Z","log.level":"debug","message":"1: hello","ecs":{"version":"1.6.0"},' + '"log":{"logger":"logger-name","origin":{"file":{"line":10,"name":"file.py"},"function":"test_function"},' + '"original":"1: hello"}}' ) @@ -54,9 +54,9 @@ def format_to_ecs(self, record): formatter = CustomFormatter(exclude_fields=["process"]) assert formatter.format(make_record()) == ( - '{"@timestamp":"2020-03-20T14:12:46.123Z","custom":"field","ecs":{"version":"1.6.0"},' - '"log":{"level":"debug","logger":"logger-name","origin":{"file":{"line":10,"name":"file.py"},' - '"function":"test_function"},"original":"1: hello"},"message":"1: hello"}' + '{"@timestamp":"2020-03-20T14:12:46.123Z","log.level":"debug","message":"1: hello",' + '"custom":"field","ecs":{"version":"1.6.0"},"log":{"logger":"logger-name","origin":' + '{"file":{"line":10,"name":"file.py"},"function":"test_function"},"original":"1: hello"}}' ) @@ -68,9 +68,9 @@ def test_can_be_set_on_handler(): handler.handle(make_record()) assert stream.getvalue() == ( - '{"@timestamp":"2020-03-20T14:12:46.123Z","ecs":{"version":"1.6.0"},' - '"log":{"level":"debug","logger":"logger-name","origin":{"file":{"line":10,"name":"file.py"},' - '"function":"test_function"},"original":"1: hello"},"message":"1: hello"}\n' + '{"@timestamp":"2020-03-20T14:12:46.123Z","log.level":"debug","message":"1: hello",' + '"ecs":{"version":"1.6.0"},"log":{"logger":"logger-name","origin":{"file":{"line":10,' + '"name":"file.py"},"function":"test_function"},"original":"1: hello"}}\n' ) @@ -103,8 +103,8 @@ def test_extra_is_merged(time, logger): assert ecs == { "@timestamp": "2020-03-20T16:16:37.187Z", "ecs": {"version": "1.6.0"}, + "log.level": "info", "log": { - "level": "info", "logger": logger.name, "origin": { "file": {"name": "test_stdlib_formatter.py"}, @@ -161,7 +161,7 @@ def test_stack_trace_limit_disabled(stack_trace_limit, logger): ecs = json.loads(stream.getvalue().rstrip()) assert ecs["error"] == {"message": "error!", "type": "ValueError"} - assert ecs["log"]["level"] == "info" + assert ecs["log.level"] == "info" assert ecs["message"] == "there was an error" assert ecs["log"]["original"] == "there was an error" @@ -195,7 +195,7 @@ def h(): "message": "error!", "type": "ValueError", } - assert ecs["log"]["level"] == "info" + assert ecs["log.level"] == "info" assert ecs["message"] == "there was an error" assert ecs["log"]["original"] == "there was an error" diff --git a/tests/test_structlog_formatter.py b/tests/test_structlog_formatter.py index 4c97124..4a48da5 100644 --- a/tests/test_structlog_formatter.py +++ b/tests/test_structlog_formatter.py @@ -14,8 +14,9 @@ def test_event_dict_formatted(time): formatter = ecs_logging.StructlogFormatter() assert formatter(None, "debug", make_event_dict()) == ( - '{"@timestamp":"2020-03-20T16:16:37.187Z","ecs":{"version":"1.6.0"},' - '"log":{"level":"debug","logger":"logger-name"},"message":"test message"}' + '{"@timestamp":"2020-03-20T16:16:37.187Z","log.level":"debug",' + '"message":"test message","ecs":{"version":"1.6.0"},' + '"log":{"logger":"logger-name"}}' ) @@ -35,7 +36,7 @@ def test_can_be_set_as_processor(time): logger.debug("test message", custom="key", **{"dot.ted": 1}) assert stream.getvalue() == ( - '{"@timestamp":"2020-03-20T16:16:37.187Z","custom":"key",' - '"dot":{"ted":1},"ecs":{"version":"1.6.0"},"log":{"level":"debug"},' - '"message":"test message"}\n' + '{"@timestamp":"2020-03-20T16:16:37.187Z","log.level":"debug",' + '"message":"test message","custom":"key","dot":{"ted":1},' + '"ecs":{"version":"1.6.0"}}\n' ) diff --git a/tests/test_utils.py b/tests/test_utils.py index 17f9662..8b11ac8 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,5 +1,5 @@ import pytest -from ecs_logging._utils import flatten_dict, de_dot, normalize_dict +from ecs_logging._utils import flatten_dict, de_dot, normalize_dict, json_dumps def test_flatten_dict(): @@ -30,3 +30,46 @@ def test_normalize_dict(): def test_normalize_dict_with_array(): assert normalize_dict({"a": ["1", "2"]}) == {"a": ["1", "2"]} + + +@pytest.mark.parametrize( + ["value", "expected"], + [ + ({}, "{}"), + ({"log": {"level": "info"}}, '{"log.level":"info"}'), + ({"log.level": "info"}, '{"log.level":"info"}'), + ( + {"log": {"level": "info", "message": "hello"}}, + '{"log.level":"info","log":{"message":"hello"}}', + ), + ({"@timestamp": "2021-01-01..."}, '{"@timestamp":"2021-01-01..."}'), + ({"message": "hello"}, '{"message":"hello"}'), + ({"message": 1}, '{"message":1}'), + ({"message": ["hello"]}, '{"message":["hello"]}'), + ({"message": {"key": "val"}}, '{"message":{"key":"val"}}'), + ({"custom": "value"}, '{"custom":"value"}'), + ({"log.level": "info"}, '{"log.level":"info"}'), + ( + {"log": {"message": "hello"}, "message": "hello"}, + '{"message":"hello","log":{"message":"hello"}}', + ), + ( + { + "log": {"message": "hello", "level": "info"}, + "message": "hello", + "@timestamp": "2021-01-01...", + }, + '{"@timestamp":"2021-01-01...","log.level":"info","message":"hello","log":{"message":"hello"}}', + ), + ( + { + "log": {"level": "info"}, + "message": "hello", + "@timestamp": "2021-01-01...", + }, + '{"@timestamp":"2021-01-01...","log.level":"info","message":"hello"}', + ), + ], +) +def test_json_dumps(value, expected): + assert json_dumps(value) == expected