Skip to content

Validate against spec #31

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jan 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[flake8]
exclude=
tests/**,
conftest.py,
setup.py
max-line-length=120
ignore=E731,W503,E203,BLK100,B301
66 changes: 66 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import datetime
import json
import os
import collections

import pytest


class ValidationError(Exception):
    """Raised when a serialized log line does not conform to the ECS logging spec."""


@pytest.fixture
def spec_validator():
    """Pytest fixture: load ``resources/spec.json`` and return a validator.

    The returned callable accepts a JSON-encoded log line, raises
    :class:`ValidationError` if it violates the spec, and otherwise returns
    the original JSON string unchanged (pass-through), so it can wrap
    existing assertions transparently.
    """
    with open(
        os.path.join(os.path.dirname(__file__), "resources", "spec.json"), "r"
    ) as fh:
        spec = json.load(fh)

    def validator(data_json):
        """
        Throws a ValidationError if anything doesn't match the spec.

        Returns the original json (pass-through)
        """
        fields = spec["fields"]
        # OrderedDict preserves key order so positional "index" checks work.
        data = json.loads(data_json, object_pairs_hook=collections.OrderedDict)
        for k, v in fields.items():
            if v.get("required"):
                found = False
                if k in data:
                    found = True
                elif "." in k:
                    # Dotted keys could be nested, like ecs.version
                    subkeys = k.split(".")
                    subval = data
                    for subkey in subkeys:
                        subval = subval.get(subkey, {})
                    if subval:
                        found = True
                if not found:
                    raise ValidationError("Missing required key {}".format(k))
            if k in data:
                # BUG FIX: dropped the Python 2-only ``basestring`` check,
                # which raised NameError on Python 3 before the intended
                # ValidationError could be raised.
                if v["type"] == "string" and not isinstance(data[k], str):
                    raise ValidationError(
                        "Value {0} for key {1} should be string, is {2}".format(
                            data[k], k, type(data[k])
                        )
                    )
                if v["type"] == "datetime":
                    try:
                        datetime.datetime.strptime(data[k], "%Y-%m-%dT%H:%M:%S.%fZ")
                    except ValueError:
                        raise ValidationError(
                            "Value {0} for key {1} doesn't parse as an ISO datetime".format(
                                data[k], k
                            )
                        )
                # BUG FIX: ``index`` was an undefined name in the error
                # message, and the truthiness test ``if v.get("index")``
                # silently skipped index 0 (@timestamp). Compare explicitly
                # against None so position 0 is validated too.
                index = v.get("index")
                if index is not None and list(data.keys())[index] != k:
                    raise ValidationError(
                        "Key {0} is not at index {1}".format(k, index)
                    )

        return data_json

    return validator
139 changes: 139 additions & 0 deletions tests/resources/spec.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
{
"version": 1.0,
"url": "https://www.elastic.co/guide/en/ecs/current/index.html",
"ecs": {
"version": "1.x"
},
"fields": {
"@timestamp": {
"type": "datetime",
"required": true,
"index": 0,
"url": "https://www.elastic.co/guide/en/ecs/current/ecs-base.html"
},
"log.level": {
"type": "string",
"required": true,
"index": 1,
"top_level_field": true,
"url": "https://www.elastic.co/guide/en/ecs/current/ecs-log.html",
"comment": [
"This field SHOULD NOT be a nested object field but at the top level with a dot in the property name.",
"This is to make the JSON logs more human-readable.",
"Loggers MAY indent the log level so that the `message` field always starts at the exact same offset,",
"no matter the number of characters the log level has.",
"For example: `'DEBUG'` (5 chars) will not be indented, whereas ` 'WARN'` (4 chars) will be indented by one space character."
]
},
"message": {
"type": "string",
"required": true,
"index": 2,
"url": "https://www.elastic.co/guide/en/ecs/current/ecs-base.html"
},
"ecs.version": {
"type": "string",
"required": true,
"url": "https://www.elastic.co/guide/en/ecs/current/ecs-ecs.html"
},
"labels": {
"type": "object",
"required": false,
"url": "https://www.elastic.co/guide/en/ecs/current/ecs-base.html",
"sanitization": {
"key": {
"replacements": [
".",
"*",
"\\"
],
"substitute": "_"
}
}
},
"trace.id": {
"type": "string",
"required": false,
"url": "https://www.elastic.co/guide/en/ecs/current/ecs-tracing.html",
"comment": "When APM agents add this field to the context, ecs loggers should pick it up and add it to the log event."
},
"transaction.id": {
"type": "string",
"required": false,
"url": "https://www.elastic.co/guide/en/ecs/current/ecs-tracing.html",
"comment": "When APM agents add this field to the context, ecs loggers should pick it up and add it to the log event."
},
"service.name": {
"type": "string",
"required": false,
"url": "https://www.elastic.co/guide/en/ecs/current/ecs-service.html",
"comment": [
"Configurable by users.",
"When an APM agent is active, they should auto-configure it if not already set."
]
},
"event.dataset": {
"type": "string",
"required": false,
"url": "https://www.elastic.co/guide/en/ecs/current/ecs-event.html",
"default": "${service.name}.log OR ${service.name}.${appender.name}",
"comment": [
"Configurable by users.",
"If the user manually configures the service name,",
"the logging library should set `event.dataset=${service.name}.log` if not explicitly configured otherwise.",
"",
"When agents auto-configure the app to use an ECS logger,",
"they should set `event.dataset=${service.name}.${appender.name}` if the appender name is available in the logging library.",
"Otherwise, agents should also set `event.dataset=${service.name}.log`",
"",
"The field helps to filter for different log streams from the same pod, for example and is required for log anomaly detection."
]
},
"process.thread.name": {
"type": "string",
"required": false,
"url": "https://www.elastic.co/guide/en/ecs/current/ecs-process.html"
},
"log.logger": {
"type": "string",
"required": false,
"url": "https://www.elastic.co/guide/en/ecs/current/ecs-log.html"
},
"log.origin.file.line": {
"type": "integer",
"required": false,
"url": "https://www.elastic.co/guide/en/ecs/current/ecs-log.html",
"comment": "Should be opt-in as it requires the logging library to capture a stack trace for each log event."
},
"log.origin.file.name": {
"type": "string",
"required": false,
"url": "https://www.elastic.co/guide/en/ecs/current/ecs-log.html",
"comment": "Should be opt-in as it requires the logging library to capture a stack trace for each log event."
},
"log.origin.function": {
"type": "string",
"required": false,
"url": "https://www.elastic.co/guide/en/ecs/current/ecs-log.html",
"comment": "Should be opt-in as it requires the logging library to capture a stack trace for each log event."
},
"error.type": {
"type": "string",
"required": false,
"url": "https://www.elastic.co/guide/en/ecs/current/ecs-error.html",
"comment": "The exception type or class, such as `java.lang.IllegalArgumentException`."
},
"error.message": {
"type": "string",
"required": false,
"url": "https://www.elastic.co/guide/en/ecs/current/ecs-error.html",
"comment": "The message of the exception."
},
"error.stack_trace": {
"type": "string",
"required": false,
"url": "https://www.elastic.co/guide/en/ecs/current/ecs-error.html",
"comment": "The stack trace of the exception as plain text."
}
}
}
4 changes: 2 additions & 2 deletions tests/test_apm.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from .compat import StringIO


def test_elasticapm_structlog_log_correlation_ecs_fields():
def test_elasticapm_structlog_log_correlation_ecs_fields(spec_validator):
apm = elasticapm.Client({"SERVICE_NAME": "apm-service", "DISABLE_SEND": True})
stream = StringIO()
logger = structlog.PrintLogger(stream)
Expand All @@ -30,7 +30,7 @@ def test_elasticapm_structlog_log_correlation_ecs_fields():
finally:
apm.end_transaction("test-transaction")

ecs = json.loads(stream.getvalue().rstrip())
ecs = json.loads(spec_validator(stream.getvalue().rstrip()))
ecs.pop("@timestamp")
assert ecs == {
"ecs": {"version": "1.6.0"},
Expand Down
8 changes: 4 additions & 4 deletions tests/test_stdlib_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,25 +35,25 @@ def make_record():
return record


def test_record_formatted():
def test_record_formatted(spec_validator):
formatter = ecs_logging.StdlibFormatter(exclude_fields=["process"])

assert formatter.format(make_record()) == (
assert spec_validator(formatter.format(make_record())) == (
'{"@timestamp":"2020-03-20T14:12:46.123Z","log.level":"debug","message":"1: hello","ecs":{"version":"1.6.0"},'
'"log":{"logger":"logger-name","origin":{"file":{"line":10,"name":"file.py"},"function":"test_function"},'
'"original":"1: hello"}}'
)


def test_can_be_overridden():
def test_can_be_overridden(spec_validator):
class CustomFormatter(ecs_logging.StdlibFormatter):
def format_to_ecs(self, record):
ecs_dict = super(CustomFormatter, self).format_to_ecs(record)
ecs_dict["custom"] = "field"
return ecs_dict

formatter = CustomFormatter(exclude_fields=["process"])
assert formatter.format(make_record()) == (
assert spec_validator(formatter.format(make_record())) == (
'{"@timestamp":"2020-03-20T14:12:46.123Z","log.level":"debug","message":"1: hello",'
'"custom":"field","ecs":{"version":"1.6.0"},"log":{"logger":"logger-name","origin":'
'{"file":{"line":10,"name":"file.py"},"function":"test_function"},"original":"1: hello"}}'
Expand Down
8 changes: 4 additions & 4 deletions tests/test_structlog_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,19 @@ def make_event_dict():


@mock.patch("time.time")
def test_event_dict_formatted(time):
def test_event_dict_formatted(time, spec_validator):
time.return_value = 1584720997.187709

formatter = ecs_logging.StructlogFormatter()
assert formatter(None, "debug", make_event_dict()) == (
assert spec_validator(formatter(None, "debug", make_event_dict())) == (
'{"@timestamp":"2020-03-20T16:16:37.187Z","log.level":"debug",'
'"message":"test message","ecs":{"version":"1.6.0"},'
'"log":{"logger":"logger-name"}}'
)


@mock.patch("time.time")
def test_can_be_set_as_processor(time):
def test_can_be_set_as_processor(time, spec_validator):
time.return_value = 1584720997.187709

stream = StringIO()
Expand All @@ -35,7 +35,7 @@ def test_can_be_set_as_processor(time):
logger = structlog.get_logger("logger-name")
logger.debug("test message", custom="key", **{"dot.ted": 1})

assert stream.getvalue() == (
assert spec_validator(stream.getvalue()) == (
'{"@timestamp":"2020-03-20T16:16:37.187Z","log.level":"debug",'
'"message":"test message","custom":"key","dot":{"ted":1},'
'"ecs":{"version":"1.6.0"}}\n'
Expand Down