Skip to content

Commit 1f8d3fd

Browse files
feat: Correct FIPS-mode metrics (#588)
- Our `dogstatsd` client now supports timestamps for the metrics that it will send. - This unblocks us to always send metrics to the extension, even if they have a timestamp. Confirmed that this actually works now with both bottlecap and the go agent. - Refactored the metrics workflow to have an explicit choice of metrics handlers (Extension, Forwarder, Datadog API, or, for some FIPS usecases, No Handler). - Added a `DD_LAMBDA_FIPS_MODE` flag which allows FIPS-mode logic to be enabled in commercial regions or disabled in govcloud regions. - The new FIPS mode is used for Datadog API Key secret lookup and for metrics handling decisions. ### Breaking Change Since the `DD_LAMBDA_FIPS_MODE` defaults to `true` in govcloud, direct metrics submission there (without an Extension or a Forwarder) will now be disabled.
1 parent 72caf0a commit 1f8d3fd

10 files changed

+254
-143
lines changed

datadog_lambda/api.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,18 @@
1-
import os
21
import logging
2+
import os
3+
4+
from datadog_lambda.fips import fips_mode_enabled
35

46
logger = logging.getLogger(__name__)
57
KMS_ENCRYPTION_CONTEXT_KEY = "LambdaFunctionName"
68
api_key = None
79

810

911
def decrypt_kms_api_key(kms_client, ciphertext):
10-
from botocore.exceptions import ClientError
1112
import base64
1213

14+
from botocore.exceptions import ClientError
15+
1316
"""
1417
Decodes and deciphers the base64-encoded ciphertext given as a parameter using KMS.
1518
For this to work properly, the Lambda function must have the appropriate IAM permissions.
@@ -63,10 +66,9 @@ def get_api_key() -> str:
6366
DD_API_KEY = os.environ.get("DD_API_KEY", os.environ.get("DATADOG_API_KEY", ""))
6467

6568
LAMBDA_REGION = os.environ.get("AWS_REGION", "")
66-
is_gov_region = LAMBDA_REGION.startswith("us-gov-")
67-
if is_gov_region:
69+
if fips_mode_enabled:
6870
logger.debug(
69-
"Govcloud region detected. Using FIPs endpoints for secrets management."
71+
"FIPS mode is enabled, using FIPS endpoints for secrets management."
7072
)
7173

7274
if DD_API_KEY_SECRET_ARN:
@@ -80,7 +82,7 @@ def get_api_key() -> str:
8082
return ""
8183
endpoint_url = (
8284
f"https://secretsmanager-fips.{secrets_region}.amazonaws.com"
83-
if is_gov_region
85+
if fips_mode_enabled
8486
else None
8587
)
8688
secrets_manager_client = _boto3_client(
@@ -92,7 +94,9 @@ def get_api_key() -> str:
9294
elif DD_API_KEY_SSM_NAME:
9395
# SSM endpoints: https://docs.aws.amazon.com/general/latest/gr/ssm.html
9496
fips_endpoint = (
95-
f"https://ssm-fips.{LAMBDA_REGION}.amazonaws.com" if is_gov_region else None
97+
f"https://ssm-fips.{LAMBDA_REGION}.amazonaws.com"
98+
if fips_mode_enabled
99+
else None
96100
)
97101
ssm_client = _boto3_client("ssm", endpoint_url=fips_endpoint)
98102
api_key = ssm_client.get_parameter(
@@ -101,7 +105,9 @@ def get_api_key() -> str:
101105
elif DD_KMS_API_KEY:
102106
# KMS endpoints: https://docs.aws.amazon.com/general/latest/gr/kms.html
103107
fips_endpoint = (
104-
f"https://kms-fips.{LAMBDA_REGION}.amazonaws.com" if is_gov_region else None
108+
f"https://kms-fips.{LAMBDA_REGION}.amazonaws.com"
109+
if fips_mode_enabled
110+
else None
105111
)
106112
kms_client = _boto3_client("kms", endpoint_url=fips_endpoint)
107113
api_key = decrypt_kms_api_key(kms_client, DD_KMS_API_KEY)

datadog_lambda/dogstatsd.py

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
1+
import errno
12
import logging
23
import os
3-
import socket
4-
import errno
54
import re
5+
import socket
66
from threading import Lock
77

8-
98
MIN_SEND_BUFFER_SIZE = 32 * 1024
109
log = logging.getLogger("datadog_lambda.dogstatsd")
1110

@@ -55,14 +54,21 @@ def _get_udp_socket(cls, host, port):
5554

5655
return sock
5756

58-
def distribution(self, metric, value, tags=None):
57+
def distribution(self, metric, value, tags=None, timestamp=None):
5958
"""
60-
Send a global distribution value, optionally setting tags.
59+
Send a global distribution value, optionally setting tags. The optional
60+
timestamp should be an integer representing seconds since the epoch
61+
(January 1, 1970, 00:00:00 UTC).
6162
6263
>>> statsd.distribution("uploaded.file.size", 1445)
6364
>>> statsd.distribution("album.photo.count", 26, tags=["gender:female"])
65+
>>> statsd.distribution(
66+
>>> "historic.file.count",
67+
>>> 5,
68+
>>> timestamp=int(datetime(2020, 2, 14, 12, 0, 0).timestamp()),
69+
>>> )
6470
"""
65-
self._report(metric, "d", value, tags)
71+
self._report(metric, "d", value, tags, timestamp)
6672

6773
def close_socket(self):
6874
"""
@@ -84,20 +90,21 @@ def normalize_tags(self, tag_list):
8490
for tag in tag_list
8591
]
8692

87-
def _serialize_metric(self, metric, metric_type, value, tags):
93+
def _serialize_metric(self, metric, metric_type, value, tags, timestamp):
8894
# Create/format the metric packet
89-
return "%s:%s|%s%s" % (
95+
return "%s:%s|%s%s%s" % (
9096
metric,
9197
value,
9298
metric_type,
9399
("|#" + ",".join(self.normalize_tags(tags))) if tags else "",
100+
("|T" + str(timestamp)) if timestamp is not None else "",
94101
)
95102

96-
def _report(self, metric, metric_type, value, tags):
103+
def _report(self, metric, metric_type, value, tags, timestamp):
97104
if value is None:
98105
return
99106

100-
payload = self._serialize_metric(metric, metric_type, value, tags)
107+
payload = self._serialize_metric(metric, metric_type, value, tags, timestamp)
101108

102109
# Send it
103110
self._send_to_server(payload)

datadog_lambda/fips.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
import logging
2+
import os
3+
4+
is_gov_region = os.environ.get("AWS_REGION", "").startswith("us-gov-")
5+
6+
fips_mode_enabled = (
7+
os.environ.get(
8+
"DD_LAMBDA_FIPS_MODE",
9+
"true" if is_gov_region else "false",
10+
).lower()
11+
== "true"
12+
)
13+
14+
if is_gov_region or fips_mode_enabled:
15+
logger = logging.getLogger(__name__)
16+
logger.debug(
17+
"Python Lambda Layer FIPS mode is %s.",
18+
"enabled" if fips_mode_enabled else "not enabled",
19+
)

datadog_lambda/metric.py

Lines changed: 84 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -3,37 +3,66 @@
33
# This product includes software developed at Datadog (https://www.datadoghq.com/).
44
# Copyright 2019 Datadog, Inc.
55

6+
import enum
7+
import logging
68
import os
79
import time
8-
import logging
9-
import ujson as json
1010
from datetime import datetime, timedelta
1111

12+
import ujson as json
13+
1214
from datadog_lambda.extension import should_use_extension
13-
from datadog_lambda.tags import get_enhanced_metrics_tags, dd_lambda_layer_tag
15+
from datadog_lambda.fips import fips_mode_enabled
16+
from datadog_lambda.tags import dd_lambda_layer_tag, get_enhanced_metrics_tags
1417

1518
logger = logging.getLogger(__name__)
1619

17-
lambda_stats = None
18-
extension_thread_stats = None
1920

20-
flush_in_thread = os.environ.get("DD_FLUSH_IN_THREAD", "").lower() == "true"
21+
class MetricsHandler(enum.Enum):
22+
EXTENSION = "extension"
23+
FORWARDER = "forwarder"
24+
DATADOG_API = "datadog_api"
25+
NO_METRICS = "no_metrics"
26+
2127

22-
if should_use_extension:
28+
def _select_metrics_handler():
29+
if should_use_extension:
30+
return MetricsHandler.EXTENSION
31+
if os.environ.get("DD_FLUSH_TO_LOG", "").lower() == "true":
32+
return MetricsHandler.FORWARDER
33+
34+
if fips_mode_enabled:
35+
logger.debug(
36+
"With FIPS mode enabled, the Datadog API metrics handler is unavailable."
37+
)
38+
return MetricsHandler.NO_METRICS
39+
40+
return MetricsHandler.DATADOG_API
41+
42+
43+
metrics_handler = _select_metrics_handler()
44+
logger.debug("identified primary metrics handler as %s", metrics_handler)
45+
46+
47+
lambda_stats = None
48+
if metrics_handler == MetricsHandler.EXTENSION:
2349
from datadog_lambda.statsd_writer import StatsDWriter
2450

2551
lambda_stats = StatsDWriter()
26-
else:
52+
53+
elif metrics_handler == MetricsHandler.DATADOG_API:
2754
# Periodical flushing in a background thread is NOT guaranteed to succeed
2855
# and leads to data loss. When disabled, metrics are only flushed at the
2956
# end of invocation. To make metrics submitted from a long-running Lambda
3057
# function available sooner, consider using the Datadog Lambda extension.
31-
from datadog_lambda.thread_stats_writer import ThreadStatsWriter
3258
from datadog_lambda.api import init_api
59+
from datadog_lambda.thread_stats_writer import ThreadStatsWriter
3360

61+
flush_in_thread = os.environ.get("DD_FLUSH_IN_THREAD", "").lower() == "true"
3462
init_api()
3563
lambda_stats = ThreadStatsWriter(flush_in_thread)
3664

65+
3766
enhanced_metrics_enabled = (
3867
os.environ.get("DD_ENHANCED_METRICS", "true").lower() == "true"
3968
)
@@ -44,16 +73,19 @@ def lambda_metric(metric_name, value, timestamp=None, tags=None, force_async=Fal
4473
Submit a data point to Datadog distribution metrics.
4574
https://docs.datadoghq.com/graphing/metrics/distributions/
4675
47-
When DD_FLUSH_TO_LOG is True, write metric to log, and
48-
wait for the Datadog Log Forwarder Lambda function to submit
49-
the metrics asynchronously.
76+
If the Datadog Lambda Extension is present, metrics are submitted to its
77+
dogstatsd endpoint.
78+
79+
When DD_FLUSH_TO_LOG is True or force_async is True, write metric to log,
80+
and wait for the Datadog Log Forwarder Lambda function to submit the
81+
metrics asynchronously.
5082
5183
Otherwise, the metrics will be submitted to the Datadog API
5284
periodically and at the end of the function execution in a
5385
background thread.
5486
55-
Note that if the extension is present, it will override the DD_FLUSH_TO_LOG value
56-
and always use the layer to send metrics to the extension
87+
Note that if the extension is present, it will override the DD_FLUSH_TO_LOG
88+
value and always use the layer to send metrics to the extension
5789
"""
5890
if not metric_name or not isinstance(metric_name, str):
5991
logger.warning(
@@ -71,56 +103,54 @@ def lambda_metric(metric_name, value, timestamp=None, tags=None, force_async=Fal
71103
)
72104
return
73105

74-
flush_to_logs = os.environ.get("DD_FLUSH_TO_LOG", "").lower() == "true"
75106
tags = [] if tags is None else list(tags)
76107
tags.append(dd_lambda_layer_tag)
77108

78-
if should_use_extension and timestamp is not None:
79-
# The extension does not support timestamps for distributions so we create a
80-
# a thread stats writer to submit metrics with timestamps to the API
81-
timestamp_ceiling = int(
82-
(datetime.now() - timedelta(hours=4)).timestamp()
83-
) # 4 hours ago
84-
if isinstance(timestamp, datetime):
85-
timestamp = int(timestamp.timestamp())
86-
if timestamp_ceiling > timestamp:
87-
logger.warning(
88-
"Timestamp %s is older than 4 hours, not submitting metric %s",
89-
timestamp,
90-
metric_name,
91-
)
92-
return
93-
global extension_thread_stats
94-
if extension_thread_stats is None:
95-
from datadog_lambda.thread_stats_writer import ThreadStatsWriter
96-
from datadog_lambda.api import init_api
97-
98-
init_api()
99-
extension_thread_stats = ThreadStatsWriter(flush_in_thread)
100-
101-
extension_thread_stats.distribution(
102-
metric_name, value, tags=tags, timestamp=timestamp
103-
)
104-
return
109+
if metrics_handler == MetricsHandler.EXTENSION:
110+
if timestamp is not None:
111+
if isinstance(timestamp, datetime):
112+
timestamp = int(timestamp.timestamp())
113+
114+
timestamp_floor = int((datetime.now() - timedelta(hours=4)).timestamp())
115+
if timestamp < timestamp_floor:
116+
logger.warning(
117+
"Timestamp %s is older than 4 hours, not submitting metric %s",
118+
timestamp,
119+
metric_name,
120+
)
121+
return
105122

106-
if should_use_extension:
107123
logger.debug(
108124
"Sending metric %s value %s to Datadog via extension", metric_name, value
109125
)
110126
lambda_stats.distribution(metric_name, value, tags=tags, timestamp=timestamp)
127+
128+
elif force_async or (metrics_handler == MetricsHandler.FORWARDER):
129+
write_metric_point_to_stdout(metric_name, value, timestamp=timestamp, tags=tags)
130+
131+
elif metrics_handler == MetricsHandler.DATADOG_API:
132+
lambda_stats.distribution(metric_name, value, tags=tags, timestamp=timestamp)
133+
134+
elif metrics_handler == MetricsHandler.NO_METRICS:
135+
logger.debug(
136+
"Metric %s cannot be submitted because the metrics handler is disabled",
137+
metric_name,
138+
),
139+
111140
else:
112-
if flush_to_logs or force_async:
113-
write_metric_point_to_stdout(
114-
metric_name, value, timestamp=timestamp, tags=tags
115-
)
116-
else:
117-
lambda_stats.distribution(
118-
metric_name, value, tags=tags, timestamp=timestamp
119-
)
141+
# This should be quite impossible, but let's at least log a message if
142+
# it somehow happens.
143+
logger.debug(
144+
"Metric %s cannot be submitted because the metrics handler is not configured: %s",
145+
metric_name,
146+
metrics_handler,
147+
)
120148

121149

122-
def write_metric_point_to_stdout(metric_name, value, timestamp=None, tags=[]):
150+
def write_metric_point_to_stdout(metric_name, value, timestamp=None, tags=None):
123151
"""Writes the specified metric point to standard output"""
152+
tags = tags or []
153+
124154
logger.debug(
125155
"Sending metric %s value %s to Datadog via log forwarder", metric_name, value
126156
)
@@ -138,19 +168,8 @@ def write_metric_point_to_stdout(metric_name, value, timestamp=None, tags=[]):
138168

139169

140170
def flush_stats(lambda_context=None):
141-
lambda_stats.flush()
142-
143-
if extension_thread_stats is not None:
144-
tags = None
145-
if lambda_context is not None:
146-
tags = get_enhanced_metrics_tags(lambda_context)
147-
split_arn = lambda_context.invoked_function_arn.split(":")
148-
if len(split_arn) > 7:
149-
# Get rid of the alias
150-
split_arn.pop()
151-
arn = ":".join(split_arn)
152-
tags.append("function_arn:" + arn)
153-
extension_thread_stats.flush(tags)
171+
if lambda_stats is not None:
172+
lambda_stats.flush()
154173

155174

156175
def submit_enhanced_metric(metric_name, lambda_context):

datadog_lambda/stats_writer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
class StatsWriter:
2-
def distribution(self, metric_name, value, tags=[], timestamp=None):
2+
def distribution(self, metric_name, value, tags=None, timestamp=None):
33
raise NotImplementedError()
44

55
def flush(self):

datadog_lambda/statsd_writer.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
1-
from datadog_lambda.stats_writer import StatsWriter
21
from datadog_lambda.dogstatsd import statsd
2+
from datadog_lambda.stats_writer import StatsWriter
33

44

55
class StatsDWriter(StatsWriter):
66
"""
77
Writes distribution metrics using StatsD protocol
88
"""
99

10-
def distribution(self, metric_name, value, tags=[], timestamp=None):
11-
statsd.distribution(metric_name, value, tags=tags)
10+
def distribution(self, metric_name, value, tags=None, timestamp=None):
11+
statsd.distribution(metric_name, value, tags=tags, timestamp=timestamp)
1212

1313
def flush(self):
1414
pass

0 commit comments

Comments
 (0)