Skip to content

Commit 3e3bee8

Browse files
authored
feature: Add JSON lines deserializer (#1767)
1 parent f7b3f6b commit 3e3bee8

File tree

2 files changed

+49
-0
lines changed

2 files changed

+49
-0
lines changed

src/sagemaker/deserializers.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,3 +245,28 @@ def deserialize(self, stream, content_type):
245245
return pandas.read_json(stream)
246246

247247
raise ValueError("%s cannot read content type %s." % (__class__.__name__, content_type))
248+
249+
250+
class JSONLinesDeserializer(BaseDeserializer):
    """Deserialize JSON lines data from an inference endpoint."""

    ACCEPT = "application/jsonlines"

    def deserialize(self, stream, content_type):
        """Deserialize JSON lines data from an inference endpoint.

        See https://docs.python.org/3/library/json.html#json-to-py-table to
        understand how JSON values are converted to Python objects.

        Args:
            stream (botocore.response.StreamingBody): Data to be deserialized.
                Any object with ``read()`` and ``close()`` that yields either
                ``bytes`` or ``str`` is accepted.
            content_type (str): The MIME type of the data.

        Returns:
            list: One deserialized Python object per non-blank line, in the
                order the lines appear. An empty payload yields an empty list.

        Raises:
            json.JSONDecodeError: If a non-blank line is not valid JSON.
            UnicodeDecodeError: If a bytes payload is not valid UTF-8.
        """
        try:
            body = stream.read()
            # A StreamingBody returns bytes, while text streams such as
            # io.StringIO return str; normalise to str before splitting.
            if isinstance(body, (bytes, bytearray)):
                body = body.decode("utf-8")
            # Split on "\n" only: str.splitlines() would also break on
            # characters such as U+2028 that may appear unescaped inside a
            # JSON string. A trailing "\r" is whitespace to json.loads.
            lines = body.split("\n")
            # Blank lines (including the one left by a trailing newline or an
            # empty payload) carry no record, so they are skipped.
            return [json.loads(line) for line in lines if line.strip()]
        finally:
            stream.close()

tests/unit/sagemaker/test_deserializers.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
NumpyDeserializer,
2828
JSONDeserializer,
2929
PandasDeserializer,
30+
JSONLinesDeserializer,
3031
)
3132

3233

@@ -208,3 +209,26 @@ def test_pandas_deserializer_csv(pandas_deserializer):
208209
result = pandas_deserializer.deserialize(stream, "text/csv")
209210
expected = pd.DataFrame([["a", "b"], ["c", "d"]], columns=["col 1", "col 2"])
210211
assert result.equals(expected)
212+
213+
214+
@pytest.fixture
def json_lines_deserializer():
    """Provide a new ``JSONLinesDeserializer`` instance to each test."""
    deserializer = JSONLinesDeserializer()
    return deserializer
217+
218+
219+
@pytest.mark.parametrize(
    "source, expected",
    [
        # Two array records, no trailing newline.
        ('["Name", "Score"]\n["Gilbert", 24]', [["Name", "Score"], ["Gilbert", 24]]),
        # Same records with a trailing newline.
        ('["Name", "Score"]\n["Gilbert", 24]\n', [["Name", "Score"], ["Gilbert", 24]]),
        # Object records.
        (
            '{"Name": "Gilbert", "Score": 24}\n{"Name": "Alexa", "Score": 29}',
            [{"Name": "Gilbert", "Score": 24}, {"Name": "Alexa", "Score": 29}],
        ),
    ],
)
def test_json_lines_deserializer(json_lines_deserializer, source, expected):
    """Check that each JSON line is parsed into the matching Python object."""
    result = json_lines_deserializer.deserialize(
        io.StringIO(source), "application/jsonlines"
    )
    assert result == expected

0 commit comments

Comments
 (0)