Skip to content

Commit 989aab2

Browse files
Fix nested array JSON detection and pre-compute column metadata
- Add '[[' to JSON detection guard in _convert_typed_array so nested arrays like [[1,2],[3]] are parsed via json.loads instead of falling through to native format (which returns None for nested arrays).
- Pre-compute _column_types and _column_names tuples once in _process_metadata. Use them in _get_rows to eliminate per-cell meta.get("Type") and meta.get("Name") dict lookups.
- S3FSResultSet._fetch() reuses _column_types from parent instead of rebuilding from self.description on every call.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 311f039 commit 989aab2

File tree

4 files changed

+66
-21
lines changed

4 files changed

+66
-21
lines changed

pyathena/parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ def _convert_typed_array(self, value: str, type_node: TypeNode) -> list[Any] | N
279279

280280
# Try JSON first (only if content looks like JSON)
281281
inner_preview = value[1:10] if len(value) > 10 else value[1:-1]
282-
if '"' in inner_preview or value.startswith(("[{", "[null")):
282+
if '"' in inner_preview or value.startswith(("[{", "[null", "[[")):
283283
try:
284284
parsed = json.loads(value)
285285
if isinstance(parsed, list):

pyathena/result_set.py

Lines changed: 48 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ def __init__(
8787
)
8888

8989
self._metadata: tuple[dict[str, Any], ...] | None = None
90+
self._column_types: tuple[str, ...] | None = None
91+
self._column_names: tuple[str, ...] | None = None
9092
self._column_type_hints: tuple[str | None, ...] | None = None
9193
self._rows: collections.deque[tuple[Any | None, ...] | dict[Any, Any | None]] = (
9294
collections.deque()
@@ -429,14 +431,16 @@ def _process_metadata(self, response: dict[str, Any]) -> None:
429431
if column_info is None:
430432
raise DataError("KeyError `ColumnInfo`")
431433
self._metadata = tuple(column_info)
434+
self._column_types = tuple(m.get("Type", "") for m in self._metadata)
435+
self._column_names = tuple(m.get("Name", "") for m in self._metadata)
432436
if self._result_set_type_hints and any(
433-
m.get("Type", "").lower() in self._COMPLEX_TYPES for m in self._metadata
437+
t.lower() in self._COMPLEX_TYPES for t in self._column_types
434438
):
435439
hints = tuple(
436440
self._result_set_type_hints.get(m.get("Name", "").lower())
437-
if m.get("Type", "").lower() in self._COMPLEX_TYPES
441+
if t.lower() in self._COMPLEX_TYPES
438442
else None
439-
for m in self._metadata
443+
for m, t in zip(self._metadata, self._column_types, strict=True)
440444
)
441445
if any(hints):
442446
self._column_type_hints = hints
@@ -465,19 +469,28 @@ def _get_rows(
465469
converter: Converter | None = None,
466470
) -> list[tuple[Any | None, ...] | dict[Any, Any | None]]:
467471
conv = converter or self._converter
472+
col_types = self._column_types
468473
col_hints = self._column_type_hints
469-
if col_hints:
474+
if col_hints and col_types:
470475
return [
471476
tuple(
472-
conv.convert(meta.get("Type"), row.get("VarCharValue"), type_hint=hint)
477+
conv.convert(col_type, row.get("VarCharValue"), type_hint=hint)
473478
if hint
474-
else conv.convert(meta.get("Type"), row.get("VarCharValue"))
475-
for meta, row, hint in zip(
476-
metadata, rows[i].get("Data", []), col_hints, strict=False
479+
else conv.convert(col_type, row.get("VarCharValue"))
480+
for col_type, row, hint in zip(
481+
col_types, rows[i].get("Data", []), col_hints, strict=False
477482
)
478483
)
479484
for i in range(offset, len(rows))
480485
]
486+
if col_types:
487+
return [
488+
tuple(
489+
conv.convert(col_type, row.get("VarCharValue"))
490+
for col_type, row in zip(col_types, rows[i].get("Data", []), strict=False)
491+
)
492+
for i in range(offset, len(rows))
493+
]
481494
return [
482495
tuple(
483496
conv.convert(meta.get("Type"), row.get("VarCharValue"))
@@ -639,6 +652,8 @@ def close(self) -> None:
639652
self._connection = None
640653
self._query_execution = None
641654
self._metadata = None
655+
self._column_types = None
656+
self._column_names = None
642657
self._rows.clear()
643658
self._next_token = None
644659
self._rownumber = None
@@ -663,18 +678,37 @@ def _get_rows(
663678
converter: Converter | None = None,
664679
) -> list[tuple[Any | None, ...] | dict[Any, Any | None]]:
665680
conv = converter or self._converter
681+
col_types = self._column_types
682+
col_names = self._column_names
666683
col_hints = self._column_type_hints
667-
if col_hints:
684+
if col_hints and col_types and col_names:
668685
return [
669686
self.dict_type(
670687
(
671-
meta.get("Name"),
672-
conv.convert(meta.get("Type"), row.get("VarCharValue"), type_hint=hint)
688+
name,
689+
conv.convert(col_type, row.get("VarCharValue"), type_hint=hint)
673690
if hint
674-
else conv.convert(meta.get("Type"), row.get("VarCharValue")),
691+
else conv.convert(col_type, row.get("VarCharValue")),
692+
)
693+
for name, col_type, row, hint in zip(
694+
col_names,
695+
col_types,
696+
rows[i].get("Data", []),
697+
col_hints,
698+
strict=False,
699+
)
700+
)
701+
for i in range(offset, len(rows))
702+
]
703+
if col_types and col_names:
704+
return [
705+
self.dict_type(
706+
(
707+
name,
708+
conv.convert(col_type, row.get("VarCharValue")),
675709
)
676-
for meta, row, hint in zip(
677-
metadata, rows[i].get("Data", []), col_hints, strict=False
710+
for name, col_type, row in zip(
711+
col_names, col_types, rows[i].get("Data", []), strict=False
678712
)
679713
)
680714
for i in range(offset, len(rows))

pyathena/s3fs/result_set.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -149,8 +149,10 @@ def _fetch(self) -> None:
149149
if not self._csv_reader:
150150
return
151151

152-
description = self.description if self.description else []
153-
column_types = [d[1] for d in description]
152+
col_types = self._column_types
153+
if not col_types:
154+
description = self.description if self.description else []
155+
col_types = tuple(d[1] for d in description)
154156
col_hints = self._column_type_hints
155157

156158
rows_fetched = 0
@@ -171,25 +173,25 @@ def _fetch(self) -> None:
171173
)
172174
if hint
173175
else self._converter.convert(col_type, value if value != "" else None)
174-
for col_type, value, hint in zip(column_types, row, col_hints, strict=False)
176+
for col_type, value, hint in zip(col_types, row, col_hints, strict=False)
175177
)
176178
else:
177179
converted_row = tuple(
178180
self._converter.convert(col_type, value if value != "" else None)
179-
for col_type, value in zip(column_types, row, strict=False)
181+
for col_type, value in zip(col_types, row, strict=False)
180182
)
181183
else:
182184
if col_hints:
183185
converted_row = tuple(
184186
self._converter.convert(col_type, value, type_hint=hint)
185187
if hint
186188
else self._converter.convert(col_type, value)
187-
for col_type, value, hint in zip(column_types, row, col_hints, strict=False)
189+
for col_type, value, hint in zip(col_types, row, col_hints, strict=False)
188190
)
189191
else:
190192
converted_row = tuple(
191193
self._converter.convert(col_type, value)
192-
for col_type, value in zip(column_types, row, strict=False)
194+
for col_type, value in zip(col_types, row, strict=False)
193195
)
194196
self._rows.append(converted_row)
195197
rows_fetched += 1

tests/pyathena/test_parser.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,15 @@ def test_struct_json_name_based_type_matching(self, converter):
180180
assert isinstance(result["age"], int)
181181
assert isinstance(result["name"], str)
182182

183+
def test_nested_array_json(self, converter):
184+
"""JSON path: nested array like [[1,2],[3]] must be parsed via json.loads."""
185+
parser = TypeSignatureParser()
186+
node = parser.parse("array(array(integer))")
187+
result = converter.convert("[[1, 2], [3]]", node)
188+
assert result == [[1, 2], [3]]
189+
assert isinstance(result[0], list)
190+
assert isinstance(result[0][0], int)
191+
183192
def test_map_json_null_value_preserved(self, converter):
184193
"""JSON path: map with null values vs "null" string values."""
185194
parser = TypeSignatureParser()

0 commit comments

Comments (0)