Skip to content

Commit 989aab2

Browse files
Fix nested array JSON detection and pre-compute column metadata
- Add '[[' to JSON detection guard in _convert_typed_array so nested arrays like [[1,2],[3]] are parsed via json.loads instead of falling through to native format (which returns None for nested arrays).
- Pre-compute _column_types and _column_names tuples once in _process_metadata. Use them in _get_rows to eliminate per-cell meta.get("Type") and meta.get("Name") dict lookups.
- S3FSResultSet._fetch() reuses _column_types from parent instead of rebuilding from self.description on every call.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 311f039 commit 989aab2

File tree

4 files changed

+66
-21
lines changed

4 files changed

+66
-21
lines changed

pyathena/parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ def _convert_typed_array(self, value: str, type_node: TypeNode) -> list[Any] | N
279279

280280
# Try JSON first (only if content looks like JSON)
281281
inner_preview = value[1:10] if len(value) > 10 else value[1:-1]
282-
if '"' in inner_preview or value.startswith(("[{", "[null")):
282+
if '"' in inner_preview or value.startswith(("[{", "[null", "[[")):
283283
try:
284284
parsed = json.loads(value)
285285
if isinstance(parsed, list):

pyathena/result_set.py

Lines changed: 48 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ def __init__(
8787
)
8888

8989
self._metadata: tuple[dict[str, Any], ...] | None = None
90+
self._column_types: tuple[str, ...] | None = None
91+
self._column_names: tuple[str, ...] | None = None
9092
self._column_type_hints: tuple[str | None, ...] | None = None
9193
self._rows: collections.deque[tuple[Any | None, ...] | dict[Any, Any | None]] = (
9294
collections.deque()
@@ -429,14 +431,16 @@ def _process_metadata(self, response: dict[str, Any]) -> None:
429431
if column_info is None:
430432
raise DataError("KeyError `ColumnInfo`")
431433
self._metadata = tuple(column_info)
434+
self._column_types = tuple(m.get("Type", "") for m in self._metadata)
435+
self._column_names = tuple(m.get("Name", "") for m in self._metadata)
432436
if self._result_set_type_hints and any(
433-
m.get("Type", "").lower() in self._COMPLEX_TYPES for m in self._metadata
437+
t.lower() in self._COMPLEX_TYPES for t in self._column_types
434438
):
435439
hints = tuple(
436440
self._result_set_type_hints.get(m.get("Name", "").lower())
437-
if m.get("Type", "").lower() in self._COMPLEX_TYPES
441+
if t.lower() in self._COMPLEX_TYPES
438442
else None
439-
for m in self._metadata
443+
for m, t in zip(self._metadata, self._column_types, strict=True)
440444
)
441445
if any(hints):
442446
self._column_type_hints = hints
@@ -465,19 +469,28 @@ def _get_rows(
465469
converter: Converter | None = None,
466470
) -> list[tuple[Any | None, ...] | dict[Any, Any | None]]:
467471
conv = converter or self._converter
472+
col_types = self._column_types
468473
col_hints = self._column_type_hints
469-
if col_hints:
474+
if col_hints and col_types:
470475
return [
471476
tuple(
472-
conv.convert(meta.get("Type"), row.get("VarCharValue"), type_hint=hint)
477+
conv.convert(col_type, row.get("VarCharValue"), type_hint=hint)
473478
if hint
474-
else conv.convert(meta.get("Type"), row.get("VarCharValue"))
475-
for meta, row, hint in zip(
476-
metadata, rows[i].get("Data", []), col_hints, strict=False
479+
else conv.convert(col_type, row.get("VarCharValue"))
480+
for col_type, row, hint in zip(
481+
col_types, rows[i].get("Data", []), col_hints, strict=False
477482
)
478483
)
479484
for i in range(offset, len(rows))
480485
]
486+
if col_types:
487+
return [
488+
tuple(
489+
conv.convert(col_type, row.get("VarCharValue"))
490+
for col_type, row in zip(col_types, rows[i].get("Data", []), strict=False)
491+
)
492+
for i in range(offset, len(rows))
493+
]
481494
return [
482495
tuple(
483496
conv.convert(meta.get("Type"), row.get("VarCharValue"))
@@ -639,6 +652,8 @@ def close(self) -> None:
639652
self._connection = None
640653
self._query_execution = None
641654
self._metadata = None
655+
self._column_types = None
656+
self._column_names = None
642657
self._rows.clear()
643658
self._next_token = None
644659
self._rownumber = None
@@ -663,18 +678,37 @@ def _get_rows(
663678
converter: Converter | None = None,
664679
) -> list[tuple[Any | None, ...] | dict[Any, Any | None]]:
665680
conv = converter or self._converter
681+
col_types = self._column_types
682+
col_names = self._column_names
666683
col_hints = self._column_type_hints
667-
if col_hints:
684+
if col_hints and col_types and col_names:
668685
return [
669686
self.dict_type(
670687
(
671-
meta.get("Name"),
672-
conv.convert(meta.get("Type"), row.get("VarCharValue"), type_hint=hint)
688+
name,
689+
conv.convert(col_type, row.get("VarCharValue"), type_hint=hint)
673690
if hint
674-
else conv.convert(meta.get("Type"), row.get("VarCharValue")),
691+
else conv.convert(col_type, row.get("VarCharValue")),
692+
)
693+
for name, col_type, row, hint in zip(
694+
col_names,
695+
col_types,
696+
rows[i].get("Data", []),
697+
col_hints,
698+
strict=False,
699+
)
700+
)
701+
for i in range(offset, len(rows))
702+
]
703+
if col_types and col_names:
704+
return [
705+
self.dict_type(
706+
(
707+
name,
708+
conv.convert(col_type, row.get("VarCharValue")),
675709
)
676-
for meta, row, hint in zip(
677-
metadata, rows[i].get("Data", []), col_hints, strict=False
710+
for name, col_type, row in zip(
711+
col_names, col_types, rows[i].get("Data", []), strict=False
678712
)
679713
)
680714
for i in range(offset, len(rows))

pyathena/s3fs/result_set.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -149,8 +149,10 @@ def _fetch(self) -> None:
149149
if not self._csv_reader:
150150
return
151151

152-
description = self.description if self.description else []
153-
column_types = [d[1] for d in description]
152+
col_types = self._column_types
153+
if not col_types:
154+
description = self.description if self.description else []
155+
col_types = tuple(d[1] for d in description)
154156
col_hints = self._column_type_hints
155157

156158
rows_fetched = 0
@@ -171,25 +173,25 @@ def _fetch(self) -> None:
171173
)
172174
if hint
173175
else self._converter.convert(col_type, value if value != "" else None)
174-
for col_type, value, hint in zip(column_types, row, col_hints, strict=False)
176+
for col_type, value, hint in zip(col_types, row, col_hints, strict=False)
175177
)
176178
else:
177179
converted_row = tuple(
178180
self._converter.convert(col_type, value if value != "" else None)
179-
for col_type, value in zip(column_types, row, strict=False)
181+
for col_type, value in zip(col_types, row, strict=False)
180182
)
181183
else:
182184
if col_hints:
183185
converted_row = tuple(
184186
self._converter.convert(col_type, value, type_hint=hint)
185187
if hint
186188
else self._converter.convert(col_type, value)
187-
for col_type, value, hint in zip(column_types, row, col_hints, strict=False)
189+
for col_type, value, hint in zip(col_types, row, col_hints, strict=False)
188190
)
189191
else:
190192
converted_row = tuple(
191193
self._converter.convert(col_type, value)
192-
for col_type, value in zip(column_types, row, strict=False)
194+
for col_type, value in zip(col_types, row, strict=False)
193195
)
194196
self._rows.append(converted_row)
195197
rows_fetched += 1

tests/pyathena/test_parser.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,15 @@ def test_struct_json_name_based_type_matching(self, converter):
180180
assert isinstance(result["age"], int)
181181
assert isinstance(result["name"], str)
182182

183+
def test_nested_array_json(self, converter):
184+
"""JSON path: nested array like [[1,2],[3]] must be parsed via json.loads."""
185+
parser = TypeSignatureParser()
186+
node = parser.parse("array(array(integer))")
187+
result = converter.convert("[[1, 2], [3]]", node)
188+
assert result == [[1, 2], [3]]
189+
assert isinstance(result[0], list)
190+
assert isinstance(result[0][0], int)
191+
183192
def test_map_json_null_value_preserved(self, converter):
184193
"""JSON path: map with null values vs "null" string values."""
185194
parser = TypeSignatureParser()

0 commit comments

Comments (0)