Merged
Changes from 3 commits
8 changes: 0 additions & 8 deletions bigframes/core/array_value.py
@@ -18,7 +18,6 @@
 import functools
 import typing
 from typing import Iterable, List, Mapping, Optional, Sequence, Tuple
-import warnings
 
 import google.cloud.bigquery
 import pandas
@@ -37,7 +36,6 @@
 import bigframes.core.tree_properties
 from bigframes.core.window_spec import WindowSpec
 import bigframes.dtypes
-import bigframes.exceptions as bfe
 import bigframes.operations as ops
 import bigframes.operations.aggregations as agg_ops
 
@@ -101,12 +99,6 @@ def from_table(
 ):
     if offsets_col and primary_key:
         raise ValueError("must set at most one of 'offsets', 'primary_key'")
-    if any(i.field_type == "JSON" for i in table.schema if i.name in schema.names):
-        msg = bfe.format_message(
-            "JSON column interpretation as a custom PyArrow extension in `db_dtypes` "
-            "is a preview feature and subject to change."
-        )
-        warnings.warn(msg, bfe.PreviewWarning)
     # define data source only for needed columns, this makes row-hashing cheaper
     table_def = nodes.GbqTable.from_table(table, columns=schema.names)
 
8 changes: 6 additions & 2 deletions bigframes/core/indexes/base.py
@@ -171,12 +171,16 @@ def shape(self) -> typing.Tuple[int]:
 
     @property
     def dtype(self):
-        return self._block.index.dtypes[0] if self.nlevels == 1 else np.dtype("O")
+        dtype = self._block.index.dtypes[0] if self.nlevels == 1 else np.dtype("O")
+        bigframes.dtypes.warn_on_db_dtypes_json_dtype([dtype])
+        return dtype
 
     @property
     def dtypes(self) -> pandas.Series:
+        dtypes = self._block.index.dtypes
+        bigframes.dtypes.warn_on_db_dtypes_json_dtype(dtypes)
         return pandas.Series(
-            data=self._block.index.dtypes,
+            data=dtypes,
             index=typing.cast(typing.Tuple, self._block.index.names),
         )
 
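These accessors raise the warning lazily, at the point where a JSON dtype is actually inspected, rather than eagerly when the table is read (see the removal in bigframes/core/array_value.py above). A minimal sketch of opting out, assuming the caller accepts the current db_dtypes-backed representation:

    import warnings

    import bigframes.exceptions

    # The dtype/dtypes accessors emit JSONDtypeWarning, so a standard
    # warnings filter silences it process-wide.
    warnings.simplefilter("ignore", bigframes.exceptions.JSONDtypeWarning)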
4 changes: 3 additions & 1 deletion bigframes/dataframe.py
@@ -321,7 +321,9 @@ def at(self) -> indexers.AtDataFrameIndexer:
 
     @property
     def dtypes(self) -> pandas.Series:
-        return pandas.Series(data=self._block.dtypes, index=self._block.column_labels)
+        dtypes = self._block.dtypes
+        bigframes.dtypes.warn_on_db_dtypes_json_dtype(dtypes)
+        return pandas.Series(data=dtypes, index=self._block.column_labels)
 
     @property
     def columns(self) -> pandas.Index:
42 changes: 41 additions & 1 deletion bigframes/dtypes.py
@@ -20,6 +20,7 @@
 import textwrap
 import typing
 from typing import Any, Dict, List, Literal, Sequence, Union
+import warnings
 
 import bigframes_vendored.constants as constants
 import db_dtypes  # type: ignore
@@ -30,6 +31,8 @@
 import pyarrow as pa
 import shapely.geometry  # type: ignore
 
+import bigframes.exceptions
+
 # Type hints for Pandas dtypes supported by BigQuery DataFrame
 Dtype = Union[
     pd.BooleanDtype,
@@ -62,7 +65,8 @@
 # No arrow equivalent
 GEO_DTYPE = gpd.array.GeometryDtype()
 # JSON
-# TODO: switch to pyarrow.json_(pyarrow.string()) when available.
+# TODO(https://github.com/pandas-dev/pandas/issues/60958): switch to
+# pyarrow.json_(pyarrow.string()) when pandas 3+ and pyarrow 18+ are installed.
 JSON_ARROW_TYPE = db_dtypes.JSONArrowType()
 JSON_DTYPE = pd.ArrowDtype(JSON_ARROW_TYPE)
 OBJ_REF_DTYPE = pd.ArrowDtype(
@@ -915,3 +919,39 @@ def lcd_type_or_throw(dtype1: Dtype, dtype2: Dtype) -> Dtype:
 
 
 TIMEDELTA_DESCRIPTION_TAG = "#microseconds"
+
+
+def contains_db_dtypes_json_arrow_type(type_):
+    if isinstance(type_, db_dtypes.JSONArrowType):
+        return True
+
+    if isinstance(type_, pa.ListType):
+        return contains_db_dtypes_json_arrow_type(type_.value_type)
+
+    if isinstance(type_, pa.StructType):
+        return any(
+            contains_db_dtypes_json_arrow_type(field.type) for field in type_.fields
+        )
+    return False
+
+
+def contains_db_dtypes_json_dtype(dtype):
+    if not isinstance(dtype, pd.ArrowDtype):
+        return False
+
+    return contains_db_dtypes_json_arrow_type(dtype.pyarrow_dtype)
+
+
+def warn_on_db_dtypes_json_dtype(dtypes):
+    """Warn that the JSON dtype is changing.
+
+    Note: only call this function if the user is explicitly checking the
+    dtypes.
+    """
+    if any(contains_db_dtypes_json_dtype(dtype) for dtype in dtypes):
+        msg = bigframes.exceptions.format_message(
+            "JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_()) "
+            "instead of using `db_dtypes` in the future when available in pandas "
+            "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow."
+        )
+        warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)
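The helper recurses through list and struct element types, so JSON nested inside containers is detected as well. A small sketch of the intended behavior, runnable against this branch (all names as defined in the hunk above):

    import warnings

    import db_dtypes
    import pandas as pd
    import pyarrow as pa

    import bigframes.dtypes
    import bigframes.exceptions

    # Detection recurses into list and struct element types.
    nested = pa.list_(pa.struct([("payload", db_dtypes.JSONArrowType())]))
    assert bigframes.dtypes.contains_db_dtypes_json_arrow_type(nested)

    # Non-Arrow-backed dtypes never contain the JSON extension type.
    assert not bigframes.dtypes.contains_db_dtypes_json_dtype(pd.BooleanDtype())

    # Inspecting a JSON dtype emits the new JSONDtypeWarning.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        bigframes.dtypes.warn_on_db_dtypes_json_dtype([bigframes.dtypes.JSON_DTYPE])
    assert issubclass(caught[0].category, bigframes.exceptions.JSONDtypeWarning)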
4 changes: 4 additions & 0 deletions bigframes/exceptions.py
@@ -111,6 +111,10 @@ class FunctionAxisOnePreviewWarning(PreviewWarning):
     """Remote Function and Managed UDF with axis=1 preview."""
 
 
+class JSONDtypeWarning(PreviewWarning):
+    """JSON dtype will be pd.ArrowDtype(pa.json_()) in the future."""
+
+
 class FunctionConflictTypeHintWarning(UserWarning):
     """Conflicting type hints in a BigFrames function."""
 
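Since the new warning subclasses PreviewWarning, existing filters that target PreviewWarning keep covering it unchanged:

    import bigframes.exceptions

    # Any filter on the PreviewWarning base class also matches JSONDtypeWarning.
    assert issubclass(
        bigframes.exceptions.JSONDtypeWarning,
        bigframes.exceptions.PreviewWarning,
    )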
2 changes: 2 additions & 0 deletions bigframes/series.py
@@ -113,10 +113,12 @@ def dt(self) -> dt.DatetimeMethods:
 
     @property
     def dtype(self):
+        bigframes.dtypes.warn_on_db_dtypes_json_dtype([self._dtype])
         return self._dtype
 
     @property
     def dtypes(self):
+        bigframes.dtypes.warn_on_db_dtypes_json_dtype([self._dtype])
         return self._dtype
 
     @property
@@ -20,7 +20,6 @@
 import pandas as pd
 import pyarrow as pa  # type: ignore
 import pytest
-import shapely.geometry  # type: ignore
 
 import bigframes.core.compile.ibis_types
 import bigframes.dtypes
@@ -225,22 +224,6 @@ def test_bigframes_string_dtype_converts(ibis_dtype, bigframes_dtype_str):
     assert result == ibis_dtype
 
 
-@pytest.mark.parametrize(
-    ["python_type", "expected_dtype"],
-    [
-        (bool, bigframes.dtypes.BOOL_DTYPE),
-        (int, bigframes.dtypes.INT_DTYPE),
-        (str, bigframes.dtypes.STRING_DTYPE),
-        (shapely.geometry.Point, bigframes.dtypes.GEO_DTYPE),
-        (shapely.geometry.Polygon, bigframes.dtypes.GEO_DTYPE),
-        (shapely.geometry.base.BaseGeometry, bigframes.dtypes.GEO_DTYPE),
-    ],
-)
-def test_bigframes_type_supports_python_types(python_type, expected_dtype):
-    got_dtype = bigframes.dtypes.bigframes_type(python_type)
-    assert got_dtype == expected_dtype
-
-
 def test_unsupported_dtype_raises_unexpected_datatype():
     """Incompatible dtypes should fail when passed into BigQuery DataFrames"""
     with pytest.raises(ValueError, match="Datatype has no ibis type mapping"):
@@ -265,19 +248,3 @@ def test_literal_to_ibis_scalar_converts(literal, ibis_scalar):
     assert bigframes.core.compile.ibis_types.literal_to_ibis_scalar(literal).equals(
         ibis_scalar
     )
-
-
-@pytest.mark.parametrize(
-    ["scalar", "expected_dtype"],
-    [
-        (pa.scalar(1_000_000_000, type=pa.int64()), bigframes.dtypes.INT_DTYPE),
-        (pa.scalar(True, type=pa.bool_()), bigframes.dtypes.BOOL_DTYPE),
-        (pa.scalar("hello", type=pa.string()), bigframes.dtypes.STRING_DTYPE),
-        # Support NULL scalars.
-        (pa.scalar(None, type=pa.int64()), bigframes.dtypes.INT_DTYPE),
-        (pa.scalar(None, type=pa.bool_()), bigframes.dtypes.BOOL_DTYPE),
-        (pa.scalar(None, type=pa.string()), bigframes.dtypes.STRING_DTYPE),
-    ],
-)
-def test_infer_literal_type_arrow_scalar(scalar, expected_dtype):
-    assert bigframes.dtypes.infer_literal_type(scalar) == expected_dtype
73 changes: 73 additions & 0 deletions tests/unit/test_dtypes.py
@@ -0,0 +1,73 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import db_dtypes
+import pyarrow as pa  # type: ignore
+import pytest
+import shapely.geometry  # type: ignore
+
+import bigframes.dtypes
+
+
+@pytest.mark.parametrize(
+    ["python_type", "expected_dtype"],
+    [
+        (bool, bigframes.dtypes.BOOL_DTYPE),
+        (int, bigframes.dtypes.INT_DTYPE),
+        (str, bigframes.dtypes.STRING_DTYPE),
+        (shapely.geometry.Point, bigframes.dtypes.GEO_DTYPE),
+        (shapely.geometry.Polygon, bigframes.dtypes.GEO_DTYPE),
+        (shapely.geometry.base.BaseGeometry, bigframes.dtypes.GEO_DTYPE),
+    ],
+)
+def test_bigframes_type_supports_python_types(python_type, expected_dtype):
+    got_dtype = bigframes.dtypes.bigframes_type(python_type)
+    assert got_dtype == expected_dtype
+
+
+@pytest.mark.parametrize(
+    ["scalar", "expected_dtype"],
+    [
+        (pa.scalar(1_000_000_000, type=pa.int64()), bigframes.dtypes.INT_DTYPE),
+        (pa.scalar(True, type=pa.bool_()), bigframes.dtypes.BOOL_DTYPE),
+        (pa.scalar("hello", type=pa.string()), bigframes.dtypes.STRING_DTYPE),
+        # Support NULL scalars.
+        (pa.scalar(None, type=pa.int64()), bigframes.dtypes.INT_DTYPE),
+        (pa.scalar(None, type=pa.bool_()), bigframes.dtypes.BOOL_DTYPE),
+        (pa.scalar(None, type=pa.string()), bigframes.dtypes.STRING_DTYPE),
+    ],
+)
+def test_infer_literal_type_arrow_scalar(scalar, expected_dtype):
+    assert bigframes.dtypes.infer_literal_type(scalar) == expected_dtype
+
+
+@pytest.mark.parametrize(
+    ["type_", "expected"],
+    [
+        (pa.int64(), False),
+        (db_dtypes.JSONArrowType(), True),
+        (pa.struct([("int", pa.int64()), ("str", pa.string())]), False),
+        (pa.struct([("int", pa.int64()), ("json", db_dtypes.JSONArrowType())]), True),
+        (pa.list_(pa.int64()), False),
+        (pa.list_(db_dtypes.JSONArrowType()), True),
+        (
+            pa.list_(
+                pa.struct([("int", pa.int64()), ("json", db_dtypes.JSONArrowType())])
+            ),
+            True,
+        ),
+    ],
+)
+def test_contains_db_dtypes_json_arrow_type(type_, expected):
+    assert bigframes.dtypes.contains_db_dtypes_json_arrow_type(type_) == expected