From dc8783b677baa6593f80e27cc2e107dc6257f507 Mon Sep 17 00:00:00 2001
From: Chelsea Lin
Date: Thu, 11 Sep 2025 23:35:26 +0000
Subject: [PATCH] feat: add bigframes.bigquery.to_json

---
 bigframes/bigquery/__init__.py                |  2 ++
 bigframes/bigquery/_operations/json.py        | 34 +++++++++++++++++++
 .../ibis_compiler/scalar_op_registry.py       | 10 ++++++
 bigframes/operations/__init__.py              |  2 ++
 bigframes/operations/json_ops.py              | 14 ++++++++
 tests/system/small/bigquery/test_json.py      | 25 ++++++++++++++
 6 files changed, 87 insertions(+)

diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py
index 7b74c1eb88..a7668d345f 100644
--- a/bigframes/bigquery/__init__.py
+++ b/bigframes/bigquery/__init__.py
@@ -50,6 +50,7 @@
     json_value,
     json_value_array,
     parse_json,
+    to_json,
     to_json_string,
 )
 from bigframes.bigquery._operations.search import create_vector_index, vector_search
@@ -88,6 +89,7 @@
     json_value,
     json_value_array,
     parse_json,
+    to_json,
     to_json_string,
     # search ops
     create_vector_index,
diff --git a/bigframes/bigquery/_operations/json.py b/bigframes/bigquery/_operations/json.py
index a972380334..656e59af0d 100644
--- a/bigframes/bigquery/_operations/json.py
+++ b/bigframes/bigquery/_operations/json.py
@@ -430,6 +430,40 @@ def json_value_array(
     return input._apply_unary_op(ops.JSONValueArray(json_path=json_path))
 
 
+def to_json(
+    input: series.Series,
+) -> series.Series:
+    """Converts each value in the series to a BigQuery JSON value.
+
+    **Examples:**
+
+        >>> import bigframes.pandas as bpd
+        >>> import bigframes.bigquery as bbq
+        >>> bpd.options.display.progress_bar = None
+
+        >>> s = bpd.Series([1, 2, 3])
+        >>> bbq.to_json(s)
+        0    1
+        1    2
+        2    3
+        dtype: extension<dbjson<JSONArrowType>>[pyarrow]
+
+        >>> s = bpd.Series([{"int": 1, "str": "pandas"}, {"int": 2, "str": "numpy"}])
+        >>> bbq.to_json(s)
+        0    {"int":1,"str":"pandas"}
+        1     {"int":2,"str":"numpy"}
+        dtype: extension<dbjson<JSONArrowType>>[pyarrow]
+
+    Args:
+        input (bigframes.series.Series):
+            The Series to convert; its dtype must be encodable as JSON.
+
+    Returns:
+        bigframes.series.Series: A new Series of JSON values.
+    """
+    return input._apply_unary_op(ops.ToJSON())
+
+
 def to_json_string(
     input: series.Series,
 ) -> series.Series:
diff --git a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py
index af98252643..05636f25b4 100644
--- a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py
+++ b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py
@@ -1300,6 +1300,11 @@ def parse_json_op_impl(x: ibis_types.Value, op: ops.ParseJSON):
     return parse_json(json_str=x)
 
 
+@scalar_op_compiler.register_unary_op(ops.ToJSON)
+def to_json_op_impl(json_obj: ibis_types.Value):
+    return to_json(json_obj=json_obj)
+
+
 @scalar_op_compiler.register_unary_op(ops.ToJSONString)
 def to_json_string_op_impl(x: ibis_types.Value):
     return to_json_string(value=x)
@@ -2067,6 +2072,11 @@ def json_extract_string_array(  # type: ignore[empty-body]
     """Extracts a JSON array and converts it to a SQL ARRAY of STRINGs."""
 
 
+@ibis_udf.scalar.builtin(name="to_json")
+def to_json(json_obj) -> ibis_dtypes.JSON:  # type: ignore[empty-body]
+    """Convert a SQL value to a JSON value."""
+
+
 @ibis_udf.scalar.builtin(name="to_json_string")
 def to_json_string(value) -> ibis_dtypes.String:  # type: ignore[empty-body]
     """Convert value to JSON-formatted string."""
diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py
index e5888ace00..f91ea83c16 100644
--- a/bigframes/operations/__init__.py
+++ b/bigframes/operations/__init__.py
@@ -123,6 +123,7 @@
     JSONValue,
     JSONValueArray,
     ParseJSON,
+    ToJSON,
     ToJSONString,
 )
 from bigframes.operations.numeric_ops import (
@@ -375,6 +376,7 @@
     "JSONValue",
     "JSONValueArray",
     "ParseJSON",
+    "ToJSON",
     "ToJSONString",
     # Bool ops
     "and_op",
diff --git a/bigframes/operations/json_ops.py b/bigframes/operations/json_ops.py
index b1186e433c..487c193cc5 100644
--- a/bigframes/operations/json_ops.py
+++ b/bigframes/operations/json_ops.py
@@ -102,6 +102,20 @@ def output_type(self, *input_types):
         return dtypes.JSON_DTYPE
 
 
+@dataclasses.dataclass(frozen=True)
+class ToJSON(base_ops.UnaryOp):
+    name: typing.ClassVar[str] = "to_json"
+
+    def output_type(self, *input_types):
+        input_type = input_types[0]
+        if not dtypes.is_json_encoding_type(input_type):
+            raise TypeError(
+                "The value to be assigned must be a type that can be encoded as JSON."
+                + f" Received type: {input_type}"
+            )
+        return dtypes.JSON_DTYPE
+
+
 @dataclasses.dataclass(frozen=True)
 class ToJSONString(base_ops.UnaryOp):
     name: typing.ClassVar[str] = "to_json_string"
diff --git a/tests/system/small/bigquery/test_json.py b/tests/system/small/bigquery/test_json.py
index 213db0849e..5a44c75f17 100644
--- a/tests/system/small/bigquery/test_json.py
+++ b/tests/system/small/bigquery/test_json.py
@@ -386,6 +386,31 @@ def test_parse_json_w_invalid_series_type():
         bbq.parse_json(s)
 
 
+def test_to_json_from_int():
+    s = bpd.Series([1, 2, None, 3])
+    actual = bbq.to_json(s)
+    expected = bpd.Series(["1.0", "2.0", "null", "3.0"], dtype=dtypes.JSON_DTYPE)
+    pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())
+
+
+def test_to_json_from_struct():
+    s = bpd.Series(
+        [
+            {"version": 1, "project": "pandas"},
+            {"version": 2, "project": "numpy"},
+        ]
+    )
+    assert dtypes.is_struct_like(s.dtype)
+
+    actual = bbq.to_json(s)
+    expected = bpd.Series(
+        ['{"project":"pandas","version":1}', '{"project":"numpy","version":2}'],
+        dtype=dtypes.JSON_DTYPE,
+    )
+
+    pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())
+
+
 def test_to_json_string_from_int():
     s = bpd.Series([1, 2, None, 3])
     actual = bbq.to_json_string(s)