diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py
index 32412648d6..7b74c1eb88 100644
--- a/bigframes/bigquery/__init__.py
+++ b/bigframes/bigquery/__init__.py
@@ -50,6 +50,7 @@
     json_value,
     json_value_array,
     parse_json,
+    to_json_string,
 )
 from bigframes.bigquery._operations.search import create_vector_index, vector_search
 from bigframes.bigquery._operations.sql import sql_scalar
@@ -87,6 +88,7 @@
     json_value,
     json_value_array,
     parse_json,
+    to_json_string,
     # search ops
     create_vector_index,
     vector_search,
diff --git a/bigframes/bigquery/_operations/json.py b/bigframes/bigquery/_operations/json.py
index 7ad7855dba..a972380334 100644
--- a/bigframes/bigquery/_operations/json.py
+++ b/bigframes/bigquery/_operations/json.py
@@ -430,6 +430,40 @@ def json_value_array(
     return input._apply_unary_op(ops.JSONValueArray(json_path=json_path))
 
 
+def to_json_string(
+    input: series.Series,
+) -> series.Series:
+    """Converts a series to a JSON-formatted STRING value.
+
+    **Examples:**
+
+        >>> import bigframes.pandas as bpd
+        >>> import bigframes.bigquery as bbq
+        >>> bpd.options.display.progress_bar = None
+
+        >>> s = bpd.Series([1, 2, 3])
+        >>> bbq.to_json_string(s)
+        0    1
+        1    2
+        2    3
+        dtype: string
+
+        >>> s = bpd.Series([{"int": 1, "str": "pandas"}, {"int": 2, "str": "numpy"}])
+        >>> bbq.to_json_string(s)
+        0    {"int":1,"str":"pandas"}
+        1     {"int":2,"str":"numpy"}
+        dtype: string
+
+    Args:
+        input (bigframes.series.Series):
+            The Series to be converted.
+
+    Returns:
+        bigframes.series.Series: A new Series with the JSON-formatted STRING value.
+    """
+    return input._apply_unary_op(ops.ToJSONString())
+
+
 @utils.preview(name="The JSON-related API `parse_json`")
 def parse_json(
     input: series.Series,
diff --git a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py
index a37d390b51..af98252643 100644
--- a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py
+++ b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py
@@ -2068,9 +2068,7 @@ def json_extract_string_array(  # type: ignore[empty-body]
 
 
 @ibis_udf.scalar.builtin(name="to_json_string")
-def to_json_string(  # type: ignore[empty-body]
-    value,
-) -> ibis_dtypes.String:
+def to_json_string(value) -> ibis_dtypes.String:  # type: ignore[empty-body]
     """Convert value to JSON-formatted string."""
 
 
diff --git a/bigframes/operations/json_ops.py b/bigframes/operations/json_ops.py
index d3f62fb4f2..b1186e433c 100644
--- a/bigframes/operations/json_ops.py
+++ b/bigframes/operations/json_ops.py
@@ -107,6 +107,12 @@ class ToJSONString(base_ops.UnaryOp):
     name: typing.ClassVar[str] = "to_json_string"
 
     def output_type(self, *input_types):
+        input_type = input_types[0]
+        if not dtypes.is_json_encoding_type(input_type):
+            raise TypeError(
+                "The value to be assigned must be a type that can be encoded as JSON."
+                + f" Received type: {input_type}"
+            )
         return dtypes.STRING_DTYPE
 
 
diff --git a/tests/system/small/bigquery/test_json.py b/tests/system/small/bigquery/test_json.py
index 4ecbd01318..213db0849e 100644
--- a/tests/system/small/bigquery/test_json.py
+++ b/tests/system/small/bigquery/test_json.py
@@ -384,3 +384,28 @@ def test_parse_json_w_invalid_series_type():
     s = bpd.Series([1, 2])
     with pytest.raises(TypeError):
         bbq.parse_json(s)
+
+
+def test_to_json_string_from_int():
+    s = bpd.Series([1, 2, None, 3])
+    actual = bbq.to_json_string(s)
+    expected = bpd.Series(["1", "2", "null", "3"], dtype=dtypes.STRING_DTYPE)
+    pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())
+
+
+def test_to_json_string_from_struct():
+    s = bpd.Series(
+        [
+            {"version": 1, "project": "pandas"},
+            {"version": 2, "project": "numpy"},
+        ]
+    )
+    assert dtypes.is_struct_like(s.dtype)
+
+    actual = bbq.to_json_string(s)
+    expected = bpd.Series(
+        ['{"project":"pandas","version":1}', '{"project":"numpy","version":2}'],
+        dtype=dtypes.STRING_DTYPE,
+    )
+
+    pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())