diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py
index 6eb725975e..02dd77fdd9 100644
--- a/bigframes/bigquery/__init__.py
+++ b/bigframes/bigquery/__init__.py
@@ -38,6 +38,7 @@
     json_extract_array,
     json_extract_string_array,
     json_set,
+    json_value,
     parse_json,
 )
 from bigframes.bigquery._operations.search import create_vector_index, vector_search
@@ -61,6 +62,7 @@
     "json_extract",
     "json_extract_array",
     "json_extract_string_array",
+    "json_value",
     "parse_json",
     # search ops
     "create_vector_index",
diff --git a/bigframes/bigquery/_operations/json.py b/bigframes/bigquery/_operations/json.py
index 07efc5fa51..b59fe40d99 100644
--- a/bigframes/bigquery/_operations/json.py
+++ b/bigframes/bigquery/_operations/json.py
@@ -231,6 +231,40 @@ def json_extract_string_array(
     return array_series
 
 
+def json_value(
+    input: series.Series,
+    json_path: str,
+) -> series.Series:
+    """Extracts a JSON scalar value and converts it to a SQL ``STRING`` value. In
+    addition, this function:
+    - Removes the outermost quotes and unescapes the values.
+    - Returns a SQL ``NULL`` if a non-scalar value is selected.
+    - Uses double quotes to escape invalid ``JSON_PATH`` characters in JSON keys.
+
+    **Examples:**
+
+        >>> import bigframes.pandas as bpd
+        >>> import bigframes.bigquery as bbq
+        >>> bpd.options.display.progress_bar = None
+
+        >>> s = bpd.Series(['{"name": "Jakob", "age": "6"}', '{"name": "Jakob", "age": []}'])
+        >>> bbq.json_value(s, json_path="$.age")
+        0       6
+        1    <NA>
+        dtype: string
+
+    Args:
+        input (bigframes.series.Series):
+            The Series containing JSON data (as native JSON objects or JSON-formatted strings).
+        json_path (str):
+            The JSON path identifying the data that you want to obtain from the input.
+
+    Returns:
+        bigframes.series.Series: A new Series of ``STRING`` values; ``NULL`` where the path does not select a scalar.
+    """
+    return input._apply_unary_op(ops.JSONValue(json_path=json_path))
+
+
 @utils.preview(name="The JSON-related API `parse_json`")
 def parse_json(
     input: series.Series,
diff --git a/tests/system/small/bigquery/test_json.py b/tests/system/small/bigquery/test_json.py
index df5a524b55..b9d21f226a 100644
--- a/tests/system/small/bigquery/test_json.py
+++ b/tests/system/small/bigquery/test_json.py
@@ -212,6 +212,34 @@ def test_json_extract_string_array_w_invalid_series_type():
         bbq.json_extract_string_array(s)
 
 
+def test_json_value_from_json():
+    s = bpd.Series(
+        ['{"a": {"b": [1, 2]}}', '{"a": {"c": 1}}', '{"a": {"b": 0}}'],
+        dtype=dtypes.JSON_DTYPE,
+    )
+    actual = bbq.json_value(s, "$.a.b")
+    expected = bpd.Series([None, None, "0"], dtype=dtypes.STRING_DTYPE)
+
+    pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())
+
+
+def test_json_value_from_string():
+    s = bpd.Series(
+        ['{"a": {"b": [1, 2]}}', '{"a": {"c": 1}}', '{"a": {"b": 0}}'],
+        dtype=pd.StringDtype(storage="pyarrow"),
+    )
+    actual = bbq.json_value(s, "$.a.b")
+    expected = bpd.Series([None, None, "0"], dtype=dtypes.STRING_DTYPE)
+
+    pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())
+
+
+def test_json_value_w_invalid_series_type():
+    s = bpd.Series([1, 2])
+    with pytest.raises(TypeError):
+        bbq.json_value(s, "$.a")
+
+
 def test_parse_json_w_invalid_series_type():
     s = bpd.Series([1, 2])
     with pytest.raises(TypeError):