Skip to content

feat: add bigframes.bigquery.json_value #1697

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions bigframes/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
json_extract_array,
json_extract_string_array,
json_set,
json_value,
parse_json,
)
from bigframes.bigquery._operations.search import create_vector_index, vector_search
Expand All @@ -61,6 +62,7 @@
"json_extract",
"json_extract_array",
"json_extract_string_array",
"json_value",
"parse_json",
# search ops
"create_vector_index",
Expand Down
34 changes: 34 additions & 0 deletions bigframes/bigquery/_operations/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,40 @@ def json_extract_string_array(
return array_series


def json_value(
    input: series.Series,
    json_path: str,
) -> series.Series:
    """Pull a scalar out of JSON data and return it as a SQL ``STRING``.

    Beyond the plain extraction, this function also:

    - Strips the outermost quotes and unescapes the extracted value.
    - Yields a SQL ``NULL`` when the selected value is not a scalar.
    - Relies on double quotes to escape characters in JSON keys that are
      not valid in a ``JSON_PATH``.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq
        >>> bpd.options.display.progress_bar = None

        >>> s = bpd.Series(['{"name": "Jakob", "age": "6"}', '{"name": "Jakob", "age": []}'])
        >>> bbq.json_value(s, json_path="$.age")
        0       6
        1    <NA>
        dtype: string

    Args:
        input (bigframes.series.Series):
            Series holding the JSON data, either as native JSON objects or
            as JSON-formatted strings.
        json_path (str):
            JSON path that selects the value to extract from each element
            of ``input``.

    Returns:
        bigframes.series.Series: A new Series containing the extracted
        values as strings.
    """
    # Build the unary operation first, then apply it to the Series.
    extract_op = ops.JSONValue(json_path=json_path)
    return input._apply_unary_op(extract_op)


@utils.preview(name="The JSON-related API `parse_json`")
def parse_json(
input: series.Series,
Expand Down
28 changes: 28 additions & 0 deletions tests/system/small/bigquery/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,34 @@ def test_json_extract_string_array_w_invalid_series_type():
bbq.json_extract_string_array(s)


def test_json_value_from_json():
    """json_value on a JSON-dtype Series: only the scalar at ``$.a.b`` is returned.

    The array value and the missing key are both expected to come back as
    nulls, while the scalar ``0`` is expected as the string ``"0"``.
    """
    json_docs = ['{"a": {"b": [1, 2]}}', '{"a": {"c": 1}}', '{"a": {"b": 0}}']
    source = bpd.Series(json_docs, dtype=dtypes.JSON_DTYPE)

    got = bbq.json_value(source, "$.a.b")
    want = bpd.Series([None, None, "0"], dtype=dtypes.STRING_DTYPE)

    pd.testing.assert_series_equal(got.to_pandas(), want.to_pandas())


def test_json_value_from_string():
    """json_value on a pyarrow-backed string Series of JSON-formatted text.

    The array value and the missing key are both expected to come back as
    nulls, while the scalar ``0`` is expected as the string ``"0"``.
    """
    json_texts = ['{"a": {"b": [1, 2]}}', '{"a": {"c": 1}}', '{"a": {"b": 0}}']
    string_dtype = pd.StringDtype(storage="pyarrow")
    source = bpd.Series(json_texts, dtype=string_dtype)

    got = bbq.json_value(source, "$.a.b")
    want = bpd.Series([None, None, "0"], dtype=dtypes.STRING_DTYPE)

    pd.testing.assert_series_equal(got.to_pandas(), want.to_pandas())


def test_json_value_w_invalid_series_type():
    """json_value must raise ``TypeError`` for a Series of integers."""
    int_series = bpd.Series([1, 2])
    with pytest.raises(TypeError):
        bbq.json_value(int_series, "$.a")


def test_parse_json_w_invalid_series_type():
s = bpd.Series([1, 2])
with pytest.raises(TypeError):
Expand Down