Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions bigframes/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,20 +27,27 @@
json_extract_array,
json_extract_string_array,
json_set,
parse_json,
)
from bigframes.bigquery._operations.search import create_vector_index, vector_search
from bigframes.bigquery._operations.struct import struct

# Public API of this package, grouped by operation category. Each name is
# listed exactly once, under the category it belongs to.
__all__ = [
    # approximate aggregate ops
    "approx_top_count",
    # array ops
    "array_length",
    "array_agg",
    "array_to_string",
    # json ops
    "json_set",
    "json_extract",
    "json_extract_array",
    "json_extract_string_array",
    "parse_json",
    # search ops
    "create_vector_index",
    "vector_search",
    # struct ops
    "struct",
]
40 changes: 40 additions & 0 deletions bigframes/bigquery/_operations/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,26 @@

from typing import Any, cast, Optional, Sequence, Tuple, Union

import bigframes.core.utils as utils
import bigframes.dtypes
import bigframes.operations as ops
import bigframes.series as series

from . import array


@utils.preview(name="The JSON-related API `json_set`")
def json_set(
input: series.Series,
json_path_value_pairs: Sequence[Tuple[str, Any]],
) -> series.Series:
"""Produces a new JSON value within a Series by inserting or replacing values at
specified paths.

.. warning::
The JSON-related API `json_set` is in preview. Its behavior may change in
future versions.

**Examples:**

>>> import bigframes.pandas as bpd
Expand Down Expand Up @@ -223,3 +229,37 @@ def json_extract_string_array(
),
)
return array_series


@utils.preview(name="The JSON-related API `parse_json`")
def parse_json(
    input: series.Series,
) -> series.Series:
    """Parses the JSON-formatted STRING values of a Series into JSON values.

    .. warning::
        The JSON-related API `parse_json` is in preview. Its behavior may change in
        future versions.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq
        >>> bpd.options.display.progress_bar = None

        >>> s = bpd.Series(['{"class": {"students": [{"id": 5}, {"id": 12}]}}'])
        >>> s
        0    {"class": {"students": [{"id": 5}, {"id": 12}]}}
        dtype: string
        >>> bbq.parse_json(s)
        0    {"class":{"students":[{"id":5},{"id":12}]}}
        dtype: large_string[pyarrow]

    Args:
        input (bigframes.series.Series):
            The Series containing JSON-formatted strings.

    Returns:
        bigframes.series.Series: A new Series with the JSON value.

    Raises:
        TypeError: If the input Series does not have the string dtype.
    """
    # The op itself validates the input dtype when its output type is resolved.
    parse_op = ops.ParseJSON()
    return input._apply_unary_op(parse_op)
12 changes: 8 additions & 4 deletions bigframes/core/compile/scalar_op_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import bigframes_vendored.constants as constants
import bigframes_vendored.ibis.expr.api as ibis_api
import bigframes_vendored.ibis.expr.datatypes as ibis_dtypes
import bigframes_vendored.ibis.expr.operations as ibis_ops
import bigframes_vendored.ibis.expr.operations.generic as ibis_generic
import bigframes_vendored.ibis.expr.operations.udf as ibis_udf
import bigframes_vendored.ibis.expr.types as ibis_types
Expand Down Expand Up @@ -1181,13 +1180,13 @@ def json_set_op_impl(x: ibis_types.Value, y: ibis_types.Value, op: ops.JSONSet):
)
else:
# Enabling JSON type eliminates the need for less efficient string conversions.
return ibis_ops.ToJsonString(
return to_json_string(
json_set( # type: ignore
json_obj=parse_json(x),
json_obj=parse_json(json_str=x),
json_path=op.json_path,
json_value=y,
)
).to_expr()
)


@scalar_op_compiler.register_unary_op(ops.JSONExtract, pass_op=True)
Expand All @@ -1210,6 +1209,11 @@ def json_extract_string_array_op_impl(
return json_extract_string_array(json_obj=x, json_path=op.json_path)


@scalar_op_compiler.register_unary_op(ops.ParseJSON, pass_op=True)
def parse_json_op_impl(x: ibis_types.Value, op: ops.ParseJSON):
    """Compile ``ops.ParseJSON`` by handing the operand to ``parse_json``.

    ``op`` is supplied because the registration uses ``pass_op=True``; it is
    not read by this implementation.
    """
    parsed_value = parse_json(json_str=x)
    return parsed_value


@scalar_op_compiler.register_unary_op(ops.ToJSONString)
def to_json_string_op_impl(json_obj: ibis_types.Value):
    """Compile ``ops.ToJSONString`` by handing the operand to ``to_json_string``."""
    json_text = to_json_string(json_obj=json_obj)
    return json_text
Expand Down
25 changes: 25 additions & 0 deletions bigframes/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,18 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import functools
import re
import typing
from typing import Hashable, Iterable, List
import warnings

import bigframes_vendored.pandas.io.common as vendored_pandas_io_common
import pandas as pd
import typing_extensions

import bigframes.exceptions as exc

UNNAMED_COLUMN_ID = "bigframes_unnamed_column"
UNNAMED_INDEX_ID = "bigframes_unnamed_index"

Expand Down Expand Up @@ -164,3 +168,24 @@ def merge_column_labels(
result_labels.append(col_label)

return pd.Index(result_labels)


def warn_preview(msg=""):
    """Emit a ``PreviewWarning`` carrying ``msg``.

    Args:
        msg: Text of the warning. Defaults to an empty string.
    """
    warnings.warn(message=msg, category=exc.PreviewWarning)


def preview(*, name: str):
    """Build a decorator that flags the wrapped callable as a preview API.

    Every call to the decorated callable first issues a preview warning via
    ``warn_preview`` and then forwards all arguments to the original callable,
    returning its result unchanged.

    Args:
        name: Keyword-only label for the API, used as the start of the warning
            message.

    Returns:
        A decorator that wraps a callable with the warning behavior.
    """
    # The message depends only on ``name``, so it is built a single time here.
    preview_message = (
        f"{name} is in preview. Its behavior may change in future versions."
    )

    def decorator(func):
        # functools.wraps keeps the wrapped callable's name and docstring.
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            warn_preview(msg=preview_message)
            result = func(*args, **kwargs)
            return result

        return wrapper

    return decorator
17 changes: 14 additions & 3 deletions bigframes/operations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -740,6 +740,20 @@ def output_type(self, *input_types):
)


@dataclasses.dataclass(frozen=True)
class ParseJSON(UnaryOp):
    """Unary op named ``parse_json`` that maps a string operand to the JSON dtype."""

    name: typing.ClassVar[str] = "parse_json"

    def output_type(self, *input_types):
        """Return ``dtypes.JSON_DTYPE`` for a string operand.

        Only the first entry of ``input_types`` is inspected.

        Raises:
            TypeError: If the first input type is not ``dtypes.STRING_DTYPE``.
        """
        operand_type = input_types[0]
        if operand_type != dtypes.STRING_DTYPE:
            message = (
                "Input type must be an valid JSON-formatted string type."
                f" Received type: {operand_type}"
            )
            raise TypeError(message)
        return dtypes.JSON_DTYPE


@dataclasses.dataclass(frozen=True)
class ToJSONString(UnaryOp):
name: typing.ClassVar[str] = "to_json_string"
Expand All @@ -754,9 +768,6 @@ def output_type(self, *input_types):
return dtypes.STRING_DTYPE


to_json_string_op = ToJSONString()


## Blob Ops
@dataclasses.dataclass(frozen=True)
class ObjGetAccessUrl(UnaryOp):
Expand Down
4 changes: 2 additions & 2 deletions bigframes/operations/blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,8 @@ def image_blur(
)
dst_rt = dst._apply_unary_op(ops.ObjGetAccessUrl(mode="RW"))

src_rt = src_rt._apply_unary_op(ops.to_json_string_op)
dst_rt = dst_rt._apply_unary_op(ops.to_json_string_op)
src_rt = src_rt._apply_unary_op(ops.ToJSONString())
dst_rt = dst_rt._apply_unary_op(ops.ToJSONString())

df = src_rt.to_frame().join(dst_rt.to_frame(), how="outer")
df["ksize_x"], df["ksize_y"] = ksize
Expand Down
5 changes: 5 additions & 0 deletions tests/system/small/bigquery/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,3 +209,8 @@ def test_json_in_struct():
"SELECT STRUCT(JSON '{\\\"a\\\": 1}' AS data, 1 AS number) as struct_col"
)
assert df["struct_col"].struct.field("data")[0] == '{"a":1}'


def test_parse_json_w_invalid_series_type():
    """``parse_json`` must raise ``TypeError`` for a Series of integers."""
    with pytest.raises(TypeError):
        # Integers are not JSON-formatted strings, so the call is rejected.
        integer_series = bpd.Series([1, 2])
        bbq.parse_json(integer_series)
Loading