def st_simplify(
    geography: "bigframes.series.Series",
    tolerance_meters: float,
) -> "bigframes.series.Series":
    """Simplify every geography in a Series, within a tolerance in meters.

    This is a thin wrapper: it builds a ``GeoStSimplifyOp`` carrying the
    tolerance and applies it to ``geography`` as a unary operation.

    Args:
        geography (bigframes.series.Series):
            Series holding the GEOGRAPHY values to simplify.
        tolerance_meters (float):
            Simplification tolerance, expressed in meters. The value is
            handed to the operation unchanged; nothing is validated here.

    Returns:
        bigframes.series.Series:
            Series containing the simplified GEOGRAPHY values.
    """
    # The tolerance is a literal parameter of the op, not a second column.
    simplify_op = ops.GeoStSimplifyOp(tolerance_meters=tolerance_meters)
    return geography._apply_unary_op(simplify_op)
@dataclasses.dataclass(frozen=True)
class GeoStSimplifyOp(base_ops.UnaryOp):
    """Unary geography operation that carries a simplification tolerance.

    The tolerance is a literal stored on the op itself rather than a second
    column input, which is why this is a ``UnaryOp`` and not a ``BinaryOp``.
    """

    # Operation identifier. It has no annotation, so it is a plain class
    # attribute and not a dataclass field / constructor argument.
    name = "st_simplify"
    # Simplification tolerance in meters; the only constructor argument.
    tolerance_meters: float

    def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
        """Return the geography dtype; the input types are not inspected."""
        return dtypes.GEO_DTYPE
- [ ] Implement the `output_type` method to specify the data type of the result. - [ ] **Export the new operation:** @@ -283,13 +286,17 @@ Raster functions: Functions for analyzing geospatial rasters using geographies. - [ ] Add a comprehensive docstring with examples. - [ ] In `bigframes/bigquery/__init__.py`, import your new user-facing function and add it to the `__all__` list. - [ ] For a `GeoSeries` property or method: - - [ ] In `bigframes/geopandas/geoseries.py`, create the property or method. + - [ ] In `bigframes/geopandas/geoseries.py`, create the property or + method. Omit the docstring. - [ ] If the operation is not possible to be supported, such as if the geopandas method returns values in units corresponding to the coordinate system rather than meters that BigQuery uses, raise a - `NotImplementedError` with a helpful message. + `NotImplementedError` with a helpful message. Likewise, if a + required parameter takes a value in terms of the coordinate + system, but BigQuery uses meters, raise a `NotImplementedError`. - [ ] Otherwise, call `series._apply_unary_op` or `series._apply_binary_op`, passing the operation dataclass. - - [ ] Add a comprehensive docstring with examples. + - [ ] Add a comprehensive docstring with examples to the superclass in + `third_party/bigframes_vendored/geopandas/geoseries.py`. - [ ] **Add Tests:** - [ ] Add system tests in `tests/system/small/bigquery/test_geo.py` or `tests/system/small/geopandas/test_geoseries.py` to verify the end-to-end functionality. Test various inputs, including edge cases and `NULL` values. - [ ] If you are overriding a pandas or GeoPandas property and raising `NotImplementedError`, add a unit test to ensure the correct error is raised. 
def test_st_simplify(session):
    """st_simplify returns one non-null LineString for a LineString input.

    Row count and non-nullness alone would also pass if the operation
    returned some other kind of geometry, so the geometry type is checked
    too, in the same way test_st_buffer checks its results.
    """
    geoseries = bigframes.geopandas.GeoSeries(
        [LineString([(0, 0), (1, 1), (2, 0)])], session=session
    )
    result = bbq.st_simplify(geoseries, 100000).to_pandas()

    # Exactly one input row must produce exactly one non-null output row.
    assert len(result.index) == 1
    assert result.isna().sum() == 0
    # Simplifying a line must still yield a line, whether or not the middle
    # vertex is dropped.
    assert result.iloc[0].geom_type == "LineString"
+ + Returns: + bigframes.geopandas.GeoSeries: + Series of simplified geometries. + + Raises: + NotImplementedError: + GeoSeries.simplify is not supported. Use bigframes.bigquery.st_simplify(series, tolerance_meters), instead. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)