From 85c54072ab852dd7685953fc5ffc08cace1ec395 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Thu, 30 Oct 2025 18:42:05 +0000 Subject: [PATCH 1/5] feat: add bigframes.bigquery.st_simplify --- bigframes/bigquery/__init__.py | 2 ++ bigframes/bigquery/_operations/geo.py | 20 +++++++++++++++++++ .../ibis_compiler/operations/geo_ops.py | 14 +++++++++++++ bigframes/geopandas/geoseries.py | 5 +++++ bigframes/operations/__init__.py | 2 ++ bigframes/operations/geo_ops.py | 9 +++++++++ specs/2025-08-04-geoseries-scalars.md | 15 ++++++++++---- tests/system/small/bigquery/test_geo.py | 9 +++++++++ 8 files changed, 72 insertions(+), 4 deletions(-) diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py index e8c7a524d9..c599a4b543 100644 --- a/bigframes/bigquery/__init__.py +++ b/bigframes/bigquery/__init__.py @@ -40,6 +40,7 @@ st_intersection, st_isclosed, st_length, + st_simplify, ) from bigframes.bigquery._operations.json import ( json_extract, @@ -80,6 +81,7 @@ st_intersection, st_isclosed, st_length, + st_simplify, # json ops json_extract, json_extract_array, diff --git a/bigframes/bigquery/_operations/geo.py b/bigframes/bigquery/_operations/geo.py index 254d2ae13f..6b7e5d88a2 100644 --- a/bigframes/bigquery/_operations/geo.py +++ b/bigframes/bigquery/_operations/geo.py @@ -675,3 +675,23 @@ def st_length( series = series._apply_unary_op(ops.GeoStLengthOp(use_spheroid=use_spheroid)) series.name = None return series + + +def st_simplify( + geography: "bigframes.series.Series", + tolerance_meters: float, +) -> "bigframes.series.Series": + """Returns a simplified version of the input geography. + + Args: + geography (bigframes.series.Series): + A Series containing GEOGRAPHY data. + tolerance_meters (float): + A float64 value indicating the tolerance in meters. + + Returns: + a Series containing the simplified GEOGRAPHY data. + """ + return geography._apply_unary_op( + ops.GeoStSimplifyOp(tolerance_meters=tolerance_meters) + ) diff --git a/bigframes/core/compile/ibis_compiler/operations/geo_ops.py b/bigframes/core/compile/ibis_compiler/operations/geo_ops.py index f9155fed5a..9614c0ff13 100644 --- a/bigframes/core/compile/ibis_compiler/operations/geo_ops.py +++ b/bigframes/core/compile/ibis_compiler/operations/geo_ops.py @@ -101,6 +101,12 @@ def geo_st_isclosed_op_impl(x: ibis_types.Value): return st_isclosed(x) +@register_unary_op(ops.GeoStSimplifyOp, pass_op=True) +def st_simplify_op_impl(x: ibis_types.Value, op: ops.GeoStSimplifyOp): + x = cast(ibis_types.GeoSpatialValue, x) + return st_simplify(x, op.tolerance_meters) + + @register_unary_op(ops.geo_x_op) def geo_x_op_impl(x: ibis_types.Value): return cast(ibis_types.GeoSpatialValue, x).x() @@ -157,3 +163,11 @@ def st_length(geog: ibis_dtypes.geography, use_spheroid: bool) -> ibis_dtypes.fl @ibis_udf.scalar.builtin def st_isclosed(a: ibis_dtypes.geography) -> ibis_dtypes.boolean: # type: ignore """Checks if a geography is closed.""" + + +@ibis_udf.scalar.builtin +def st_simplify( + geography: ibis_dtypes.geography, + tolerance_meters: ibis_dtypes.float, +) -> ibis_dtypes.geography: # type: ignore + ... diff --git a/bigframes/geopandas/geoseries.py b/bigframes/geopandas/geoseries.py index f3558e4b34..660f1939a9 100644 --- a/bigframes/geopandas/geoseries.py +++ b/bigframes/geopandas/geoseries.py @@ -123,3 +123,8 @@ def distance(self: GeoSeries, other: GeoSeries) -> bigframes.series.Series: # t def intersection(self: GeoSeries, other: GeoSeries) -> bigframes.series.Series: # type: ignore return self._apply_binary_op(other, ops.geo_st_intersection_op) + + def simplify(self, tolerance, preserve_topology=True): + raise NotImplementedError( + f"GeoSeries.simplify is not supported. Use bigframes.bigquery.st_simplify(series, tolerance_meters), instead. {constants.FEEDBACK_LINK}" + ) diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index 24a7d6542f..cb03943ada 100644 --- a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -121,6 +121,7 @@ GeoStBufferOp, GeoStDistanceOp, GeoStLengthOp, + GeoStSimplifyOp, ) from bigframes.operations.json_ops import ( JSONExtract, @@ -416,6 +417,7 @@ "geo_st_isclosed_op", "GeoStBufferOp", "GeoStLengthOp", + "GeoStSimplifyOp", "geo_x_op", "geo_y_op", "GeoStDistanceOp", diff --git a/bigframes/operations/geo_ops.py b/bigframes/operations/geo_ops.py index 3b7754a47a..86e913d543 100644 --- a/bigframes/operations/geo_ops.py +++ b/bigframes/operations/geo_ops.py @@ -133,3 +133,12 @@ class GeoStLengthOp(base_ops.UnaryOp): def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: return dtypes.FLOAT_DTYPE + + +@dataclasses.dataclass(frozen=True) +class GeoStSimplifyOp(base_ops.UnaryOp): + name = "st_simplify" + tolerance_meters: float + + def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: + return dtypes.GEO_DTYPE diff --git a/specs/2025-08-04-geoseries-scalars.md b/specs/2025-08-04-geoseries-scalars.md index 66ed77d0dd..93237ec467 100644 --- a/specs/2025-08-04-geoseries-scalars.md +++ b/specs/2025-08-04-geoseries-scalars.md @@ -261,7 +261,10 @@ Raster functions: Functions for analyzing geospatial rasters using geographies. ### Implementing a new scalar geography operation - [ ] **Define the operation dataclass:** - - [ ] In `bigframes/operations/geo_ops.py`, create a new dataclass inheriting from `base_ops.UnaryOp` or `base_ops.BinaryOp`. + - [ ] In `bigframes/operations/geo_ops.py`, create a new dataclass + inheriting from `base_ops.UnaryOp` or `base_ops.BinaryOp`. Note that + BinaryOp is for methods that take two **columns**. Any literal values can + be passed as parameters to a UnaryOp. - [ ] Define the `name` of the operation and any parameters it requires. - [ ] Implement the `output_type` method to specify the data type of the result. - [ ] **Export the new operation:** @@ -283,13 +286,17 @@ Raster functions: Functions for analyzing geospatial rasters using geographies. - [ ] Add a comprehensive docstring with examples. - [ ] In `bigframes/bigquery/__init__.py`, import your new user-facing function and add it to the `__all__` list. - [ ] For a `GeoSeries` property or method: - - [ ] In `bigframes/geopandas/geoseries.py`, create the property or method. + - [ ] In `bigframes/geopandas/geoseries.py`, create the property or + method. Omit the docstring. - [ ] If the operation is not possible to be supported, such as if the geopandas method returns values in units corresponding to the coordinate system rather than meters that BigQuery uses, raise a - `NotImplementedError` with a helpful message. + `NotImplementedError` with a helpful message. Likewise, if a + required parameter takes a value in terms of the coordinate + system, but BigQuery uses meters, rais a `NotImplementedError`. - [ ] Otherwise, call `series._apply_unary_op` or `series._apply_binary_op`, passing the operation dataclass. - - [ ] Add a comprehensive docstring with examples. + - [ ] Add a comprehensive docstring with examples to the superclass in + `third_party/bigframes_vendored/geopandas/geoseries.py`. - [ ] **Add Tests:** - [ ] Add system tests in `tests/system/small/bigquery/test_geo.py` or `tests/system/small/geopandas/test_geoseries.py` to verify the end-to-end functionality. Test various inputs, including edge cases and `NULL` values. - [ ] If you are overriding a pandas or GeoPandas property and raising `NotImplementedError`, add a unit test to ensure the correct error is raised. diff --git a/tests/system/small/bigquery/test_geo.py b/tests/system/small/bigquery/test_geo.py index c89ca59aca..28db58c711 100644 --- a/tests/system/small/bigquery/test_geo.py +++ b/tests/system/small/bigquery/test_geo.py @@ -480,3 +480,12 @@ def test_st_buffer(session): result = bbq.st_buffer(geoseries, 1000).to_pandas() assert result.iloc[0].geom_type == "Polygon" assert result.iloc[1].geom_type == "Polygon" + + +def test_st_simplify(session): + geoseries = bigframes.geopandas.GeoSeries( + [LineString([(0, 0), (1, 1), (2, 0)])], session=session + ) + result = bbq.st_simplify(geoseries, 100000).to_pandas() + assert len(result.index) == 1 + assert result.isna().sum() == 0 From 52de10a6ab3fda582e64a10747120e58d0c40d4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Thu, 30 Oct 2025 19:09:38 +0000 Subject: [PATCH 2/5] fix mypy errors --- bigframes/core/compile/ibis_compiler/operations/geo_ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bigframes/core/compile/ibis_compiler/operations/geo_ops.py b/bigframes/core/compile/ibis_compiler/operations/geo_ops.py index 9614c0ff13..2f06c76768 100644 --- a/bigframes/core/compile/ibis_compiler/operations/geo_ops.py +++ b/bigframes/core/compile/ibis_compiler/operations/geo_ops.py @@ -167,7 +167,7 @@ def st_isclosed(a: ibis_dtypes.geography) -> ibis_dtypes.boolean: # type: ignor @ibis_udf.scalar.builtin def st_simplify( - geography: ibis_dtypes.geography, - tolerance_meters: ibis_dtypes.float, + geography: ibis_dtypes.geography, # type: ignore + tolerance_meters: ibis_dtypes.float, # type: ignore ) -> ibis_dtypes.geography: # type: ignore ... From f8cf91eb84650d8f6bda91bc5b26aaccd17b8565 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Thu, 30 Oct 2025 19:15:48 +0000 Subject: [PATCH 3/5] add docstring --- .../bigframes_vendored/geopandas/geoseries.py | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/third_party/bigframes_vendored/geopandas/geoseries.py b/third_party/bigframes_vendored/geopandas/geoseries.py index 20587b4d57..8ddb7674c6 100644 --- a/third_party/bigframes_vendored/geopandas/geoseries.py +++ b/third_party/bigframes_vendored/geopandas/geoseries.py @@ -496,3 +496,31 @@ def is_closed(self: GeoSeries) -> bigframes.series.Series: ``bigframes.bigquery.st_isclosed(series)``, instead. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def simplify(self, crs=None) -> bigframes.series.Series: # type: ignore + """[Not Implemented] Use ``bigframes.bigquery.st_simplify(series, tolerance_meters)``, + instead to set the tolerance in meters. + + In GeoPandas, this returns a GeoSeries containing a simplified + representation of each geometry. + + Args: + tolerance (float): + All parts of a simplified geometry will be no more than + tolerance distance from the original. It has the same units as + the coordinate reference system of the GeoSeries. For example, + using tolerance=100 in a projected CRS with meters as units + means a distance of 100 meters in reality. + preserve_topology (bool): + Default True. False uses a quicker algorithm, but may produce + self-intersecting or otherwise invalid geometries. + + Returns: + bigframes.geopandas.GeoSeries: + Series of simplified geometries. + + Raises: + NotImplementedError: + GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) From f679464536d146da01c98092a5ed504934703a32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Thu, 30 Oct 2025 19:36:04 +0000 Subject: [PATCH 4/5] fix mypy again --- third_party/bigframes_vendored/geopandas/geoseries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/bigframes_vendored/geopandas/geoseries.py b/third_party/bigframes_vendored/geopandas/geoseries.py index 8ddb7674c6..dfa070e84c 100644 --- a/third_party/bigframes_vendored/geopandas/geoseries.py +++ b/third_party/bigframes_vendored/geopandas/geoseries.py @@ -497,7 +497,7 @@ def is_closed(self: GeoSeries) -> bigframes.series.Series: """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) - def simplify(self, crs=None) -> bigframes.series.Series: # type: ignore + def simplify(self, tolerance: float, preserve_topology: bool = True) -> bigframes.series.Series: # type: ignore """[Not Implemented] Use ``bigframes.bigquery.st_simplify(series, tolerance_meters)``, instead to set the tolerance in meters. From 79857ae3480a0970c4c1fe92d91c363e6c80572f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Fri, 31 Oct 2025 11:11:36 -0500 Subject: [PATCH 5/5] fix typos --- specs/2025-08-04-geoseries-scalars.md | 2 +- third_party/bigframes_vendored/geopandas/geoseries.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/specs/2025-08-04-geoseries-scalars.md b/specs/2025-08-04-geoseries-scalars.md index 93237ec467..e7bc6c61e1 100644 --- a/specs/2025-08-04-geoseries-scalars.md +++ b/specs/2025-08-04-geoseries-scalars.md @@ -293,7 +293,7 @@ Raster functions: Functions for analyzing geospatial rasters using geographies. coordinate system rather than meters that BigQuery uses, raise a `NotImplementedError` with a helpful message. Likewise, if a required parameter takes a value in terms of the coordinate - system, but BigQuery uses meters, rais a `NotImplementedError`. + system, but BigQuery uses meters, raise a `NotImplementedError`. - [ ] Otherwise, call `series._apply_unary_op` or `series._apply_binary_op`, passing the operation dataclass. - [ ] Add a comprehensive docstring with examples to the superclass in `third_party/bigframes_vendored/geopandas/geoseries.py`. diff --git a/third_party/bigframes_vendored/geopandas/geoseries.py b/third_party/bigframes_vendored/geopandas/geoseries.py index dfa070e84c..642cf2fc90 100644 --- a/third_party/bigframes_vendored/geopandas/geoseries.py +++ b/third_party/bigframes_vendored/geopandas/geoseries.py @@ -521,6 +521,6 @@ def simplify(self, tolerance: float, preserve_topology: bool = True) -> bigframe Raises: NotImplementedError: - GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. + GeoSeries.simplify is not supported. Use bigframes.bigquery.st_simplify(series, tolerance_meters), instead. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)