Skip to content

Commit ecee2bc

Browse files
authored
feat: add bigframes.bigquery.st_simplify (#2210)
* feat: add bigframes.bigquery.st_simplify * fix mypy errors * add docstring * fix mypy again * fix typos
1 parent 316ba9f commit ecee2bc

File tree

9 files changed

+100
-4
lines changed

9 files changed

+100
-4
lines changed

bigframes/bigquery/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
st_intersection,
4141
st_isclosed,
4242
st_length,
43+
st_simplify,
4344
)
4445
from bigframes.bigquery._operations.json import (
4546
json_extract,
@@ -80,6 +81,7 @@
8081
st_intersection,
8182
st_isclosed,
8283
st_length,
84+
st_simplify,
8385
# json ops
8486
json_extract,
8587
json_extract_array,

bigframes/bigquery/_operations/geo.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -675,3 +675,23 @@ def st_length(
675675
series = series._apply_unary_op(ops.GeoStLengthOp(use_spheroid=use_spheroid))
676676
series.name = None
677677
return series
678+
679+
680+
def st_simplify(
681+
geography: "bigframes.series.Series",
682+
tolerance_meters: float,
683+
) -> "bigframes.series.Series":
684+
"""Returns a simplified version of the input geography.
685+
686+
Args:
687+
geography (bigframes.series.Series):
688+
A Series containing GEOGRAPHY data.
689+
tolerance_meters (float):
690+
A float64 value indicating the tolerance in meters.
691+
692+
Returns:
693+
bigframes.series.Series: A Series containing the simplified GEOGRAPHY data.
694+
"""
695+
return geography._apply_unary_op(
696+
ops.GeoStSimplifyOp(tolerance_meters=tolerance_meters)
697+
)

bigframes/core/compile/ibis_compiler/operations/geo_ops.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,12 @@ def geo_st_isclosed_op_impl(x: ibis_types.Value):
101101
return st_isclosed(x)
102102

103103

104+
@register_unary_op(ops.GeoStSimplifyOp, pass_op=True)
105+
def st_simplify_op_impl(x: ibis_types.Value, op: ops.GeoStSimplifyOp):
106+
x = cast(ibis_types.GeoSpatialValue, x)
107+
return st_simplify(x, op.tolerance_meters)
108+
109+
104110
@register_unary_op(ops.geo_x_op)
105111
def geo_x_op_impl(x: ibis_types.Value):
106112
return cast(ibis_types.GeoSpatialValue, x).x()
@@ -157,3 +163,11 @@ def st_length(geog: ibis_dtypes.geography, use_spheroid: bool) -> ibis_dtypes.fl
157163
@ibis_udf.scalar.builtin
158164
def st_isclosed(a: ibis_dtypes.geography) -> ibis_dtypes.boolean: # type: ignore
159165
"""Checks if a geography is closed."""
166+
167+
168+
@ibis_udf.scalar.builtin
169+
def st_simplify(
170+
geography: ibis_dtypes.geography, # type: ignore
171+
tolerance_meters: ibis_dtypes.float, # type: ignore
172+
) -> ibis_dtypes.geography: # type: ignore
173+
...

bigframes/geopandas/geoseries.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,3 +123,8 @@ def distance(self: GeoSeries, other: GeoSeries) -> bigframes.series.Series: # t
123123

124124
def intersection(self: GeoSeries, other: GeoSeries) -> bigframes.series.Series: # type: ignore
125125
return self._apply_binary_op(other, ops.geo_st_intersection_op)
126+
127+
def simplify(self, tolerance, preserve_topology=True):
128+
raise NotImplementedError(
129+
f"GeoSeries.simplify is not supported. Use bigframes.bigquery.st_simplify(series, tolerance_meters), instead. {constants.FEEDBACK_LINK}"
130+
)

bigframes/operations/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@
121121
GeoStBufferOp,
122122
GeoStDistanceOp,
123123
GeoStLengthOp,
124+
GeoStSimplifyOp,
124125
)
125126
from bigframes.operations.json_ops import (
126127
JSONExtract,
@@ -416,6 +417,7 @@
416417
"geo_st_isclosed_op",
417418
"GeoStBufferOp",
418419
"GeoStLengthOp",
420+
"GeoStSimplifyOp",
419421
"geo_x_op",
420422
"geo_y_op",
421423
"GeoStDistanceOp",

bigframes/operations/geo_ops.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,3 +133,12 @@ class GeoStLengthOp(base_ops.UnaryOp):
133133

134134
def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
135135
return dtypes.FLOAT_DTYPE
136+
137+
138+
@dataclasses.dataclass(frozen=True)
139+
class GeoStSimplifyOp(base_ops.UnaryOp):
140+
name = "st_simplify"
141+
tolerance_meters: float
142+
143+
def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
144+
return dtypes.GEO_DTYPE

specs/2025-08-04-geoseries-scalars.md

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,10 @@ Raster functions: Functions for analyzing geospatial rasters using geographies.
261261
### Implementing a new scalar geography operation
262262

263263
- [ ] **Define the operation dataclass:**
264-
- [ ] In `bigframes/operations/geo_ops.py`, create a new dataclass inheriting from `base_ops.UnaryOp` or `base_ops.BinaryOp`.
264+
- [ ] In `bigframes/operations/geo_ops.py`, create a new dataclass
265+
inheriting from `base_ops.UnaryOp` or `base_ops.BinaryOp`. Note that
266+
BinaryOp is for methods that take two **columns**. Any literal values can
267+
be passed as parameters to a UnaryOp.
265268
- [ ] Define the `name` of the operation and any parameters it requires.
266269
- [ ] Implement the `output_type` method to specify the data type of the result.
267270
- [ ] **Export the new operation:**
@@ -283,13 +286,17 @@ Raster functions: Functions for analyzing geospatial rasters using geographies.
283286
- [ ] Add a comprehensive docstring with examples.
284287
- [ ] In `bigframes/bigquery/__init__.py`, import your new user-facing function and add it to the `__all__` list.
285288
- [ ] For a `GeoSeries` property or method:
286-
- [ ] In `bigframes/geopandas/geoseries.py`, create the property or method.
289+
- [ ] In `bigframes/geopandas/geoseries.py`, create the property or
290+
method. Omit the docstring.
287291
- [ ] If the operation cannot be supported, such as when the
288292
geopandas method returns values in units corresponding to the
289293
coordinate system rather than meters that BigQuery uses, raise a
290-
`NotImplementedError` with a helpful message.
294+
`NotImplementedError` with a helpful message. Likewise, if a
295+
required parameter takes a value in terms of the coordinate
296+
system, but BigQuery uses meters, raise a `NotImplementedError`.
291297
- [ ] Otherwise, call `series._apply_unary_op` or `series._apply_binary_op`, passing the operation dataclass.
292-
- [ ] Add a comprehensive docstring with examples.
298+
- [ ] Add a comprehensive docstring with examples to the superclass in
299+
`third_party/bigframes_vendored/geopandas/geoseries.py`.
293300
- [ ] **Add Tests:**
294301
- [ ] Add system tests in `tests/system/small/bigquery/test_geo.py` or `tests/system/small/geopandas/test_geoseries.py` to verify the end-to-end functionality. Test various inputs, including edge cases and `NULL` values.
295302
- [ ] If you are overriding a pandas or GeoPandas property and raising `NotImplementedError`, add a unit test to ensure the correct error is raised.

tests/system/small/bigquery/test_geo.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,3 +480,12 @@ def test_st_buffer(session):
480480
result = bbq.st_buffer(geoseries, 1000).to_pandas()
481481
assert result.iloc[0].geom_type == "Polygon"
482482
assert result.iloc[1].geom_type == "Polygon"
483+
484+
485+
def test_st_simplify(session):
486+
geoseries = bigframes.geopandas.GeoSeries(
487+
[LineString([(0, 0), (1, 1), (2, 0)])], session=session
488+
)
489+
result = bbq.st_simplify(geoseries, 100000).to_pandas()
490+
assert len(result.index) == 1
491+
assert result.isna().sum() == 0

third_party/bigframes_vendored/geopandas/geoseries.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -496,3 +496,31 @@ def is_closed(self: GeoSeries) -> bigframes.series.Series:
496496
``bigframes.bigquery.st_isclosed(series)``, instead.
497497
"""
498498
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
499+
500+
def simplify(self, tolerance: float, preserve_topology: bool = True) -> bigframes.series.Series: # type: ignore
501+
"""[Not Implemented] Use ``bigframes.bigquery.st_simplify(series, tolerance_meters)``,
502+
instead, to set the tolerance in meters.
503+
504+
In GeoPandas, this returns a GeoSeries containing a simplified
505+
representation of each geometry.
506+
507+
Args:
508+
tolerance (float):
509+
All parts of a simplified geometry will be no more than
510+
tolerance distance from the original. It has the same units as
511+
the coordinate reference system of the GeoSeries. For example,
512+
using tolerance=100 in a projected CRS with meters as units
513+
means a distance of 100 meters in reality.
514+
preserve_topology (bool):
515+
Default True. False uses a quicker algorithm, but may produce
516+
self-intersecting or otherwise invalid geometries.
517+
518+
Returns:
519+
bigframes.geopandas.GeoSeries:
520+
Series of simplified geometries.
521+
522+
Raises:
523+
NotImplementedError:
524+
GeoSeries.simplify is not supported. Use ``bigframes.bigquery.st_simplify(series, tolerance_meters)`` instead.
525+
"""
526+
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

0 commit comments

Comments
 (0)