Skip to content

Commit 9dc9695

Browse files
fix: Throw type error for incomparable join keys (#2098)
1 parent 328a765 commit 9dc9695

File tree

4 files changed

+30
-13
lines changed

4 files changed

+30
-13
lines changed

bigframes/core/array_value.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -480,6 +480,14 @@ def relational_join(
480480
type: typing.Literal["inner", "outer", "left", "right", "cross"] = "inner",
481481
propogate_order: Optional[bool] = None,
482482
) -> typing.Tuple[ArrayValue, typing.Tuple[dict[str, str], dict[str, str]]]:
483+
for lcol, rcol in conditions:
484+
ltype = self.get_column_type(lcol)
485+
rtype = other.get_column_type(rcol)
486+
if not bigframes.dtypes.can_compare(ltype, rtype):
487+
raise TypeError(
488+
f"Cannot join with non-comparable join key types: {ltype}, {rtype}"
489+
)
490+
483491
l_mapping = { # Identity mapping, only rename right side
484492
lcol.name: lcol.name for lcol in self.node.ids
485493
}

bigframes/dtypes.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -358,6 +358,11 @@ def is_comparable(type_: ExpressionType) -> bool:
358358
return (type_ is not None) and is_orderable(type_)
359359

360360

361+
def can_compare(type1: ExpressionType, type2: ExpressionType) -> bool:
    """Report whether values of ``type1`` and ``type2`` can be compared.

    The two types are coerced to a common type first; the pair is considered
    comparable when that common type is itself comparable.

    Args:
        type1: Expression type of the first operand.
        type2: Expression type of the second operand.

    Returns:
        True if the types share a common type and that type is comparable,
        False otherwise.
    """
    try:
        coerced_type = coerce_to_common(type1, type2)
    except TypeError:
        # coerce_to_common signals "no common type" with a TypeError. A
        # predicate should answer False in that case so that callers can
        # raise their own, more specific error message.
        return False
    return is_comparable(coerced_type)
364+
365+
361366
def get_struct_fields(type_: ExpressionType) -> dict[str, Dtype]:
362367
assert isinstance(type_, pd.ArrowDtype)
363368
assert isinstance(type_.pyarrow_dtype, pa.StructType)

bigframes/operations/type.py

Lines changed: 2 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -174,15 +174,7 @@ class CoerceCommon(BinaryTypeSignature):
174174
def output_type(
175175
self, left_type: ExpressionType, right_type: ExpressionType
176176
) -> ExpressionType:
177-
try:
178-
return bigframes.dtypes.coerce_to_common(left_type, right_type)
179-
except TypeError:
180-
pass
181-
if bigframes.dtypes.can_coerce(left_type, right_type):
182-
return right_type
183-
if bigframes.dtypes.can_coerce(right_type, left_type):
184-
return left_type
185-
raise TypeError(f"Cannot coerce {left_type} and {right_type} to a common type.")
177+
return bigframes.dtypes.coerce_to_common(left_type, right_type)
186178

187179

188180
@dataclasses.dataclass
@@ -192,8 +184,7 @@ class Comparison(BinaryTypeSignature):
192184
def output_type(
193185
self, left_type: ExpressionType, right_type: ExpressionType
194186
) -> ExpressionType:
195-
common_type = CoerceCommon().output_type(left_type, right_type)
196-
if not bigframes.dtypes.is_comparable(common_type):
187+
if not bigframes.dtypes.can_compare(left_type, right_type):
197188
raise TypeError(f"Types {left_type} and {right_type} are not comparable")
198189
return bigframes.dtypes.BOOL_DTYPE
199190

tests/system/small/test_dataframe.py

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3129,8 +3129,6 @@ def test_series_binop_add_different_table(
31293129
@all_joins
31303130
def test_join_same_table(scalars_dfs_maybe_ordered, how):
31313131
bf_df, pd_df = scalars_dfs_maybe_ordered
3132-
if not bf_df._session._strictly_ordered and how == "cross":
3133-
pytest.skip("Cross join not supported in partial ordering mode.")
31343132

31353133
bf_df_a = bf_df.set_index("int64_too")[["string_col", "int64_col"]]
31363134
bf_df_a = bf_df_a.sort_index()
@@ -3153,6 +3151,21 @@ def test_join_same_table(scalars_dfs_maybe_ordered, how):
31533151
assert_pandas_df_equal(bf_result, pd_result, ignore_order=True)
31543152

31553153

3154+
def test_join_incompatible_key_type_error(scalars_dfs):
    """Joining frames whose index key types are incompatible raises TypeError."""
    bf_df, _ = scalars_dfs

    # Left side is indexed by "int64_too" and sorted on that index.
    left = bf_df.set_index("int64_too")[["string_col", "int64_col"]].sort_index()

    # Right side is indexed by "date_col", filtered to positive floats and
    # ordered by the float column.
    right = bf_df.set_index("date_col")[["float64_col"]]
    right = right[right.float64_col > 0].sort_values("float64_col")

    # The join keys are the two indexes (int vs. date), which cannot be
    # compared, so the join must be rejected.
    with pytest.raises(TypeError):
        left.join(right, how="left")
3167+
3168+
31563169
@all_joins
31573170
def test_join_different_table(
31583171
scalars_df_index, scalars_df_2_index, scalars_pandas_df_index, how

0 commit comments

Comments
 (0)