diff --git a/bigframes/core/array_value.py b/bigframes/core/array_value.py
index b37c581a4a..878d62bcb5 100644
--- a/bigframes/core/array_value.py
+++ b/bigframes/core/array_value.py
@@ -480,6 +480,15 @@ def relational_join(
         type: typing.Literal["inner", "outer", "left", "right", "cross"] = "inner",
         propogate_order: Optional[bool] = None,
     ) -> typing.Tuple[ArrayValue, typing.Tuple[dict[str, str], dict[str, str]]]:
+        # Fail fast: every pair of join keys must have comparable types.
+        for lcol, rcol in conditions:
+            ltype = self.get_column_type(lcol)
+            rtype = other.get_column_type(rcol)
+            if not bigframes.dtypes.can_compare(ltype, rtype):
+                raise TypeError(
+                    f"Cannot join with non-comparable join key types: {ltype}, {rtype}"
+                )
+
         l_mapping = {  # Identity mapping, only rename right side
             lcol.name: lcol.name for lcol in self.node.ids
         }
diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py
index 2c4cccefd2..3695110672 100644
--- a/bigframes/dtypes.py
+++ b/bigframes/dtypes.py
@@ -358,6 +358,22 @@ def is_comparable(type_: ExpressionType) -> bool:
     return (type_ is not None) and is_orderable(type_)
 
 
+def can_compare(type1: ExpressionType, type2: ExpressionType) -> bool:
+    """Return whether values of the two types can be compared to each other.
+
+    The types are comparable when they coerce to a common type that is
+    itself comparable. Types with no common type are reported as not
+    comparable rather than raising, so callers can raise their own
+    context-specific error.
+    """
+    try:
+        coerced_type = coerce_to_common(type1, type2)
+    except TypeError:
+        # coerce_to_common raises TypeError when no common type exists.
+        return False
+    return is_comparable(coerced_type)
+
+
 def get_struct_fields(type_: ExpressionType) -> dict[str, Dtype]:
     assert isinstance(type_, pd.ArrowDtype)
     assert isinstance(type_.pyarrow_dtype, pa.StructType)
diff --git a/bigframes/operations/type.py b/bigframes/operations/type.py
index b4029d74c7..020bd0ea57 100644
--- a/bigframes/operations/type.py
+++ b/bigframes/operations/type.py
@@ -174,15 +174,7 @@ class CoerceCommon(BinaryTypeSignature):
     def output_type(
         self, left_type: ExpressionType, right_type: ExpressionType
     ) -> ExpressionType:
-        try:
-            return bigframes.dtypes.coerce_to_common(left_type, right_type)
-        except TypeError:
-            pass
-        if bigframes.dtypes.can_coerce(left_type, right_type):
-            return right_type
-        if bigframes.dtypes.can_coerce(right_type, left_type):
-            return left_type
-        raise TypeError(f"Cannot coerce {left_type} and {right_type} to a common type.")
+        return bigframes.dtypes.coerce_to_common(left_type, right_type)
 
 
 @dataclasses.dataclass
@@ -192,8 +184,7 @@ class Comparison(BinaryTypeSignature):
     def output_type(
         self, left_type: ExpressionType, right_type: ExpressionType
     ) -> ExpressionType:
-        common_type = CoerceCommon().output_type(left_type, right_type)
-        if not bigframes.dtypes.is_comparable(common_type):
+        if not bigframes.dtypes.can_compare(left_type, right_type):
             raise TypeError(f"Types {left_type} and {right_type} are not comparable")
         return bigframes.dtypes.BOOL_DTYPE
 
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index bad90d0562..1a942a023e 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -3129,8 +3129,6 @@ def test_series_binop_add_different_table(
 @all_joins
 def test_join_same_table(scalars_dfs_maybe_ordered, how):
     bf_df, pd_df = scalars_dfs_maybe_ordered
-    if not bf_df._session._strictly_ordered and how == "cross":
-        pytest.skip("Cross join not supported in partial ordering mode.")
 
     bf_df_a = bf_df.set_index("int64_too")[["string_col", "int64_col"]]
     bf_df_a = bf_df_a.sort_index()
@@ -3153,6 +3151,22 @@ def test_join_same_table(scalars_dfs_maybe_ordered, how):
     assert_pandas_df_equal(bf_result, pd_result, ignore_order=True)
 
 
+def test_join_incompatible_key_type_error(scalars_dfs):
+    """Joining on int and date index keys must raise TypeError."""
+    bf_df, _ = scalars_dfs
+
+    bf_df_a = bf_df.set_index("int64_too")[["string_col", "int64_col"]]
+    bf_df_a = bf_df_a.sort_index()
+
+    bf_df_b = bf_df.set_index("date_col")[["float64_col"]]
+    bf_df_b = bf_df_b[bf_df_b.float64_col > 0]
+    bf_df_b = bf_df_b.sort_values("float64_col")
+
+    with pytest.raises(TypeError):
+        # joining incompatible date, int columns
+        bf_df_a.join(bf_df_b, how="left")
+
+
 @all_joins
 def test_join_different_table(
     scalars_df_index, scalars_df_2_index, scalars_pandas_df_index, how