From cf7c145a364b568273d57cbd4a7b5cc4d9deab5a Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Fri, 28 Mar 2025 17:42:27 +0000 Subject: [PATCH 1/2] [WIP] Log __init__ calls of timedelta-related ops --- bigframes/core/compile/scalar_op_compiler.py | 15 ++++++----- bigframes/core/rewrite/timedeltas.py | 26 +++++++++--------- bigframes/operations/__init__.py | 28 ++++++++++---------- bigframes/operations/date_ops.py | 5 ++-- bigframes/operations/datetime_ops.py | 7 +++-- bigframes/operations/timedelta_ops.py | 20 +++++--------- 6 files changed, 47 insertions(+), 54 deletions(-) diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py index 0296762447..e855144d4b 100644 --- a/bigframes/core/compile/scalar_op_compiler.py +++ b/bigframes/core/compile/scalar_op_compiler.py @@ -26,6 +26,7 @@ import numpy as np import pandas as pd +from bigframes.core import log_adapter from bigframes.core.compile.constants import UNIT_TO_US_CONVERSION_FACTORS import bigframes.core.compile.default_ordering import bigframes.core.compile.ibis_types @@ -725,32 +726,32 @@ def unix_millis_op_impl(x: ibis_types.TimestampValue): return unix_millis(x) -@scalar_op_compiler.register_binary_op(ops.timestamp_diff_op) +@scalar_op_compiler.register_binary_op(ops.TimestampDiffOp) def timestamp_diff_op_impl(x: ibis_types.TimestampValue, y: ibis_types.TimestampValue): return x.delta(y, "microsecond") -@scalar_op_compiler.register_binary_op(ops.timestamp_add_op) +@scalar_op_compiler.register_binary_op(ops.TimestampAddOp) def timestamp_add_op_impl(x: ibis_types.TimestampValue, y: ibis_types.IntegerValue): return x + y.to_interval("us") -@scalar_op_compiler.register_binary_op(ops.timestamp_sub_op) +@scalar_op_compiler.register_binary_op(ops.TimestampSubOp) def timestamp_sub_op_impl(x: ibis_types.TimestampValue, y: ibis_types.IntegerValue): return x - y.to_interval("us") -@scalar_op_compiler.register_binary_op(ops.date_diff_op) +@scalar_op_compiler.register_binary_op(ops.DateDiffOp) def date_diff_op_impl(x: ibis_types.DateValue, y: ibis_types.DateValue): return x.delta(y, "day") * int(UNIT_TO_US_CONVERSION_FACTORS["d"]) # type: ignore -@scalar_op_compiler.register_binary_op(ops.date_add_op) +@scalar_op_compiler.register_binary_op(ops.DateAddOp) def date_add_op_impl(x: ibis_types.DateValue, y: ibis_types.IntegerValue): return x.cast("timestamp") + y.to_interval("us") # type: ignore -@scalar_op_compiler.register_binary_op(ops.date_sub_op) +@scalar_op_compiler.register_binary_op(ops.DateSubOp) def date_sub_op_impl(x: ibis_types.DateValue, y: ibis_types.IntegerValue): return x.cast("timestamp") - y.to_interval("us") # type: ignore @@ -1208,7 +1209,7 @@ def to_timedelta_op_impl(x: ibis_types.Value, op: ops.ToTimedeltaOp): ).floor() -@scalar_op_compiler.register_unary_op(ops.timedelta_floor_op) +@scalar_op_compiler.register_unary_op(ops.TimedeltaFloorOp) def timedelta_floor_op_impl(x: ibis_types.NumericValue): return x.floor() diff --git a/bigframes/core/rewrite/timedeltas.py b/bigframes/core/rewrite/timedeltas.py index ea8e608a84..fac0c82956 100644 --- a/bigframes/core/rewrite/timedeltas.py +++ b/bigframes/core/rewrite/timedeltas.py @@ -28,7 +28,7 @@ @dataclasses.dataclass class _TypedExpr: expr: ex.Expression - dtype: dtypes.Dtype + dtype: dtypes.ExpressionType @classmethod def create_op_expr( @@ -146,36 +146,36 @@ def _rewrite_op_expr( def _rewrite_sub_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr: if dtypes.is_datetime_like(left.dtype) and dtypes.is_datetime_like(right.dtype): - return _TypedExpr.create_op_expr(ops.timestamp_diff_op, left, right) + return _TypedExpr.create_op_expr(ops.TimestampDiffOp(), left, right) if dtypes.is_datetime_like(left.dtype) and right.dtype == dtypes.TIMEDELTA_DTYPE: - return _TypedExpr.create_op_expr(ops.timestamp_sub_op, left, right) + return _TypedExpr.create_op_expr(ops.TimestampSubOp(), left, right) if left.dtype == dtypes.DATE_DTYPE and right.dtype == dtypes.DATE_DTYPE: - return _TypedExpr.create_op_expr(ops.date_diff_op, left, right) + return _TypedExpr.create_op_expr(ops.DateDiffOp(), left, right) if left.dtype == dtypes.DATE_DTYPE and right.dtype == dtypes.TIMEDELTA_DTYPE: - return _TypedExpr.create_op_expr(ops.date_sub_op, left, right) + return _TypedExpr.create_op_expr(ops.DateSubOp(), left, right) return _TypedExpr.create_op_expr(ops.sub_op, left, right) def _rewrite_add_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr: if dtypes.is_datetime_like(left.dtype) and right.dtype == dtypes.TIMEDELTA_DTYPE: - return _TypedExpr.create_op_expr(ops.timestamp_add_op, left, right) + return _TypedExpr.create_op_expr(ops.TimestampAddOp(), left, right) if left.dtype == dtypes.TIMEDELTA_DTYPE and dtypes.is_datetime_like(right.dtype): # Re-arrange operands such that timestamp is always on the left and timedelta is # always on the right. - return _TypedExpr.create_op_expr(ops.timestamp_add_op, right, left) + return _TypedExpr.create_op_expr(ops.TimestampAddOp(), right, left) if left.dtype == dtypes.DATE_DTYPE and right.dtype == dtypes.TIMEDELTA_DTYPE: - return _TypedExpr.create_op_expr(ops.date_add_op, left, right) + return _TypedExpr.create_op_expr(ops.DateAddOp(), left, right) if left.dtype == dtypes.TIMEDELTA_DTYPE and right.dtype == dtypes.DATE_DTYPE: # Re-arrange operands such that date is always on the left and timedelta is # always on the right. - return _TypedExpr.create_op_expr(ops.date_add_op, right, left) + return _TypedExpr.create_op_expr(ops.DateAddOp(), right, left) return _TypedExpr.create_op_expr(ops.add_op, left, right) @@ -184,9 +184,9 @@ def _rewrite_mul_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr: result = _TypedExpr.create_op_expr(ops.mul_op, left, right) if left.dtype == dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right.dtype): - return _TypedExpr.create_op_expr(ops.timedelta_floor_op, result) + return _TypedExpr.create_op_expr(ops.TimedeltaFloorOp(), result) if dtypes.is_numeric(left.dtype) and right.dtype == dtypes.TIMEDELTA_DTYPE: - return _TypedExpr.create_op_expr(ops.timedelta_floor_op, result) + return _TypedExpr.create_op_expr(ops.TimedeltaFloorOp(), result) return result @@ -195,7 +195,7 @@ def _rewrite_div_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr: result = _TypedExpr.create_op_expr(ops.div_op, left, right) if left.dtype == dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right.dtype): - return _TypedExpr.create_op_expr(ops.timedelta_floor_op, result) + return _TypedExpr.create_op_expr(ops.TimedeltaFloorOp(), result) return result @@ -204,7 +204,7 @@ def _rewrite_floordiv_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr: result = _TypedExpr.create_op_expr(ops.floordiv_op, left, right) if left.dtype == dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right.dtype): - return _TypedExpr.create_op_expr(ops.timedelta_floor_op, result) + return _TypedExpr.create_op_expr(ops.TimedeltaFloorOp(), result) return result diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index 3e0ebd5089..f6fb4d47c7 100644 --- a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -39,7 +39,7 @@ ne_op, ) from bigframes.operations.date_ops import ( - date_diff_op, + DateDiffOp, day_op, dayofweek_op, month_op, @@ -50,7 +50,7 @@ date_op, StrftimeOp, time_op, - timestamp_diff_op, + TimestampDiffOp, ToDatetimeOp, ToTimestampOp, UnixMicros, @@ -188,11 +188,11 @@ from bigframes.operations.struct_ops import StructFieldOp, StructOp from bigframes.operations.time_ops import hour_op, minute_op, normalize_op, second_op from bigframes.operations.timedelta_ops import ( - date_add_op, - date_sub_op, - timedelta_floor_op, - timestamp_add_op, - timestamp_sub_op, + DateAddOp, + DateSubOp, + TimedeltaFloorOp, + TimestampAddOp, + TimestampSubOp, ToTimedeltaOp, ) @@ -255,7 +255,7 @@ "upper_op", "ZfillOp", # Date ops - "date_diff_op", + "DateDiffOp", "day_op", "month_op", "year_op", @@ -267,16 +267,16 @@ "second_op", "normalize_op", # Timedelta ops - "date_add_op", - "date_sub_op", - "timedelta_floor_op", - "timestamp_add_op", - "timestamp_sub_op", + "DateAddOp", + "DateSubOp", + "TimedeltaFloorOp", + "TimestampAddOp", + "TimestampSubOp", "ToTimedeltaOp", # Datetime ops "date_op", "time_op", - "timestamp_diff_op", + "TimestampDiffOp", "ToDatetimeOp", "ToTimestampOp", "StrftimeOp", diff --git a/bigframes/operations/date_ops.py b/bigframes/operations/date_ops.py index 32d8eec118..5ff40a2667 100644 --- a/bigframes/operations/date_ops.py +++ b/bigframes/operations/date_ops.py @@ -16,6 +16,7 @@ import typing from bigframes import dtypes +from bigframes.core import log_adapter from bigframes.operations import base_ops import bigframes.operations.type as op_typing @@ -45,6 +46,7 @@ ) +@log_adapter.class_logger @dataclasses.dataclass(frozen=True) class DateDiffOp(base_ops.BinaryOp): name: typing.ClassVar[str] = "date_diff" @@ -59,6 +61,3 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT raise TypeError("expected date input") return dtypes.TIMEDELTA_DTYPE - - -date_diff_op = DateDiffOp() diff --git a/bigframes/operations/datetime_ops.py b/bigframes/operations/datetime_ops.py index 3ea4c652f1..b609a05578 100644 --- a/bigframes/operations/datetime_ops.py +++ b/bigframes/operations/datetime_ops.py @@ -19,6 +19,7 @@ import pyarrow as pa from bigframes import dtypes +from bigframes.core import log_adapter from bigframes.operations import base_ops import bigframes.operations.type as op_typing @@ -109,8 +110,9 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT return dtypes.INT_DTYPE +@log_adapter.class_logger @dataclasses.dataclass(frozen=True) -class TimestampDiff(base_ops.BinaryOp): +class TimestampDiffOp(base_ops.BinaryOp): name: typing.ClassVar[str] = "timestamp_diff" def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: @@ -123,6 +125,3 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT raise TypeError("expected timestamp input") return dtypes.TIMEDELTA_DTYPE - - -timestamp_diff_op = TimestampDiff() diff --git a/bigframes/operations/timedelta_ops.py b/bigframes/operations/timedelta_ops.py index 5e9a1189e4..7370817075 100644 --- a/bigframes/operations/timedelta_ops.py +++ b/bigframes/operations/timedelta_ops.py @@ -17,9 +17,11 @@ import typing from bigframes import dtypes +from bigframes.core import log_adapter from bigframes.operations import base_ops +@log_adapter.class_logger @dataclasses.dataclass(frozen=True) class ToTimedeltaOp(base_ops.UnaryOp): name: typing.ClassVar[str] = "to_timedelta" @@ -35,6 +37,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT raise TypeError("expected integer or float input") +@log_adapter.class_logger @dataclasses.dataclass(frozen=True) class TimedeltaFloorOp(base_ops.UnaryOp): """Floors the numeric value to the nearest integer and use it to represent a timedelta. @@ -51,9 +54,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT raise TypeError(f"unsupported type: {input_type}") -timedelta_floor_op = TimedeltaFloorOp() - - +@log_adapter.class_logger @dataclasses.dataclass(frozen=True) class TimestampAddOp(base_ops.BinaryOp): name: typing.ClassVar[str] = "timestamp_add" @@ -76,9 +77,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT ) -timestamp_add_op = TimestampAddOp() - - +@log_adapter.class_logger @dataclasses.dataclass(frozen=True) class TimestampSubOp(base_ops.BinaryOp): name: typing.ClassVar[str] = "timestamp_sub" @@ -96,9 +95,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT ) -timestamp_sub_op = TimestampSubOp() - - +@log_adapter.class_logger @dataclasses.dataclass(frozen=True) class DateAddOp(base_ops.BinaryOp): name: typing.ClassVar[str] = "date_add" @@ -122,9 +119,8 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT ) -date_add_op = DateAddOp() - +@log_adapter.class_logger @dataclasses.dataclass(frozen=True) class DateSubOp(base_ops.BinaryOp): name: typing.ClassVar[str] = "date_sub" @@ -141,5 +137,3 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT f"unsupported types for date_sub. left: {input_types[0]} right: {input_types[1]}" ) - -date_sub_op = DateSubOp() From c5f5973c49b84c7c58fe0345be5fe48297f995d1 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Mon, 31 Mar 2025 21:57:24 +0000 Subject: [PATCH 2/2] enable internal logging --- bigframes/core/compile/scalar_op_compiler.py | 1 - bigframes/operations/date_ops.py | 2 +- bigframes/operations/datetime_ops.py | 2 +- bigframes/operations/timedelta_ops.py | 14 ++++++-------- 4 files changed, 8 insertions(+), 11 deletions(-) diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py index e855144d4b..c47d37a82c 100644 --- a/bigframes/core/compile/scalar_op_compiler.py +++ b/bigframes/core/compile/scalar_op_compiler.py @@ -26,7 +26,6 @@ import numpy as np import pandas as pd -from bigframes.core import log_adapter from bigframes.core.compile.constants import UNIT_TO_US_CONVERSION_FACTORS import bigframes.core.compile.default_ordering import bigframes.core.compile.ibis_types diff --git a/bigframes/operations/date_ops.py b/bigframes/operations/date_ops.py index 5ff40a2667..e79d9fb95b 100644 --- a/bigframes/operations/date_ops.py +++ b/bigframes/operations/date_ops.py @@ -46,7 +46,7 @@ ) -@log_adapter.class_logger +@log_adapter.class_logger(include_internal_calls=True) @dataclasses.dataclass(frozen=True) class DateDiffOp(base_ops.BinaryOp): name: typing.ClassVar[str] = "date_diff" diff --git a/bigframes/operations/datetime_ops.py b/bigframes/operations/datetime_ops.py index b609a05578..e7f611badb 100644 --- a/bigframes/operations/datetime_ops.py +++ b/bigframes/operations/datetime_ops.py @@ -110,7 +110,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT return dtypes.INT_DTYPE -@log_adapter.class_logger +@log_adapter.class_logger(include_internal_calls=True) @dataclasses.dataclass(frozen=True) class TimestampDiffOp(base_ops.BinaryOp): name: typing.ClassVar[str] = "timestamp_diff" diff --git a/bigframes/operations/timedelta_ops.py b/bigframes/operations/timedelta_ops.py index 7370817075..e50a822832 100644 --- a/bigframes/operations/timedelta_ops.py +++ b/bigframes/operations/timedelta_ops.py @@ -21,7 +21,7 @@ from bigframes.operations import base_ops -@log_adapter.class_logger +@log_adapter.class_logger(include_internal_calls=True) @dataclasses.dataclass(frozen=True) class ToTimedeltaOp(base_ops.UnaryOp): name: typing.ClassVar[str] = "to_timedelta" @@ -37,7 +37,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT raise TypeError("expected integer or float input") -@log_adapter.class_logger +@log_adapter.class_logger(include_internal_calls=True) @dataclasses.dataclass(frozen=True) class TimedeltaFloorOp(base_ops.UnaryOp): """Floors the numeric value to the nearest integer and use it to represent a timedelta. @@ -54,7 +54,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT raise TypeError(f"unsupported type: {input_type}") -@log_adapter.class_logger +@log_adapter.class_logger(include_internal_calls=True) @dataclasses.dataclass(frozen=True) class TimestampAddOp(base_ops.BinaryOp): name: typing.ClassVar[str] = "timestamp_add" @@ -77,7 +77,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT ) -@log_adapter.class_logger +@log_adapter.class_logger(include_internal_calls=True) @dataclasses.dataclass(frozen=True) class TimestampSubOp(base_ops.BinaryOp): name: typing.ClassVar[str] = "timestamp_sub" @@ -95,7 +95,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT ) -@log_adapter.class_logger +@log_adapter.class_logger(include_internal_calls=True) @dataclasses.dataclass(frozen=True) class DateAddOp(base_ops.BinaryOp): name: typing.ClassVar[str] = "date_add" @@ -119,8 +119,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT ) - -@log_adapter.class_logger +@log_adapter.class_logger(include_internal_calls=True) @dataclasses.dataclass(frozen=True) class DateSubOp(base_ops.BinaryOp): name: typing.ClassVar[str] = "date_sub" @@ -136,4 +135,3 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT raise TypeError( f"unsupported types for date_sub. left: {input_types[0]} right: {input_types[1]}" ) -