Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions data_diff/abcs/database_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,11 @@ class Date(TemporalType):
pass


@attrs.define(frozen=True)
class Time(TemporalType):
    """Temporal column type for time-of-day columns (e.g. SQL ``TIME``).

    Adds no fields or behavior of its own beyond ``TemporalType``; it exists
    so that dialects can map their time-of-day column types to a distinct
    class and recognize such columns with ``isinstance`` checks.
    """


@attrs.define(frozen=True)
class NumericType(ColType):
# 'precision' signifies how many fractional digits (after the dot) we want to compare
Expand Down
19 changes: 19 additions & 0 deletions data_diff/databases/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
TemporalType,
Boolean,
UnknownColType,
Time,
Date,
)
from data_diff.databases.base import (
BaseDialect,
Expand Down Expand Up @@ -63,6 +65,8 @@ class Dialect(BaseDialect):
# Dates
"TIMESTAMP": Timestamp,
"DATETIME": Datetime,
"DATE": Date,
"TIME": Time,
# Numbers
"INT64": Integer,
"INT32": Integer,
Expand Down Expand Up @@ -160,6 +164,21 @@ def md5_as_hex(self, s: str) -> str:
return f"md5({s})"

def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:
try:
is_date = coltype.is_date
is_time = coltype.is_time
except:
is_date = False
is_time = False
if isinstance(coltype, Date) or is_date:
return f"FORMAT_DATE('%F', {value})"
if isinstance(coltype, Time) or is_time:
microseconds = f"TIME_DIFF( {value}, cast('00:00:00' as time), microsecond)"
rounded = f"ROUND({microseconds}, -6 + {coltype.precision})"
time_value = f"TIME_ADD(cast('00:00:00' as time), interval cast({rounded} as int64) microsecond)"
converted = f"FORMAT_TIME('%H:%M:%E6S', {time_value})"
return converted

if coltype.rounds:
timestamp = f"timestamp_micros(cast(round(unix_micros(cast({value} as timestamp))/1000000, {coltype.precision})*1000000 as int))"
return f"FORMAT_TIMESTAMP('%F %H:%M:%E6S', {timestamp})"
Expand Down
4 changes: 3 additions & 1 deletion data_diff/databases/mssql.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
)
from data_diff.abcs.database_types import (
JSON,
Date,
NumericType,
Timestamp,
TimestampTZ,
Expand All @@ -25,6 +24,8 @@
Native_UUID,
Text,
Boolean,
Date,
Time
)


Expand All @@ -48,6 +49,7 @@ class Dialect(BaseDialect):
"datetime2": Timestamp,
"smalldatetime": Timestamp,
"date": Date,
"time": Time,
# Numbers
"float": Float,
"real": Float,
Expand Down
20 changes: 20 additions & 0 deletions data_diff/databases/postgresql.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
FractionalType,
Boolean,
Date,
Time
)
from data_diff.databases.base import BaseDialect, ThreadedDatabase, import_helper, ConnectError
from data_diff.databases.base import (
Expand Down Expand Up @@ -57,6 +58,8 @@ class PostgresqlDialect(BaseDialect):
"timestamp without time zone": Timestamp,
"timestamp": Timestamp,
"date": Date,
"time with time zone": Time,
"time without time zone": Time,
# Numbers
"double precision": Float,
"real": Float,
Expand Down Expand Up @@ -111,6 +114,23 @@ def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:
def _add_padding(coltype: TemporalType, timestamp6: str):
return f"RPAD(LEFT({timestamp6}, {TIMESTAMP_PRECISION_POS+coltype.precision}), {TIMESTAMP_PRECISION_POS+6}, '0')"

try:
is_date = coltype.is_date
is_time = coltype.is_time
except:
is_date = False
is_time = False

if isinstance(coltype, Date) or is_date:
return f"cast({value} as varchar)"

if isinstance(coltype, Time) or is_time:
seconds = f"EXTRACT( epoch from {value})"
rounded = f"ROUND({seconds}, {coltype.precision})"
time_value = f"CAST('00:00:00' as time) + make_interval(0, 0, 0, 0, 0, 0, {rounded})" # 6th arg = seconds
converted = f"to_char({time_value}, 'hh24:mi:ss.ff6')"
return converted

if coltype.rounds:
# NULL value expected to return NULL after normalization
null_case_begin = f"CASE WHEN {value} IS NULL THEN NULL ELSE "
Expand Down
17 changes: 17 additions & 0 deletions data_diff/databases/snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
DbPath,
Boolean,
Date,
Time,
)
from data_diff.databases.base import (
BaseDialect,
Expand Down Expand Up @@ -45,6 +46,7 @@ class Dialect(BaseDialect):
"TIMESTAMP_LTZ": Timestamp,
"TIMESTAMP_TZ": TimestampTZ,
"DATE": Date,
"TIME": Time,
# Numbers
"NUMBER": Decimal,
"FLOAT": Float,
Expand Down Expand Up @@ -81,6 +83,21 @@ def md5_as_hex(self, s: str) -> str:
return f"md5({s})"

def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:
try:
is_date = coltype.is_date
is_time = coltype.is_time
except:
is_date = False
is_time = False
if isinstance(coltype, Date) or is_date:
return f"({value}::varchar)"
elif isinstance(coltype, Time) or is_time:
microseconds = f"TIMEDIFF(microsecond, cast('00:00:00' as time), {value})"
rounded = f"round({microseconds}, -6 + {coltype.precision})"
time_value = f"TIMEADD(microsecond, {rounded}, cast('00:00:00' as time))"
converted = f"TO_VARCHAR({time_value}, 'HH24:MI:SS.FF6')"
return converted

if coltype.rounds:
timestamp = f"to_timestamp(round(date_part(epoch_nanosecond, convert_timezone('UTC', {value})::timestamp(9))/1000000000, {coltype.precision}))"
else:
Expand Down