34
34
import threading
35
35
import traceback
36
36
import uuid
37
+ import warnings
37
38
import zlib
38
39
from collections .abc import Iterable , Iterator , Sequence
39
40
from contextlib import closing , contextmanager
110
111
from superset .utils .database import get_example_database
111
112
from superset .utils .date_parser import parse_human_timedelta
112
113
from superset .utils .hashing import md5_sha_from_dict , md5_sha_from_str
114
+ from superset .utils .pandas import detect_datetime_format
113
115
114
116
if TYPE_CHECKING :
115
117
from superset .connectors .sqla .models import BaseDatasource , TableColumn
@@ -1858,6 +1860,62 @@ def get_legacy_time_column(
1858
1860
)
1859
1861
1860
1862
1863
def _process_datetime_column(
    df: pd.DataFrame,
    col: DateColumn,
) -> None:
    """
    Convert the column ``col.col_label`` of ``df`` to datetimes, in place.

    The conversion strategy depends on ``col.timestamp_format``:

    - ``"epoch_s"`` / ``"epoch_ms"`` on a numeric column: the values are
      interpreted as offsets from the Unix origin, in seconds or milliseconds.
    - ``"epoch_s"`` / ``"epoch_ms"`` on a non-numeric column: every non-null
      value is wrapped in ``pd.Timestamp``; if that raises ``ValueError`` a
      warning is logged and the column is left untouched.
    - any other value: the column is parsed with ``pd.to_datetime`` using the
      configured format or, when none is configured, whatever
      ``detect_datetime_format`` returns for the column. If neither yields a
      format, parsing runs without one and warnings matching
      "Could not infer format" are silenced for that call.

    :param df: DataFrame holding the column; it is mutated in place
    :param col: Column descriptor providing ``col_label`` and
        ``timestamp_format``
    """
    label = col.col_label
    configured_format = col.timestamp_format

    if configured_format in ("epoch_s", "epoch_ms"):
        raw_values = df[label]

        if is_numeric_dtype(raw_values):
            # Numeric epochs: the pandas unit ("s" / "ms") is what remains
            # once the "epoch_" prefix is stripped from the format name.
            df[label] = pd.to_datetime(
                raw_values,
                utc=False,
                unit=configured_format.replace("epoch_", ""),
                origin="unix",
                errors="coerce",
                exact=False,
            )
            return

        # Non-numeric values are treated as already-formatted timestamps and
        # converted one by one, with nulls mapped to NaT.
        def _to_timestamp(value):
            return pd.Timestamp(value) if pd.notna(value) else pd.NaT

        try:
            df[label] = raw_values.apply(_to_timestamp)
        except ValueError:
            logger.warning(
                "Unable to convert column %s to datetime, ignoring",
                label,
            )
        return

    # Only fall back to detection when no format has been configured.
    resolved_format = configured_format or detect_datetime_format(df[label])

    if resolved_format:
        df[label] = pd.to_datetime(
            df[label],
            utc=False,
            format=resolved_format,
            errors="coerce",
            exact=False,
        )
        return

    # No usable format: let pandas parse freely, silencing the warning it
    # emits about not being able to infer one.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", message=".*Could not infer format.*")
        df[label] = pd.to_datetime(
            df[label],
            utc=False,
            format=None,
            errors="coerce",
            exact=False,
        )
1917
+
1918
+
1861
1919
def normalize_dttm_col (
1862
1920
df : pd .DataFrame ,
1863
1921
dttm_cols : tuple [DateColumn , ...] = tuple (), # noqa: C408
@@ -1866,38 +1924,8 @@ def normalize_dttm_col(
1866
1924
if _col .col_label not in df .columns :
1867
1925
continue
1868
1926
1869
- if _col .timestamp_format in ("epoch_s" , "epoch_ms" ):
1870
- dttm_series = df [_col .col_label ]
1871
- if is_numeric_dtype (dttm_series ):
1872
- # Column is formatted as a numeric value
1873
- unit = _col .timestamp_format .replace ("epoch_" , "" )
1874
- df [_col .col_label ] = pd .to_datetime (
1875
- dttm_series ,
1876
- utc = False ,
1877
- unit = unit ,
1878
- origin = "unix" ,
1879
- errors = "coerce" ,
1880
- exact = False ,
1881
- )
1882
- else :
1883
- # Column has already been formatted as a timestamp.
1884
- try :
1885
- df [_col .col_label ] = dttm_series .apply (
1886
- lambda x : pd .Timestamp (x ) if pd .notna (x ) else pd .NaT
1887
- )
1888
- except ValueError :
1889
- logger .warning (
1890
- "Unable to convert column %s to datetime, ignoring" ,
1891
- _col .col_label ,
1892
- )
1893
- else :
1894
- df [_col .col_label ] = pd .to_datetime (
1895
- df [_col .col_label ],
1896
- utc = False ,
1897
- format = _col .timestamp_format ,
1898
- errors = "coerce" ,
1899
- exact = False ,
1900
- )
1927
+ _process_datetime_column (df , _col )
1928
+
1901
1929
if _col .offset :
1902
1930
df [_col .col_label ] += timedelta (hours = _col .offset )
1903
1931
if _col .time_shift is not None :
0 commit comments