167
167
stata_epoch = datetime .datetime (1960 , 1 , 1 )
168
168
169
169
170
- def _stata_elapsed_date_to_datetime_vec (dates : Series , fmt : str ) -> Series :
170
+ def _stata_elapsed_date_to_datetime_vec (dates , fmt ) -> Series :
171
171
"""
172
172
Convert from SIF to datetime. http://www.stata.com/help.cgi?datetime
173
173
@@ -219,7 +219,7 @@ def _stata_elapsed_date_to_datetime_vec(dates: Series, fmt: str) -> Series:
219
219
MIN_MS_DELTA = MIN_DAY_DELTA * 24 * 3600 * 1000
220
220
MAX_MS_DELTA = MAX_DAY_DELTA * 24 * 3600 * 1000
221
221
222
- def convert_year_month_safe (year : Series , month : Series ) -> Series :
222
+ def convert_year_month_safe (year , month ) -> Series :
223
223
"""
224
224
Convert year and month to datetimes, using pandas vectorized versions
225
225
when the date range falls within the range supported by pandas.
@@ -234,7 +234,7 @@ def convert_year_month_safe(year: Series, month: Series) -> Series:
234
234
[datetime .datetime (y , m , 1 ) for y , m in zip (year , month )], index = index
235
235
)
236
236
237
- def convert_year_days_safe (year : Series , days : Series ) -> Series :
237
+ def convert_year_days_safe (year , days ) -> Series :
238
238
"""
239
239
Converts year (e.g. 1999) and days since the start of the year to a
240
240
datetime or datetime64 Series
@@ -249,9 +249,7 @@ def convert_year_days_safe(year: Series, days: Series) -> Series:
249
249
]
250
250
return Series (value , index = index )
251
251
252
- def convert_delta_safe (
253
- base : datetime .datetime , deltas : Series , unit : str
254
- ) -> Series :
252
+ def convert_delta_safe (base , deltas , unit ) -> Series :
255
253
"""
256
254
Convert base dates and deltas to datetimes, using pandas vectorized
257
255
versions if the deltas satisfy restrictions required to be expressed
@@ -298,21 +296,21 @@ def convert_delta_safe(
298
296
# Delta days relative to base
299
297
elif fmt .startswith (("%td" , "td" , "%d" , "d" )):
300
298
base = stata_epoch
301
- days : Series = dates
299
+ days = dates
302
300
conv_dates = convert_delta_safe (base , days , "d" )
303
301
# does not count leap days - 7 days is a week.
304
302
# 52nd week may have more than 7 days
305
303
elif fmt .startswith (("%tw" , "tw" )):
306
- year : Series = stata_epoch .year + dates // 52
304
+ year = stata_epoch .year + dates // 52
307
305
days = (dates % 52 ) * 7
308
306
conv_dates = convert_year_days_safe (year , days )
309
307
elif fmt .startswith (("%tm" , "tm" )): # Delta months relative to base
310
308
year = stata_epoch .year + dates // 12
311
- month : Series = (dates % 12 ) + 1
309
+ month = (dates % 12 ) + 1
312
310
conv_dates = convert_year_month_safe (year , month )
313
311
elif fmt .startswith (("%tq" , "tq" )): # Delta quarters relative to base
314
312
year = stata_epoch .year + dates // 4
315
- quarter_month : Series = (dates % 4 ) * 3 + 1
313
+ quarter_month = (dates % 4 ) * 3 + 1
316
314
conv_dates = convert_year_month_safe (year , quarter_month )
317
315
elif fmt .startswith (("%th" , "th" )): # Delta half-years relative to base
318
316
year = stata_epoch .year + dates // 2
@@ -347,19 +345,19 @@ def _datetime_to_stata_elapsed_vec(dates: Series, fmt: str) -> Series:
347
345
NS_PER_DAY = 24 * 3600 * 1000 * 1000 * 1000
348
346
US_PER_DAY = NS_PER_DAY / 1000
349
347
350
- def parse_dates_safe (
351
- dates : Series , delta : bool = False , year : bool = False , days : bool = False
352
- ):
353
- d : Dict [str , Any ] = {}
348
+ def parse_dates_safe (dates , delta = False , year = False , days = False ):
349
+ d = {}
354
350
if is_datetime64_dtype (dates .values ):
355
351
if delta :
356
- time_delta : Series = dates - stata_epoch
352
+ time_delta = dates - stata_epoch
357
353
d ["delta" ] = time_delta .values .astype (np .int64 ) // 1000 # microseconds
358
354
if days or year :
355
+ # ignore since mypy reports that DatetimeIndex has no year/month
359
356
date_index = DatetimeIndex (dates )
360
- d ["year" ], d ["month" ] = date_index .year , date_index .month
357
+ d ["year" ] = date_index .year # type: ignore
358
+ d ["month" ] = date_index .month # type: ignore
361
359
if days :
362
- days_in_ns : Series = dates .astype (np .int64 ) - to_datetime (
360
+ days_in_ns = dates .astype (np .int64 ) - to_datetime (
363
361
d ["year" ], format = "%Y"
364
362
).astype (np .int64 )
365
363
d ["days" ] = days_in_ns // NS_PER_DAY
@@ -588,10 +586,10 @@ def __init__(self, catarray: Series, encoding: str = "latin-1"):
588
586
categories = catarray .cat .categories
589
587
self .value_labels = list (zip (np .arange (len (categories )), categories ))
590
588
self .value_labels .sort (key = lambda x : x [0 ])
591
- self .text_len = np . int32 ( 0 )
592
- self .off = []
593
- self .val = []
594
- self .txt = []
589
+ self .text_len = 0
590
+ self .off : List [ int ] = []
591
+ self .val : List [ int ] = []
592
+ self .txt : List [ bytes ] = []
595
593
self .n = 0
596
594
597
595
# Compute lengths and setup lists of offsets and labels
@@ -2131,7 +2129,7 @@ def _prepare_categoricals(self, data: DataFrame) -> DataFrame:
2131
2129
2132
2130
is_cat = [is_categorical_dtype (data [col ]) for col in data ]
2133
2131
self ._is_col_cat = is_cat
2134
- self ._value_labels = []
2132
+ self ._value_labels : List [ StataValueLabel ] = []
2135
2133
if not any (is_cat ):
2136
2134
return data
2137
2135
@@ -2290,8 +2288,8 @@ def _check_column_names(self, data: DataFrame) -> DataFrame:
2290
2288
return data
2291
2289
2292
2290
def _set_formats_and_types (self , dtypes : Series ) -> None :
2293
- self .typlist = []
2294
- self .fmtlist = []
2291
+ self .fmtlist : List [ str ] = []
2292
+ self .typlist : List [ int ] = []
2295
2293
for col , dtype in dtypes .items ():
2296
2294
self .fmtlist .append (_dtype_to_default_stata_fmt (dtype , self .data [col ]))
2297
2295
self .typlist .append (_dtype_to_stata_type (dtype , self .data [col ]))
0 commit comments