167
167
stata_epoch = datetime .datetime (1960 , 1 , 1 )
168
168
169
169
170
- def _stata_elapsed_date_to_datetime_vec (dates : Series , fmt : str ) -> Series :
170
+ # TODO: Add typing. As of January 2020 it is not possible to type this function since
171
+ # mypy doesn't understand that a Series and an int can be combined using mathematical
172
+ # operations (+, -).
173
+ def _stata_elapsed_date_to_datetime_vec (dates , fmt ) -> Series :
171
174
"""
172
175
Convert from SIF to datetime. http://www.stata.com/help.cgi?datetime
173
176
@@ -219,7 +222,7 @@ def _stata_elapsed_date_to_datetime_vec(dates: Series, fmt: str) -> Series:
219
222
MIN_MS_DELTA = MIN_DAY_DELTA * 24 * 3600 * 1000
220
223
MAX_MS_DELTA = MAX_DAY_DELTA * 24 * 3600 * 1000
221
224
222
- def convert_year_month_safe (year : Series , month : Series ) -> Series :
225
+ def convert_year_month_safe (year , month ) -> Series :
223
226
"""
224
227
Convert year and month to datetimes, using pandas vectorized versions
225
228
when the date range falls within the range supported by pandas.
@@ -234,7 +237,7 @@ def convert_year_month_safe(year: Series, month: Series) -> Series:
234
237
[datetime .datetime (y , m , 1 ) for y , m in zip (year , month )], index = index
235
238
)
236
239
237
- def convert_year_days_safe (year : Series , days : Series ) -> Series :
240
+ def convert_year_days_safe (year , days ) -> Series :
238
241
"""
239
242
Converts year (e.g. 1999) and days since the start of the year to a
240
243
datetime or datetime64 Series
@@ -249,9 +252,7 @@ def convert_year_days_safe(year: Series, days: Series) -> Series:
249
252
]
250
253
return Series (value , index = index )
251
254
252
- def convert_delta_safe (
253
- base : datetime .datetime , deltas : Series , unit : str
254
- ) -> Series :
255
+ def convert_delta_safe (base , deltas , unit ) -> Series :
255
256
"""
256
257
Convert base dates and deltas to datetimes, using pandas vectorized
257
258
versions if the deltas satisfy restrictions required to be expressed
@@ -298,21 +299,21 @@ def convert_delta_safe(
298
299
# Delta days relative to base
299
300
elif fmt .startswith (("%td" , "td" , "%d" , "d" )):
300
301
base = stata_epoch
301
- days : Series = dates
302
+ days = dates
302
303
conv_dates = convert_delta_safe (base , days , "d" )
303
304
# does not count leap days - 7 days is a week.
304
305
# 52nd week may have more than 7 days
305
306
elif fmt .startswith (("%tw" , "tw" )):
306
- year : Series = stata_epoch .year + dates // 52
307
+ year = stata_epoch .year + dates // 52
307
308
days = (dates % 52 ) * 7
308
309
conv_dates = convert_year_days_safe (year , days )
309
310
elif fmt .startswith (("%tm" , "tm" )): # Delta months relative to base
310
311
year = stata_epoch .year + dates // 12
311
- month : Series = (dates % 12 ) + 1
312
+ month = (dates % 12 ) + 1
312
313
conv_dates = convert_year_month_safe (year , month )
313
314
elif fmt .startswith (("%tq" , "tq" )): # Delta quarters relative to base
314
315
year = stata_epoch .year + dates // 4
315
- quarter_month : Series = (dates % 4 ) * 3 + 1
316
+ quarter_month = (dates % 4 ) * 3 + 1
316
317
conv_dates = convert_year_month_safe (year , quarter_month )
317
318
elif fmt .startswith (("%th" , "th" )): # Delta half-years relative to base
318
319
year = stata_epoch .year + dates // 2
@@ -347,19 +348,19 @@ def _datetime_to_stata_elapsed_vec(dates: Series, fmt: str) -> Series:
347
348
NS_PER_DAY = 24 * 3600 * 1000 * 1000 * 1000
348
349
US_PER_DAY = NS_PER_DAY / 1000
349
350
350
- def parse_dates_safe (
351
- dates : Series , delta : bool = False , year : bool = False , days : bool = False
352
- ):
353
- d : Dict [str , Any ] = {}
351
+ def parse_dates_safe (dates , delta = False , year = False , days = False ):
352
+ d = {}
354
353
if is_datetime64_dtype (dates .values ):
355
354
if delta :
356
- time_delta : Series = dates - stata_epoch
355
+ time_delta = dates - stata_epoch
357
356
d ["delta" ] = time_delta .values .astype (np .int64 ) // 1000 # microseconds
358
357
if days or year :
358
+ # ignore since mypy reports that DatetimeIndex has no year/month
359
359
date_index = DatetimeIndex (dates )
360
- d ["year" ], d ["month" ] = date_index .year , date_index .month
360
+ d ["year" ] = date_index .year # type: ignore
361
+ d ["month" ] = date_index .month # type: ignore
361
362
if days :
362
- days_in_ns : Series = dates .astype (np .int64 ) - to_datetime (
363
+ days_in_ns = dates .astype (np .int64 ) - to_datetime (
363
364
d ["year" ], format = "%Y"
364
365
).astype (np .int64 )
365
366
d ["days" ] = days_in_ns // NS_PER_DAY
@@ -588,10 +589,10 @@ def __init__(self, catarray: Series, encoding: str = "latin-1"):
588
589
categories = catarray .cat .categories
589
590
self .value_labels = list (zip (np .arange (len (categories )), categories ))
590
591
self .value_labels .sort (key = lambda x : x [0 ])
591
- self .text_len = np . int32 ( 0 )
592
- self .off = []
593
- self .val = []
594
- self .txt = []
592
+ self .text_len = 0
593
+ self .off : List [ int ] = []
594
+ self .val : List [ int ] = []
595
+ self .txt : List [ bytes ] = []
595
596
self .n = 0
596
597
597
598
# Compute lengths and setup lists of offsets and labels
@@ -2131,7 +2132,7 @@ def _prepare_categoricals(self, data: DataFrame) -> DataFrame:
2131
2132
2132
2133
is_cat = [is_categorical_dtype (data [col ]) for col in data ]
2133
2134
self ._is_col_cat = is_cat
2134
- self ._value_labels = []
2135
+ self ._value_labels : List [ StataValueLabel ] = []
2135
2136
if not any (is_cat ):
2136
2137
return data
2137
2138
@@ -2290,8 +2291,8 @@ def _check_column_names(self, data: DataFrame) -> DataFrame:
2290
2291
return data
2291
2292
2292
2293
def _set_formats_and_types (self , dtypes : Series ) -> None :
2293
- self .typlist = []
2294
- self .fmtlist = []
2294
+ self .fmtlist : List [ str ] = []
2295
+ self .typlist : List [ int ] = []
2295
2296
for col , dtype in dtypes .items ():
2296
2297
self .fmtlist .append (_dtype_to_default_stata_fmt (dtype , self .data [col ]))
2297
2298
self .typlist .append (_dtype_to_stata_type (dtype , self .data [col ]))
0 commit comments