@@ -178,6 +178,7 @@ class BatchMessage(Message):
         If none is provided, 'jsonl' will be assumed. e.g. 'csv'.
       * compression (string, optional) - An indication of file compression format. e.g. 'gzip'.
       * batch_size (int, optional) - Number of records in this batch. e.g. 100000.
+      * time_extracted (datetime, optional) - TZ-aware datetime with batch extraction time.
 
     If file_properties are not provided, uncompressed jsonl files are assumed.
 
@@ -192,12 +193,19 @@ class BatchMessage(Message):
 
     """
 
-    def __init__(self, stream, filepath, file_format=None, compression=None, batch_size=None):
+    def __init__(
+            self, stream, filepath, file_format=None, compression=None,
+            batch_size=None, time_extracted=None
+    ):
         self.stream = stream
         self.filepath = filepath
         self.format = file_format or 'jsonl'
         self.compression = compression
         self.batch_size = batch_size
+        self.time_extracted = time_extracted
+        if time_extracted and not time_extracted.tzinfo:
+            raise ValueError("'time_extracted' must be either None " +
+                             "or an aware datetime (with a time zone)")
 
     def asdict(self):
         result = {
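
As a quick illustration of the new constructor behaviour (a sketch only, not part of the patch, assuming the module is importable as singer.messages the way it is in singer-python): a timezone-aware time_extracted is stored as given, while a naive datetime is rejected at construction time.

# Sketch, not part of the patch: assumes this module is importable as
# singer.messages (as in singer-python).
import datetime

from singer.messages import BatchMessage

# Aware datetime (tzinfo set): accepted and stored on the message unchanged.
batch = BatchMessage(
    stream='users',
    filepath='/tmp/users-batch-000.jsonl',
    time_extracted=datetime.datetime(2021, 3, 1, 12, 0, tzinfo=datetime.timezone.utc),
)

# Naive datetime (no tzinfo): __init__ raises ValueError.
try:
    BatchMessage(
        stream='users',
        filepath='/tmp/users-batch-000.jsonl',
        time_extracted=datetime.datetime(2021, 3, 1, 12, 0),
    )
except ValueError as err:
    print(err)
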
@@ -210,6 +218,9 @@ def asdict(self):
             result['compression'] = self.compression
         if self.batch_size is not None:
             result['batch_size'] = self.batch_size
+        if self.time_extracted:
+            as_utc = self.time_extracted.astimezone(pytz.utc)
+            result['time_extracted'] = u.strftime(as_utc)
         return result
 
 
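
On the serialization side, a sketch of what asdict() produces, assuming the module's existing pytz and u imports behave as in singer-python (where u is singer.utils and u.strftime renders an ISO 8601 UTC timestamp):

# Sketch, not part of the patch: assumes singer.messages / singer.utils
# as found in singer-python; the exact timestamp format may differ.
import datetime

from singer.messages import BatchMessage

batch = BatchMessage(
    stream='users',
    filepath='/tmp/users-batch-000.jsonl',
    batch_size=100000,
    time_extracted=datetime.datetime(2021, 3, 1, 12, 0, tzinfo=datetime.timezone.utc),
)

# time_extracted appears as a UTC string, e.g. '2021-03-01T12:00:00.000000Z'.
print(batch.asdict())
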
@@ -262,11 +273,22 @@ def parse_message(msg):
                                       version=_required_key(obj, 'version'))
 
     elif msg_type == 'BATCH':
-        return BatchMessage(stream=_required_key(obj, 'stream'),
-                            filepath=_required_key(obj, 'filepath'),
-                            file_format=_required_key(obj, 'format'),
-                            compression=obj.get('compression'),
-                            batch_size=obj.get('batch_size'))
+        time_extracted = obj.get('time_extracted')
+        if time_extracted:
+            try:
+                time_extracted = ciso8601.parse_datetime(time_extracted)
+            except:
+                LOGGER.warning("unable to parse time_extracted with ciso8601 library")
+                time_extracted = None
+
+        return BatchMessage(
+            stream=_required_key(obj, 'stream'),
+            filepath=_required_key(obj, 'filepath'),
+            file_format=_required_key(obj, 'format'),
+            compression=obj.get('compression'),
+            batch_size=obj.get('batch_size'),
+            time_extracted=time_extracted
+        )
 
     else:
         return None
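
The parsing side round-trips the same field. A sketch, assuming parse_message is importable as in singer-python; note that an unparseable timestamp is only logged and dropped rather than failing the whole message:

# Sketch, not part of the patch: assumes singer.messages as in singer-python.
import json

from singer.messages import parse_message

msg = json.dumps({
    'type': 'BATCH',
    'stream': 'users',
    'filepath': '/tmp/users-batch-000.jsonl',
    'format': 'jsonl',
    'time_extracted': '2021-03-01T12:00:00.000000Z',
})

batch = parse_message(msg)
# ciso8601 returns a timezone-aware datetime here; if parsing fails,
# time_extracted is set to None and a warning is logged instead.
print(batch.time_extracted)
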
@@ -342,7 +364,7 @@ def write_version(stream_name, version):
 
 def write_batch(
         stream_name, filepath, file_format=None,
-        compression=None, batch_size=None
+        compression=None, batch_size=None, time_extracted=None
 ):
     """Write a batch message.
 
@@ -352,4 +374,13 @@ def write_batch(
     compression = None
     batch_size = 100000
     """
-    write_message(BatchMessage(stream_name, filepath, file_format, compression, batch_size))
+    write_message(
+        BatchMessage(
+            stream=stream_name,
+            filepath=filepath,
+            file_format=file_format,
+            compression=compression,
+            batch_size=batch_size,
+            time_extracted=time_extracted
+        )
+    )
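
End to end, a tap would simply pass the extraction time through the write_batch helper. A sketch, assuming write_batch is re-exported at the package level like the other writers; if it is not, import it from the messages module directly:

# Sketch, not part of the patch: assumes write_batch is exposed as
# singer.write_batch, mirroring singer-python's other write_* helpers.
import datetime

import singer

singer.write_batch(
    stream_name='users',
    filepath='/tmp/users-batch-000.jsonl.gz',
    file_format='jsonl',
    compression='gzip',
    batch_size=100000,
    time_extracted=datetime.datetime.now(datetime.timezone.utc),
)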