@@ -291,53 +291,47 @@ def _extract_zip(from_path: str, to_path: str, compression: Optional[str]) -> No
291
291
}
292
292
293
293
294
- def _verify_archive_type (archive_type : str ) -> None :
295
- if archive_type not in _ARCHIVE_EXTRACTORS .keys ():
296
- valid_types = "', '" .join (_ARCHIVE_EXTRACTORS .keys ())
297
- raise RuntimeError (f"Unknown archive type '{ archive_type } '. Known archive types are '{ valid_types } '." )
298
-
294
def _detect_file_type(file: str) -> Tuple[str, Optional[str], Optional[str]]:
    """Detect the archive type and/or compression of a file.

    Detection is based purely on the filename. The last suffix is looked up, in
    order, in ``_FILE_TYPE_ALIASES``, ``_ARCHIVE_EXTRACTORS`` and
    ``_COMPRESSED_FILE_OPENERS``. If the last suffix is a compression and the
    suffix right before it is an archive type, the pair is reported together as
    a compressed archive. Any suffixes further to the left are ignored.

    Args:
        file (str): the filename

    Returns:
        (tuple): tuple of suffix, archive type, and compression. Archive type
        is ``None`` for a plain compressed file and compression is ``None`` for
        an uncompressed archive.

    Raises:
        RuntimeError: if file has no suffix or suffix is not supported
    """
    suffixes = pathlib.Path(file).suffixes
    if not suffixes:
        raise RuntimeError(
            f"File '{file}' has no suffixes that could be used to detect the archive type and compression."
        )
    suffix = suffixes[-1]

    # check if the suffix is a known alias
    # NOTE(review): assumes each alias maps to an (archive type, compression) pair -- confirm against the table
    if suffix in _FILE_TYPE_ALIASES:
        return (suffix, *_FILE_TYPE_ALIASES[suffix])

    # check if the suffix is an archive type
    if suffix in _ARCHIVE_EXTRACTORS:
        return suffix, suffix, None

    # check if the suffix is a compression
    if suffix in _COMPRESSED_FILE_OPENERS:
        # check for suffix hierarchy
        if len(suffixes) > 1:
            suffix2 = suffixes[-2]

            # check if the suffix2 is an archive type
            if suffix2 in _ARCHIVE_EXTRACTORS:
                return suffix2 + suffix, suffix2, suffix

        return suffix, None, suffix

    valid_suffixes = sorted(set(_FILE_TYPE_ALIASES) | set(_ARCHIVE_EXTRACTORS) | set(_COMPRESSED_FILE_OPENERS))
    # Join explicitly so every suffix is individually quoted, instead of
    # embedding a list repr between the surrounding quotes.
    known_suffixes = "', '".join(valid_suffixes)
    raise RuntimeError(
        f"Unknown compression or archive type: '{suffix}'.\nKnown suffixes are: '{known_suffixes}'."
    )
341
335
342
336
343
337
def _decompress (from_path : str , to_path : Optional [str ] = None , remove_finished : bool = False ) -> str :
0 commit comments