Skip to content

Commit ff4a693

Browse files
authored
Get parquet contents on remote directories. (#576)
* Always send remote directory contents on read.
* Use live nested-pandas (installed from the `main` branch).
1 parent 340bb60 commit ff4a693

File tree

2 files changed

+3
-5
lines changed

2 files changed

+3
-5
lines changed

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
# empty
1+
git+https://github.com/lincc-frameworks/nested-pandas.git@main

src/hats/io/file_io/file_io.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -303,11 +303,9 @@ def read_parquet_file_to_pandas(file_pointer: str | Path | UPath, **kwargs) -> n
303303
Pandas DataFrame with the data from the parquet file(s)
304304
"""
305305
file_pointer = get_upath(file_pointer)
306-
# If we are trying to read a directory over http, we need to send the explicit list of files instead.
306+
# If we are trying to read a remote directory, we need to send the explicit list of files instead.
307307
# We don't want to get the list unnecessarily because it can be expensive.
308-
if (
309-
isinstance(file_pointer, upath.implementations.http.HTTPPath) and file_pointer.is_dir()
310-
): # pragma: no cover
308+
if file_pointer.protocol not in ("", "file") and file_pointer.is_dir(): # pragma: no cover
311309
file_pointers = [f for f in file_pointer.iterdir() if f.is_file()]
312310
return npd.read_parquet(
313311
file_pointers,

0 commit comments

Comments
 (0)