Skip to content

Commit 7c57312

Browse files
Simplified method signature to save_results, as download context is already understood.
1 parent e418d29 commit 7c57312

6 files changed

Lines changed: 75 additions & 71 deletions

File tree

README.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,7 @@ source venv/bin/activate
9595

9696
### Basic Usage
9797

98-
The following examples will download Article PDFs to a 'downloads' directory,
99-
relative to your current working directory, and name the files `<doi>`.pdf. Run all code in your [Virtual Environment](#install).
98+
The following examples will download Article PDFs to a 'downloads' directory, and name the files `<doi>`.pdf. All file & directory paths are relative to your current working directory (pwd). Run all code in your [Virtual Environment](#install).
10099

101100
**Instantiate client**
102101
```python
@@ -121,6 +120,11 @@ tdm.download_pdfs(["10.1111/jtsb.12390", "10.1111/jlse.12141"])
121120
tdm.download_pdfs("dois.txt")
122121
```
123122

123+
**Save download results to file (default: results.csv)**
124+
```python
125+
tdm.save_results()
126+
```
127+
124128
**More examples**
125129

126130
See more [examples](examples/).

examples/example3.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,4 +32,4 @@
3232
tdm.download_pdfs("oa-dois.txt")
3333

3434
# Save the download results to a CSV file: 'results.csv'
35-
tdm.save_download_results()
35+
tdm.save_results()

src/wiley_tdm/tdm_client.py

Lines changed: 59 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ def __init__(
105105
# Public attributes (set via public properties)
106106
self.download_dir = download_dir
107107
self.api_rate_limit = TDMClient.API_RATE_LIMIT
108-
self.download_results_errors_only: bool = False
108+
self.only_record_errors: bool = False
109109
self.skip_existing_files: bool = True
110110

111111
# Private attributes
@@ -114,7 +114,7 @@ def __init__(
114114
self._api_token: str
115115
self._api_headers: dict[str, str] = {}
116116
self._api_session: requests.Session
117-
self._download_results: List[DownloadResult] = []
117+
self._results: List[DownloadResult] = []
118118

119119
self._validate_api_token(api_token)
120120
self._create_api_session()
@@ -260,9 +260,57 @@ def download_pdf(self, doi: str) -> DownloadResult:
260260
duration = time.perf_counter() - start_time
261261
result.duration = duration
262262

263-
self._add_download_result(result)
263+
self._add_result(result)
264264
return result
265265

266+
def download_pdfs(
267+
self,
268+
dois: Union[List[str], StrPath],
269+
on_result: Optional[Callable[[DownloadResult], None]] = None,
270+
) -> List[DownloadResult]:
271+
"""Download multiple PDFs from either a list of DOIs or a file containing DOIs.
272+
273+
Args:
274+
dois: Either a list of DOI strings or a path to a file containing DOIs (one per line)
275+
on_result: Optional callback function called after each download
276+
Relative paths are resolved from the current working directory
277+
278+
Returns:
279+
List[DownloadResult]: List of download results
280+
281+
Raises:
282+
FileNotFoundError: If dois is a file path that doesn't exist
283+
ValueError: If dois list is empty
284+
"""
285+
# Check if `dois` is a string or Path (matches StrPath type alias)
286+
if isinstance(dois, (str, Path)):
287+
dois_list: List[str] = FileUtils.read_lines_from_file(dois)
288+
self.logger.info("Found %d DOIs in %s", len(dois_list), dois)
289+
# Explicitly check that `dois` is a list of strings
290+
elif isinstance(dois, list):
291+
dois_list: List[str] = dois
292+
else:
293+
raise TypeError("Invalid type for `dois`. Expected List[str] or StrPath.")
294+
295+
if not dois_list:
296+
raise ValueError("DOIs list cannot be empty")
297+
298+
unique_dois: List[str] = DOIUtils.dedupe(dois_list)
299+
results: List[DownloadResult] = []
300+
301+
for doi in unique_dois:
302+
result = self.download_pdf(doi)
303+
if result:
304+
results.append(result)
305+
if on_result:
306+
on_result(result)
307+
if result.status != DownloadStatus.EXISTING_FILE:
308+
self.logger.debug(
309+
"Rate limiting: sleeping for %s seconds", self.api_rate_limit
310+
)
311+
time.sleep(self.api_rate_limit)
312+
return results
313+
266314
def _download_pdf(self, doi: str) -> DownloadResult:
267315
"""Download an article PDF given its DOI.
268316
@@ -358,54 +406,7 @@ def _save_pdf(self, response: requests.Response, doi: str) -> DownloadResult:
358406
except IOError as e:
359407
return DownloadResult(doi, DownloadStatus.STORAGE_ERROR, str(e), pdf_path)
360408

361-
def download_pdfs(
362-
self,
363-
dois: Union[List[str], StrPath],
364-
on_result: Optional[Callable[[DownloadResult], None]] = None,
365-
) -> List[DownloadResult]:
366-
"""Download multiple PDFs from either a list of DOIs or a file containing DOIs.
367-
368-
Args:
369-
dois: Either a list of DOI strings or a path to a file containing DOIs (one per line)
370-
on_result: Optional callback function called after each download
371-
372-
Returns:
373-
List[DownloadResult]: List of download results
374-
375-
Raises:
376-
FileNotFoundError: If dois is a file path that doesn't exist
377-
ValueError: If dois list is empty
378-
"""
379-
# Check if `dois` is a string or Path (matches StrPath type alias)
380-
if isinstance(dois, (str, Path)):
381-
dois_list: List[str] = FileUtils.read_lines_from_file(dois)
382-
self.logger.info("Found %d DOIs in %s", len(dois_list), dois)
383-
# Explicitly check that `dois` is a list of strings
384-
elif isinstance(dois, list):
385-
dois_list: List[str] = dois
386-
else:
387-
raise TypeError("Invalid type for `dois`. Expected List[str] or StrPath.")
388-
389-
if not dois_list:
390-
raise ValueError("DOIs list cannot be empty")
391-
392-
unique_dois: List[str] = DOIUtils.dedupe(dois_list)
393-
results: List[DownloadResult] = []
394-
395-
for doi in unique_dois:
396-
result = self.download_pdf(doi)
397-
if result:
398-
results.append(result)
399-
if on_result:
400-
on_result(result)
401-
if result.status != DownloadStatus.EXISTING_FILE:
402-
self.logger.debug(
403-
"Rate limiting: sleeping for %s seconds", self.api_rate_limit
404-
)
405-
time.sleep(self.api_rate_limit)
406-
return results
407-
408-
def _add_download_result(self, result: DownloadResult) -> None:
409+
def _add_result(self, result: DownloadResult) -> None:
409410
"""Record download result.
410411
411412
Args:
@@ -415,37 +416,35 @@ def _add_download_result(self, result: DownloadResult) -> None:
415416
message = f"DOI: {result.doi} - {result.status}"
416417

417418
if result.status in (DownloadStatus.SUCCESS, DownloadStatus.EXISTING_FILE):
418-
if self.download_results_errors_only:
419+
if self.only_record_errors:
419420
return
420421
self.logger.info(message)
421422
else:
422423
self.logger.error(message)
423424

424-
self._download_results.append(result)
425+
self._results.append(result)
425426

426427
@property
427-
def download_results(self) -> List[DownloadResult]:
428+
def results(self) -> List[DownloadResult]:
428429
"""Get the current download results.
429430
430431
Returns:
431432
List[DownloadResult]: List of all download attempts and their results
432433
"""
433-
return (
434-
self._download_results.copy()
435-
) # Return copy to prevent external modification
434+
return self._results.copy() # Return copy to prevent external modification
436435

437-
def save_download_results(self, csv_path: StrPath = RESULTS_FILE) -> None:
436+
def save_results(self, csv_path: StrPath = RESULTS_FILE) -> None:
438437
"""Save download results to CSV file. Defaults to 'results.csv'
439438
440439
Args:
441440
csv_path: Path where CSV file will be saved. Can be either:
442441
- A string path
443442
- A PathLike object (recommended)
444-
Relative paths are resolved from the current working directory.
443+
Relative paths are resolved from the current working directory
445444
Returns:
446445
None
447446
448447
Raises:
449448
OSError: If the file cannot be written due to permissions or other OS issues
450449
"""
451-
TDMReporting.save_download_results(self._download_results, csv_path)
450+
TDMReporting.save_results(self._results, csv_path)

src/wiley_tdm/tdm_reporting.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,15 @@ class TDMReporting:
2626
]
2727

2828
@staticmethod
29-
def save_download_results(results: List[DownloadResult], csv_path: StrPath) -> None:
29+
def save_results(results: List[DownloadResult], csv_path: StrPath) -> None:
3030
"""Save download results to CSV file.
3131
3232
Args:
3333
results: List of download results to save
3434
csv_path: Path to the CSV file. Can be either:
3535
- A PathLike object (recommend): Path("downloads") / "results.csv"
3636
- A string: "downloads/results.csv" Will be converted to Path internally
37+
Relative paths are resolved from the current working directory
3738
3839
Note:
3940
Recommend using Path objects for platform-independent path handling.

tests/test_tdm_client.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ def test_multiple_downloads(tdm):
101101
"""Test multiple PDF downloads."""
102102
results = tdm.download_pdfs(OA_DOIS)
103103
assert len(results) == len(OA_DOIS)
104-
assert len(tdm.download_results) == len(results)
104+
assert len(tdm.results) == len(results)
105105

106106
for result in results:
107107
assert_successful_download(result, result.doi)
@@ -112,21 +112,21 @@ def test_multiple_downloads_from_file(tdm):
112112

113113
results = tdm.download_pdfs(DOIS_FILE)
114114
assert len(results) == DOIS_FILE_COUNT
115-
assert len(tdm.download_results) == len(results)
115+
assert len(tdm.results) == len(results)
116116

117117
for result in results:
118118
assert_successful_download(result, result.doi)
119119

120120

121-
def test_save_download_results(tdm, tmp_path):
121+
def test_save_results(tdm, tmp_path):
122122
"""Test saving download results to CSV file."""
123123
# First generate some download results
124124
result = tdm.download_pdf(OA_DOI)
125125
assert_successful_download(result, OA_DOI)
126126

127127
# Save results to temporary CSV file
128128
csv_path = tmp_path / "results.csv"
129-
tdm.save_download_results(str(csv_path))
129+
tdm.save_results(str(csv_path))
130130

131131
# Verify file exists and contains data
132132
assert csv_path.exists()

tests/test_tdm_reporting.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,12 @@ def results_fixture():
4747
]
4848

4949

50-
def test_save_download_results(tmp_path, results):
50+
def test_save_results(tmp_path, results):
5151
"""Test saving results to CSV with all fields."""
5252

5353
csv_path = tmp_path / TEST_CSV
5454

55-
TDMReporting.save_download_results(results, csv_path)
55+
TDMReporting.save_results(results, csv_path)
5656

5757
assert csv_path.exists()
5858

@@ -87,6 +87,6 @@ def test_save_empty_results(tmp_path):
8787
"""Test handling of empty results list."""
8888

8989
csv_path = tmp_path / TEST_CSV
90-
TDMReporting.save_download_results([], str(csv_path))
90+
TDMReporting.save_results([], str(csv_path))
9191

9292
assert not csv_path.exists()

0 commit comments

Comments
 (0)