Skip to content

Commit 518ead3

Browse files
authored
Ruff fixes (#40)
* Reformatted codebase with new ruff preview
* Ruff fixes for utils
* ruff: more fixes
* ruff: remaining utils issues
* Fix Path usages in utils
* 2 more ruff ignores
1 parent 29fe5a3 commit 518ead3

32 files changed

+271
-185
lines changed

libpdf/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from libpdf.core import main_cli
2323

2424
# define importable objects
25-
__all__ = ["load", "__version__", "__summary__"]
25+
__all__ = ["__summary__", "__version__", "load"]
2626

2727
# Enable running
2828
# python -m libpdf.__init__

libpdf/_import_forks.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
1010
These 2 methods take time, so below solution is a short-term workaround.
1111
"""
12+
1213
import os
1314
import sys
1415

libpdf/catalog.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""PDF catalog extraction."""
2+
23
import logging
34
import re
45
from typing import Any, Dict, List, Union
@@ -211,14 +212,12 @@ def chapter_number_giver(
211212

212213
if chapter_number:
213214
# The assumption is that only one match is found
214-
chapters_in_outline[idx_chapter].update({"number": chapter_number[0]})
215-
chapters_in_outline[idx_chapter].update(
216-
{"title": chapter_title.replace(chapter_number[0], "", 1).strip()}
217-
)
215+
chapter.update({"number": chapter_number[0]})
216+
chapter.update({
217+
"title": chapter_title.replace(chapter_number[0], "", 1).strip()
218+
})
218219
else:
219-
chapters_in_outline[idx_chapter].update(
220-
{"number": f"virt.{new_hierarchical_level}"}
221-
)
220+
chapter.update({"number": f"virt.{new_hierarchical_level}"})
222221

223222
if chapter["content"]:
224223
# next deeper level
@@ -653,9 +652,9 @@ def _resolve_pdf_obj_refs(
653652
)
654653
resolved_dict[key] = ret_list
655654
else:
656-
resolved_dict[
657-
key
658-
] = resolved # add resolved element to dictionary
655+
resolved_dict[key] = (
656+
resolved # add resolved element to dictionary
657+
)
659658
else:
660659
# leave other types as they are
661660
resolved_dict[key] = value

libpdf/core.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
# not importing load(), so no circular import when importing from root __init__.py
1111
from libpdf import __summary__, __version__, parameters # pylint: disable=cyclic-import
1212
from libpdf.apiobjects import ApiObjects
13-
from libpdf.extract import LibpdfException, extract
13+
from libpdf.extract import LibpdfError, extract
1414
from libpdf.log import config_logger, get_level_name, set_log_level
1515
from libpdf.parameters import RENDER_ELEMENTS
1616
from libpdf.process import output_dump
@@ -139,7 +139,7 @@ def main( # pylint: disable=too-many-arguments,too-many-locals # no reasonable
139139
no_rects,
140140
overall_pbar,
141141
)
142-
except LibpdfException:
142+
except LibpdfError:
143143
if cli_usage:
144144
LOG.critical("Exiting with code 1")
145145
sys.exit(1)

libpdf/exceptions.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
"""Libpdf exceptions."""
22

33

4-
class LibpdfException(Exception):
4+
class LibpdfError(Exception):
55
"""Generic libpdf exception class."""
6+
7+
8+
class TextContainsNewlineError(ValueError):
9+
"""Text cannot contain newline character."""
10+
11+
def __init__(self, text: str):
12+
super().__init__(f'Input text "{text}" contains a new line character.')

libpdf/extract.py

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Core routines for PDF extraction."""
2+
23
import itertools
34
import logging
45
import os
@@ -14,7 +15,7 @@
1415
from libpdf import process as pro
1516
from libpdf.apiobjects import ApiObjects
1617
from libpdf.catalog import catalog, extract_catalog
17-
from libpdf.exceptions import LibpdfException
18+
from libpdf.exceptions import LibpdfError
1819
from libpdf.log import logging_needed
1920
from libpdf.models.figure import Figure
2021
from libpdf.models.file import File
@@ -86,7 +87,7 @@ def extract( # pylint: disable=too-many-locals, too-many-branches, too-many-sta
8687
:param no_rects: flag triggering the exclusion of rects
8788
:param overall_pbar: total progress bar for whole libpdf run
8889
:return: instance of Objects class
89-
:raise LibpdfException: PDF contains no pages
90+
:raise LibpdfError: PDF contains no pages
9091
"""
9192
LOG.info("PDF extraction started ...")
9293

@@ -116,7 +117,7 @@ def extract( # pylint: disable=too-many-locals, too-many-branches, too-many-sta
116117
if len(pdf.pages) == 0:
117118
message = "Page range selection: no pages left in the PDF to analyze."
118119
LOG.critical(message)
119-
raise LibpdfException(message)
120+
raise LibpdfError(message)
120121

121122
overall_pbar.update(5)
122123
pdf = delete_page_ann(pdf)
@@ -131,7 +132,7 @@ def extract( # pylint: disable=too-many-locals, too-many-branches, too-many-sta
131132
pages_list = extract_page_metadata(pdf)
132133

133134
if not pages_list:
134-
raise LibpdfException("PDF contains no pages")
135+
raise LibpdfError("PDF contains no pages")
135136

136137
overall_pbar.update(1)
137138

@@ -530,15 +531,15 @@ def _get_datetime_format(date: str):
530531
if "CreationDate" in pdf.metadata:
531532
preprocessed_date = _time_preprocess(pdf.metadata["CreationDate"])
532533
time_format = _get_datetime_format(preprocessed_date)
533-
file_meta_params.update(
534-
{"creation_date": datetime.strptime(preprocessed_date, time_format)}
535-
)
534+
file_meta_params.update({
535+
"creation_date": datetime.strptime(preprocessed_date, time_format)
536+
})
536537
if "ModDate" in pdf.metadata:
537538
preprocessed_date = _time_preprocess(pdf.metadata["ModDate"])
538539
time_format = _get_datetime_format(preprocessed_date)
539-
file_meta_params.update(
540-
{"modified_date": datetime.strptime(preprocessed_date, time_format)}
541-
)
540+
file_meta_params.update({
541+
"modified_date": datetime.strptime(preprocessed_date, time_format)
542+
})
542543
if "Trapped" in pdf.metadata:
543544
file_meta_params.update({"trapped": pdf.metadata["Trapped"]})
544545

@@ -705,7 +706,7 @@ def extract_rects(
705706
)
706707

707708
LOG.info(
708-
f"found rect at {rect_bbox} at page {idx_page+1}: color {non_stroking_color}"
709+
f"found rect at {rect_bbox} at page {idx_page + 1}: color {non_stroking_color}"
709710
)
710711
lt_textbox = lt_textbox_crop(
711712
rect_bbox,
@@ -722,7 +723,9 @@ def extract_rects(
722723
rect_list.append(rect)
723724

724725
else:
725-
LOG.info(f"found no rects on page {idx_page+1}: {page_crop.objects.keys()}")
726+
LOG.info(
727+
f"found no rects on page {idx_page + 1}: {page_crop.objects.keys()}"
728+
)
726729

727730
# return figure_list
728731
return rect_list

libpdf/models/element.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Definition for PDF elements."""
2+
23
from abc import ABC, abstractmethod
34
from typing import TYPE_CHECKING
45

libpdf/models/figure.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Definition for PDF figures."""
2+
23
from typing import TYPE_CHECKING, List
34

45
from libpdf.models.element import Element

libpdf/models/file.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Definition for PDF file."""
2+
23
from typing import TYPE_CHECKING
34

45
from libpdf.models.file_meta import FileMeta

libpdf/models/file_meta.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Definition for PDF file meta data."""
2+
23
from datetime import datetime
34
from typing import TYPE_CHECKING
45

0 commit comments

Comments
 (0)