Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 51 additions & 7 deletions docling_core/types/doc/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -1188,6 +1188,7 @@ class TextItem(DocItem):
DocItemLabel.REFERENCE,
DocItemLabel.TEXT,
DocItemLabel.EMPTY_VALUE,
DocItemLabel.FORM_KEY,
]

orig: str # untreated representation
Expand Down Expand Up @@ -1920,14 +1921,55 @@ def export_to_document_tokens(
text = serializer.serialize(item=self).text
return text

class CheckboxItem(ListItem):
    """CheckboxItem: a list item that additionally carries a checked state."""

    label: typing.Literal[DocItemLabel.CHECKBOX] = DocItemLabel.CHECKBOX

    # True when the box is ticked; a new item starts out unticked.
    checked: bool = False

"""
class FormHeaderItem(SectionHeaderItem):

label: typing.Literal[DocItemLabel.FORM_HEADER] = DocItemLabel.FORM_HEADER

class FormTextItem(TextItem):

label: typing.Literal[DocItemLabel.FORM_TEXT] = DocItemLabel.FORM_TEXT
"""

class FormListItem(DocItem):
    """FormListItem: one form entry holding a key and, as children, its values.

    The key (and an optional marker) live on the item itself; values are
    attached afterwards through ``add_value``.
    """

    label: typing.Literal[DocItemLabel.FORM_LISTITEM] = DocItemLabel.FORM_LISTITEM

    # Optional; left as None when the entry has no marker.
    marker: Optional[TextItem] = None

    # Required: no default is given, so it must be supplied on construction.
    key: TextItem

    def add_value(self, item: Union[CheckboxItem, ListItem, TextItem]) -> NodeItem:
        """Attach ``item`` to this entry as one of its values.

        The item's ``parent`` is pointed at this entry and a reference to the
        item is recorded in ``self.children``.

        :param item: the value item to attach; it is modified in place.
        :returns: the same ``item`` that was passed in.
        """
        item.parent = self.get_ref()
        # Record a reference rather than the object itself, matching how
        # FormItem.add registers its children.
        self.children.append(item.get_ref())

        return item



class FormItem(FloatingItem):
    """FormItem: a floating item that collects form content as its children."""

    label: typing.Literal[DocItemLabel.FORM] = DocItemLabel.FORM

    # NOTE(review): declared without a default, so it is required; callers
    # that build a FormItem without a graph would be rejected -- confirm that
    # this is still intended.
    graph: GraphData

    def add(
        self, item: Union["FormItem", SectionHeaderItem, TextItem, FormListItem]
    ) -> NodeItem:
        """Attach ``item`` to this form.

        The item's ``parent`` is pointed at this form and a reference to the
        item is appended to ``self.children``.

        :param item: the item to attach; it is modified in place.
        :returns: the same ``item`` that was passed in.
        """
        item.parent = self.get_ref()
        self.children.append(item.get_ref())

        return item

    def add_listitem(
        self, doc: "DoclingDocument", prov: Optional[ProvenanceItem] = None
    ) -> NodeItem:
        """Create a ``FormListItem`` for this form and return it.

        :param doc: currently unused.
        :param prov: currently unused.
        :returns: the newly created ``FormListItem``.
        """
        # NOTE(review): FormListItem declares ``key`` without a default, and
        # none is passed here; ``self_ref`` also receives this form's own
        # reference. Neither ``doc`` nor ``prov`` is consulted, and the new
        # item is not attached to this form. Confirm the intended wiring.
        li = FormListItem(self_ref=self.get_ref())
        # Return the item that was just built; the previous code returned the
        # undefined name ``item`` and so always raised NameError.
        return li


ContentItem = Annotated[
Union[
Expand All @@ -1940,6 +1982,7 @@ class FormItem(FloatingItem):
PictureItem,
TableItem,
KeyValueItem,
FormItem,
],
Field(discriminator="label"),
]
Expand Down Expand Up @@ -2987,7 +3030,7 @@ def add_key_values(

def add_form(
self,
graph: GraphData,
form: Optional[FormItem] = None,
prov: Optional[ProvenanceItem] = None,
parent: Optional[NodeItem] = None,
):
Expand All @@ -3003,11 +3046,12 @@ def add_form(
form_index = len(self.form_items)
cref = f"#/form_items/{form_index}"

form_item = FormItem(
graph=graph,
self_ref=cref,
parent=parent.get_ref(),
)
if form is None:
form = FormItem(
self_ref=cref,
parent=parent.get_ref(),
)

if prov:
form_item.prov.append(prov)

Expand Down
5 changes: 5 additions & 0 deletions docling_core/types/doc/labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ class DocItemLabel(str, Enum):
HANDWRITTEN_TEXT = "handwritten_text"
EMPTY_VALUE = "empty_value" # used for empty value fields in fillable forms

# FORM_HEADER = "form_header"
FORM_KEY = "form_key"
FORM_LISTITEM = "form_listitem"
CHECKBOX = "checkbox"

# Additional labels for markup-based formats (e.g. HTML, Word)
PARAGRAPH = "paragraph"
REFERENCE = "reference"
Expand Down
2 changes: 1 addition & 1 deletion test/data/docling_document/unit/FormItem.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ graph:
source_cell_id: 1
target_cell_id: 0
image: null
label: form
label: key_value_region
parent: null
prov: []
references: []
Expand Down
38 changes: 36 additions & 2 deletions test/test_docling_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from docling_core.types.doc.base import BoundingBox, CoordOrigin, ImageRefMode, Size
from docling_core.types.doc.document import ( # BoundingBox,
CURRENT_VERSION,
CheckboxItem,
CodeItem,
ContentLayer,
DocItem,
Expand Down Expand Up @@ -42,6 +43,8 @@
TableItem,
TextItem,
TitleItem,
CheckboxItem,
FormListItem,
)
from docling_core.types.doc.labels import (
DocItemLabel,
Expand Down Expand Up @@ -491,6 +494,7 @@ def verify(dc, obj):

elif dc is FormItem:

"""
graph = GraphData(
cells=[
GraphCell(
Expand Down Expand Up @@ -524,7 +528,31 @@ def verify(dc, obj):
self_ref="#",
)
verify(dc, obj)
"""

key_name = TextItem(text="name", orig="name", self_ref="#", label=DocItemLabel.FORM_KEY)
val_name = TextItem(text="John Doe", orig="name", self_ref="#", label=DocItemLabel.TEXT)

form_item_name = FormListItem(key=key_name, self_ref="#")
form_item_name.add_value(val_name)

key_age = TextItem(text="Age", orig="Age", self_ref="#", label=DocItemLabel.FORM_KEY)

cb_age_0 = CheckboxItem(checked=True, text="0-20", orig="0-20", self_ref="#")
cb_age_1 = CheckboxItem(checked=False, text="20-40", orig="20-40", self_ref="#")
val_age = TextItem(text="other", orig="other", self_ref="#", label=DocItemLabel.TEXT)

form_item_age = FormListItem(key=key_age, self_ref="#") #, value=[cb_age_0, cb_age_1, val_age])
for _ in [cb_age_0, cb_age_1, val_age]:
form_item_age.add_value(_)

form = FormItem(self_ref="#")

form.add(form_item_name)
form.add(form_item_age)

verify(dc, obj)

elif dc is TitleItem:
obj = dc(
text="whatever",
Expand Down Expand Up @@ -571,8 +599,12 @@ def verify(dc, obj):
text="E=mc^2",
)
verify(dc, obj)
elif dc is GraphData: # we skip this on purpose
elif dc is CheckboxItem: # we skip this on purpose
continue
elif dc is FormListItem: # we skip this on purpose
continue
elif dc is GraphData: # we skip this on purpose
continue
else:
raise RuntimeError(f"New derived class detected {dc.__name__}")

Expand Down Expand Up @@ -1002,8 +1034,10 @@ def _construct_doc() -> DoclingDocument:

doc.add_key_values(graph=graph)

doc.add_form(graph=graph)
form_1 = doc.add_form(graph=graph)

form_1_item_1 = form_1.add_listitem(key="Name")

inline_fmt = doc.add_inline_group()
doc.add_text(
label=DocItemLabel.TEXT, text="Some formatting chops:", parent=inline_fmt
Expand Down
Loading