@@ -67,6 +67,8 @@ def __init__(
67
67
68
68
self .level = 0
69
69
self .listIter = 0
70
+ # Track list counters per numId and ilvl
71
+ self .list_counters : dict [tuple [int , int ], int ] = {}
70
72
71
73
self .history : dict [str , Any ] = {
72
74
"names" : [None ],
@@ -315,6 +317,108 @@ def _get_numId_and_ilvl(
315
317
316
318
return None , None # If the paragraph is not part of a list
317
319
320
def _get_list_counter(self, numid: int, ilvl: int) -> int:
    """Advance and return the item counter for one (numId, ilvl) pair.

    Counters are kept in ``self.list_counters`` keyed by ``(numid, ilvl)``.
    A pair that has not been seen yet starts from zero, so its first item
    is numbered 1.

    Advancing a level also restarts every deeper level of the same
    ``numid``: a sub-list that follows a new parent item is numbered from 1
    again instead of continuing the count reached under the previous
    parent. Counters belonging to other ``numid`` values are not touched.

    Args:
        numid: Numbering instance id of the list the item belongs to.
        ilvl: Zero-based indentation level of the item inside that list.

    Returns:
        The 1-based position of the item at its level.
    """
    counters = self.list_counters
    key = (numid, ilvl)
    counters[key] = counters.get(key, 0) + 1

    # Only values are rewritten here, so iterating the dict directly is
    # safe (its size does not change).
    for other in counters:
        if other[0] == numid and other[1] > ilvl:
            counters[other] = 0

    return counters[key]
327
+
328
def _reset_list_counters_for_new_sequence(self, numid: int) -> None:
    """Zero every stored counter that belongs to ``numid``.

    All indentation levels recorded for ``numid`` in ``self.list_counters``
    are set back to 0, so the next item requested at any of those levels is
    numbered 1. Entries for other ``numid`` values are left unchanged, and
    nothing is added when ``numid`` has no entries yet.

    Args:
        numid: Numbering instance id whose counters should restart.
    """
    counters = self.list_counters
    # Values are overwritten in place; no keys are added or removed, so the
    # dict can be iterated directly without copying its keys first.
    for key in counters:
        if key[0] == numid:
            counters[key] = 0
334
+
335
def _is_numbered_list(self, docx_obj: DocxDocument, numId: int, ilvl: int) -> bool:
    """Report whether the list level (numId, ilvl) uses a numbered format.

    The lookup walks the numbering definitions of the document:

    1. find the package part whose name contains ``"numbering"``;
    2. find the ``w:num`` element with the given ``numId`` and read the
       ``w:abstractNumId`` it points to;
    3. find the matching ``w:abstractNum`` and, inside it, the ``w:lvl``
       for ``ilvl``;
    4. read that level's ``w:numFmt`` value.

    Args:
        docx_obj: Document object exposing ``part.package.parts``.
        numId: Numbering instance id taken from the paragraph.
        ilvl: Zero-based indentation level taken from the paragraph.

    Returns:
        True when the ``w:numFmt`` value is one of the recognised numbered
        formats (decimal, decimalZero, lower/upper roman, lower/upper
        letter). False for any other format, and also whenever a step of
        the lookup finds nothing or raises.
    """
    w_ns = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
    namespaces = {"w": w_ns}
    # Attributes are namespace-qualified, so ``get`` needs the Clark name.
    val_attr = f"{{{w_ns}}}val"
    numbered_formats = {
        "decimal",
        "lowerRoman",
        "upperRoman",
        "lowerLetter",
        "upperLetter",
        "decimalZero",
    }

    try:
        if not hasattr(docx_obj, "part") or not hasattr(docx_obj.part, "package"):
            return False

        # First package part whose name mentions "numbering", if any.
        numbering_part = next(
            (
                part
                for part in docx_obj.part.package.parts
                if "numbering" in part.partname
            ),
            None,
        )
        if numbering_part is None:
            return False
        numbering_root = numbering_part.element

        # Numbering instance -> id of the abstract definition it uses.
        # The predicate value must match the attribute text exactly, so no
        # padding is allowed inside the quotes.
        num_element = numbering_root.find(
            f".//w:num[@w:numId='{numId}']", namespaces=namespaces
        )
        if num_element is None:
            return False

        abstract_num_id_elem = num_element.find(
            ".//w:abstractNumId", namespaces=namespaces
        )
        if abstract_num_id_elem is None:
            return False

        abstract_num_id = abstract_num_id_elem.get(val_attr)
        if abstract_num_id is None:
            return False

        # Abstract definition -> level definition -> number format.
        abstract_num_element = numbering_root.find(
            f".//w:abstractNum[@w:abstractNumId='{abstract_num_id}']",
            namespaces=namespaces,
        )
        if abstract_num_element is None:
            return False

        lvl_element = abstract_num_element.find(
            f".//w:lvl[@w:ilvl='{ilvl}']", namespaces=namespaces
        )
        if lvl_element is None:
            return False

        num_fmt_element = lvl_element.find(".//w:numFmt", namespaces=namespaces)
        if num_fmt_element is None:
            return False

        # A missing ``w:val`` yields None, which is not in the set.
        return num_fmt_element.get(val_attr) in numbered_formats

    except Exception as e:
        # Best effort by design: a malformed numbering part must not abort
        # the conversion, the item is simply treated as not numbered.
        _log.debug("Error determining if list is numbered: %s", e)
        return False
421
+
318
422
def _get_heading_and_level (self , style_label : str ) -> tuple [str , Optional [int ]]:
319
423
parts = self ._split_text_and_number (style_label )
320
424
@@ -713,8 +817,6 @@ def _handle_text_elements( # noqa: C901
713
817
# Common styles for bullet and numbered lists.
714
818
# "List Bullet", "List Number", "List Paragraph"
715
819
# Identify whether list is a numbered list or not
716
- # is_numbered = "List Bullet" not in paragraph.style.name
717
- is_numbered = False
718
820
p_style_id , p_level = self ._get_label_and_level (paragraph )
719
821
numid , ilevel = self ._get_numId_and_ilvl (paragraph )
720
822
@@ -727,6 +829,9 @@ def _handle_text_elements( # noqa: C901
727
829
and ilevel is not None
728
830
and p_style_id not in ["Title" , "Heading" ]
729
831
):
832
+ # Check if this is actually a numbered list by examining the numFmt
833
+ is_numbered = self ._is_numbered_list (docx_obj , numid , ilevel )
834
+
730
835
self ._add_list_item (
731
836
doc = doc ,
732
837
numid = numid ,
@@ -983,15 +1088,19 @@ def _add_list_item(
983
1088
if self ._prev_numid () is None : # Open new list
984
1089
self .level_at_new_list = level
985
1090
1091
+ # Reset counters for the new numbering sequence
1092
+ self ._reset_list_counters_for_new_sequence (numid )
1093
+
986
1094
self .parents [level ] = doc .add_list_group (
987
1095
name = "list" , parent = self .parents [level - 1 ]
988
1096
)
989
1097
990
1098
# Set marker and enumerated arguments if this is an enumeration element.
991
- self .listIter += 1
992
1099
if is_numbered :
993
- enum_marker = str (self .listIter ) + "."
994
- is_numbered = True
1100
+ counter = self ._get_list_counter (numid , ilevel )
1101
+ enum_marker = str (counter ) + "."
1102
+ else :
1103
+ enum_marker = ""
995
1104
self ._add_formatted_list_item (
996
1105
doc , elements , enum_marker , is_numbered , level
997
1106
)
@@ -1005,16 +1114,16 @@ def _add_list_item(
1005
1114
self .level_at_new_list + prev_indent + 1 ,
1006
1115
self .level_at_new_list + ilevel + 1 ,
1007
1116
):
1008
- self .listIter = 0
1009
1117
self .parents [i ] = doc .add_list_group (
1010
1118
name = "list" , parent = self .parents [i - 1 ]
1011
1119
)
1012
1120
1013
1121
# TODO: Set marker and enumerated arguments if this is an enumeration element.
1014
- self .listIter += 1
1015
1122
if is_numbered :
1016
- enum_marker = str (self .listIter ) + "."
1017
- is_numbered = True
1123
+ counter = self ._get_list_counter (numid , ilevel )
1124
+ enum_marker = str (counter ) + "."
1125
+ else :
1126
+ enum_marker = ""
1018
1127
self ._add_formatted_list_item (
1019
1128
doc ,
1020
1129
elements ,
@@ -1033,25 +1142,26 @@ def _add_list_item(
1033
1142
self .parents [k ] = None
1034
1143
1035
1144
# TODO: Set marker and enumerated arguments if this is an enumeration element.
1036
- self .listIter += 1
1037
1145
if is_numbered :
1038
- enum_marker = str (self .listIter ) + "."
1039
- is_numbered = True
1146
+ counter = self ._get_list_counter (numid , ilevel )
1147
+ enum_marker = str (counter ) + "."
1148
+ else :
1149
+ enum_marker = ""
1040
1150
self ._add_formatted_list_item (
1041
1151
doc ,
1042
1152
elements ,
1043
1153
enum_marker ,
1044
1154
is_numbered ,
1045
1155
self .level_at_new_list + ilevel ,
1046
1156
)
1047
- self .listIter = 0
1048
1157
1049
1158
elif self ._prev_numid () == numid or prev_indent == ilevel :
1050
1159
# TODO: Set marker and enumerated arguments if this is an enumeration element.
1051
- self .listIter += 1
1052
1160
if is_numbered :
1053
- enum_marker = str (self .listIter ) + "."
1054
- is_numbered = True
1161
+ counter = self ._get_list_counter (numid , ilevel )
1162
+ enum_marker = str (counter ) + "."
1163
+ else :
1164
+ enum_marker = ""
1055
1165
self ._add_formatted_list_item (
1056
1166
doc , elements , enum_marker , is_numbered , level - 1
1057
1167
)
0 commit comments