@@ -393,31 +393,22 @@ def detect_document(path):
393
393
394
394
document = image .detect_full_text ()
395
395
396
- for b , page in enumerate (document .pages ):
397
- page_text = ''
396
+ for page in document .pages :
397
+ for block in page .blocks :
398
+ block_words = []
399
+ for paragraph in block .paragraphs :
400
+ block_words .extend (paragraph .words )
398
401
399
- for bb , block in enumerate (page .blocks ):
400
- block_text = ''
401
-
402
- for p , paragraph in enumerate (block .paragraphs ):
403
- para_text = ''
404
-
405
- for w , word in enumerate (paragraph .words ):
406
- word_text = ''
407
-
408
- for s , symbol in enumerate (word .symbols ):
409
- word_text = word_text + symbol .text
402
+ block_symbols = []
403
+ for word in block_words :
404
+ block_symbols .extend (word .symbols )
410
405
411
- para_text = para_text + word_text
412
-
413
- block_text = block_text + para_text
414
- print ('\n --\n Content Block: {}' .format (block_text ))
415
- print ('Block Bounding Box:\n {}' .format (block .bounding_box ))
416
-
417
- page_text = page_text + block_text
406
+ block_text = ''
407
+ for symbol in block_symbols :
408
+ block_text = block_text + symbol .text
418
409
419
- print ('Page Content:\n {}' .format (page_text ))
420
- print ('Page Dimensions: w: {} h: {} ' .format (page . width , page . height ))
410
+ print ('Block Content: {}' .format (block_text ))
411
+ print ('Block Bounds: \n {} ' .format (block . bounding_box ))
421
412
422
413
423
414
def detect_document_uri (uri ):
@@ -428,31 +419,22 @@ def detect_document_uri(uri):
428
419
429
420
document = image .detect_full_text ()
430
421
431
- for b , page in enumerate (document .pages ):
432
- page_text = ''
422
+ for page in document .pages :
423
+ for block in page .blocks :
424
+ block_words = []
425
+ for paragraph in block .paragraphs :
426
+ block_words .extend (paragraph .words )
433
427
434
- for bb , block in enumerate (page .blocks ):
435
- block_text = ''
436
-
437
- for p , paragraph in enumerate (block .paragraphs ):
438
- para_text = ''
439
-
440
- for w , word in enumerate (paragraph .words ):
441
- word_text = ''
442
-
443
- for s , symbol in enumerate (word .symbols ):
444
- word_text = word_text + symbol .text
428
+ block_symbols = []
429
+ for word in block_words :
430
+ block_symbols .extend (word .symbols )
445
431
446
- para_text = para_text + word_text
447
-
448
- block_text = block_text + para_text
449
- print ('\n --\n Content Block: {}' .format (block_text ))
450
- print ('Block Bounding Box:\n {}' .format (block .bounding_box ))
451
-
452
- page_text = page_text + block_text
432
+ block_text = ''
433
+ for symbol in block_symbols :
434
+ block_text = block_text + symbol .text
453
435
454
- print ('Page Content:\n {}' .format (page_text ))
455
- print ('Page Dimensions: w: {} h: {} ' .format (page . width , page . height ))
436
+ print ('Block Content: {}' .format (block_text ))
437
+ print ('Block Bounds: \n {} ' .format (block . bounding_box ))
456
438
457
439
458
440
def run_local (args ):
0 commit comments