Skip to content

Commit 5328d61

Browse files
committed
Relax paragraph pattern
Fix #1298. Not all paragraphs in documentation start with a capital letter, as ordinary English text usually does.
1 parent c54643e commit 5328d61

File tree

2 files changed

+33
-3
lines changed

2 files changed

+33
-3
lines changed

lib/rdoc/generator/darkfish.rb

+12-3
Original file line numberDiff line numberDiff line change
@@ -786,7 +786,8 @@ def template_for file, page = true, klass = ERB
786786
template
787787
end
788788

789-
ParagraphExcerptRegexp = /[A-Z][^\.:\/]+\./
789+
ParagraphExcerptRegexpUnicode = %r[\G\P{Letter}*+\K\p{Letter}[^.:/]+\.]
790+
ParagraphExcerptRegexpOther = %r[\G[^A-Za-z]*+\K[A-Za-z][^.:/]+\.]
790791

791792
# Returns an excerpt of the comment for usage in meta description tags
792793
def excerpt(comment)
@@ -799,11 +800,19 @@ def excerpt(comment)
799800

800801
# Match from the first letter to the first period, discarding any links, so
801802
# that we don't end up matching badges in the README
802-
first_paragraph_match = text.match(ParagraphExcerptRegexp)
803+
pattern = ParagraphExcerptRegexpUnicode
804+
begin
805+
first_paragraph_match = text.match(pattern)
806+
rescue Encoding::CompatibilityError
807+
# The doc is non-ASCII text in an encoding that is not Unicode-based.
808+
raise unless pattern.equal?(ParagraphExcerptRegexpUnicode) # retry only once: re-raise if the fallback pattern failed too
809+
pattern = ParagraphExcerptRegexpOther
810+
retry
811+
end
803812
return text[0...150].tr_s("\n", " ").squeeze(" ") unless first_paragraph_match
804813

805814
extracted_text = first_paragraph_match[0]
806-
second_paragraph = first_paragraph_match.post_match.match(ParagraphExcerptRegexp)
815+
second_paragraph = text.match(pattern, first_paragraph_match.end(0))
807816
extracted_text << " " << second_paragraph[0] if second_paragraph
808817

809818
extracted_text[0...150].tr_s("\n", " ").squeeze(" ")

test/rdoc/test_rdoc_generator_darkfish.rb

+21
Original file line numberDiff line numberDiff line change
@@ -449,6 +449,27 @@ def test_meta_tags_for_rdoc_files
449449
)
450450
end
451451

452+
def test_meta_tags_for_markdown_files_paragraph
453+
top_level = @store.add_file("README.md", parser: RDoc::Parser::Simple)
454+
top_level.comment = <<~MARKDOWN
455+
# Distributed Ruby: dRuby
456+
457+
dRuby is a distributed object system for Ruby. It allows an object in one
458+
Ruby process to invoke methods on an object in another Ruby process.
459+
MARKDOWN
460+
461+
@g.generate
462+
463+
content = File.binread("README_md.html")
464+
assert_include(
465+
content,
466+
"<meta name=\"description\" content=\"" \
467+
"README: # Distributed Ruby: dRuby " \
468+
"dRuby is a distributed object system for Ruby. " \
469+
"It allows an object in one Ruby process to invoke methods on an object"
470+
)
471+
end
472+
452473
def test_meta_tags_for_markdown_files
453474
top_level = @store.add_file("MyPage.md", parser: RDoc::Parser::Markdown)
454475
top_level.comment = <<~MARKDOWN

0 commit comments

Comments
 (0)