@@ -786,7 +786,8 @@ def template_for file, page = true, klass = ERB
786
786
template
787
787
end
788
788
789
- ParagraphExcerptRegexp = /[A-Z][^\. :\/ ]+\. /
789
+ ParagraphExcerptRegexpUnicode = %r[\G \P {Letter}*+\K \p {Letter}[^.:/]+\. ]
790
+ ParagraphExcerptRegexpOther = %r[\G [^A-Za-z]*+\K [A-Za-z][^.:/]+\. ]
790
791
791
792
# Returns an excerpt of the comment for usage in meta description tags
792
793
def excerpt ( comment )
@@ -799,11 +800,19 @@ def excerpt(comment)
799
800
800
801
# Match from the first letter to the first period, discarding any links, so
801
802
# that we don't end up matching badges in the README
802
- first_paragraph_match = text . match ( ParagraphExcerptRegexp )
803
+ pattern = ParagraphExcerptRegexpUnicode
804
+ begin
805
+ first_paragraph_match = text . match ( pattern )
806
+ rescue Encoding ::CompatibilityError
807
+ # The text is non-ASCII and its encoding is not Unicode-based, so the Unicode pattern cannot be applied to it.
808
+ # Re-raise unless the failing pattern was the Unicode one, so the fallback is retried at most once.
+ raise unless pattern . equal? ( ParagraphExcerptRegexpUnicode )
809
+ pattern = ParagraphExcerptRegexpOther
810
+ retry
811
+ end
803
812
return text [ 0 ...150 ] . tr_s ( "\n " , " " ) . squeeze ( " " ) unless first_paragraph_match
804
813
805
814
extracted_text = first_paragraph_match [ 0 ]
806
- second_paragraph = first_paragraph_match . post_match . match ( ParagraphExcerptRegexp )
815
+ second_paragraph = text . match ( pattern , first_paragraph_match . end ( 0 ) )
807
816
extracted_text << " " << second_paragraph [ 0 ] if second_paragraph
808
817
809
818
extracted_text [ 0 ...150 ] . tr_s ( "\n " , " " ) . squeeze ( " " )
0 commit comments