Skip to content

Commit b200173

Browse files
committed
readingTime: compute from CJK and non-CJK parts separately. Fixes #10031
1 parent 15463f8 commit b200173

File tree

3 files changed

+45
-38
lines changed

3 files changed

+45
-38
lines changed

docs/content/en/getting-started/configuration.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ Google Analytics tracking ID.
261261

262262
**Default value:** false
263263

264-
If true, auto-detect Chinese/Japanese/Korean Languages in the content. This will make `.Summary` and `.WordCount` behave correctly for CJK languages.
264+
If true, auto-detect Chinese/Japanese/Korean languages in the content. This will make `.Summary` behave correctly for CJK languages.
265265

266266
### imaging
267267

hugolib/page__per_output.go

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"context"
1919
"fmt"
2020
"html/template"
21+
"math"
2122
"runtime/debug"
2223
"strings"
2324
"sync"
@@ -204,7 +205,7 @@ func newPageContentOutput(p *pageState, po *pageOutput) (*pageContentOutput, err
204205
cp.initPlain = cp.initMain.Branch(func() (any, error) {
205206
cp.plain = tpl.StripHTML(string(cp.content))
206207
cp.plainWords = strings.Fields(cp.plain)
207-
cp.setWordCounts(p.m.isCJKLanguage)
208+
cp.setWordCounts()
208209

209210
if err := cp.setAutoSummary(); err != nil {
210211
return err, nil
@@ -656,31 +657,29 @@ func (cp *pageContentOutput) renderContentWithConverter(c converter.Converter, c
656657
return r, err
657658
}
658659

659-
func (p *pageContentOutput) setWordCounts(isCJKLanguage bool) {
660-
if isCJKLanguage {
661-
p.wordCount = 0
662-
for _, word := range p.plainWords {
663-
runeCount := utf8.RuneCountInString(word)
664-
if len(word) == runeCount {
665-
p.wordCount++
666-
} else {
667-
p.wordCount += runeCount
668-
}
660+
func (p *pageContentOutput) setWordCounts() {
661+
cjkWordCount := 0
662+
nonCjkWordCount := 0
663+
for _, word := range p.plainWords {
664+
runeCount := utf8.RuneCountInString(word)
665+
if len(word) == runeCount {
666+
nonCjkWordCount++
667+
} else {
668+
cjkWordCount += runeCount
669669
}
670-
} else {
671-
p.wordCount = helpers.TotalWords(p.plain)
672670
}
673671

672+
p.wordCount = nonCjkWordCount + cjkWordCount
673+
674674
// TODO(bep) is set in a test. Fix that.
675675
if p.fuzzyWordCount == 0 {
676676
p.fuzzyWordCount = (p.wordCount + 100) / 100 * 100
677677
}
678678

679-
if isCJKLanguage {
680-
p.readingTime = (p.wordCount + 500) / 501
681-
} else {
682-
p.readingTime = (p.wordCount + 212) / 213
683-
}
679+
cjkReadingTime := float64(cjkWordCount) / 501
680+
nonCjkReadingTime := float64(nonCjkWordCount) / 213
681+
682+
p.readingTime = int(math.Ceil(cjkReadingTime + nonCjkReadingTime))
684683
}
685684

686685
// A callback to signal that we have inserted a placeholder into the rendered

hugolib/page_test.go

Lines changed: 27 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1117,18 +1117,6 @@ Content
11171117
}
11181118
}
11191119

1120-
func TestWordCountWithAllCJKRunesWithoutHasCJKLanguage(t *testing.T) {
1121-
t.Parallel()
1122-
assertFunc := func(t *testing.T, ext string, pages page.Pages) {
1123-
p := pages[0]
1124-
if p.WordCount() != 8 {
1125-
t.Fatalf("[%s] incorrect word count. expected %v, got %v", ext, 8, p.WordCount())
1126-
}
1127-
}
1128-
1129-
testAllMarkdownEnginesForPages(t, assertFunc, nil, simplePageWithAllCJKRunes)
1130-
}
1131-
11321120
func TestWordCountWithAllCJKRunesHasCJKLanguage(t *testing.T) {
11331121
t.Parallel()
11341122
settings := map[string]any{"hasCJKLanguage": true}
@@ -1161,18 +1149,38 @@ func TestWordCountWithMainEnglishWithCJKRunes(t *testing.T) {
11611149
testAllMarkdownEnginesForPages(t, assertFunc, settings, simplePageWithMainEnglishWithCJKRunes)
11621150
}
11631151

1164-
func TestWordCountWithIsCJKLanguageFalse(t *testing.T) {
1152+
func TestReadingTimeCJKMixed(t *testing.T) {
1153+
t.Parallel()
1154+
1155+
simplePage := fmt.Sprintf(`---
1156+
title: Simple
1157+
---
1158+
1159+
%s
1160+
1161+
%s
1162+
1163+
`, strings.Repeat("hello 你好 ", 2130), strings.Repeat("好", 750))
1164+
// CJK: 2130 * 2 + 750 = 5010 runes = 10 * 501 -> 10 minutes; non-CJK: 2130 words = 10 * 213 -> 10 minutes; 20 minutes of reading in total
1165+
1166+
assertFunc := func(t *testing.T, ext string, pages page.Pages) {
1167+
p := pages[0]
1168+
if p.ReadingTime() != 20 {
1169+
t.Fatalf("[%s] incorrect min read. expected %v, got %v", ext, 20, p.ReadingTime())
1170+
}
1171+
}
1172+
1173+
testAllMarkdownEnginesForPages(t, assertFunc, nil, simplePage)
1174+
}
1175+
1176+
func TestSummaryWithIsCJKLanguageFalse(t *testing.T) {
11651177
t.Parallel()
11661178
settings := map[string]any{
11671179
"hasCJKLanguage": true,
11681180
}
11691181

11701182
assertFunc := func(t *testing.T, ext string, pages page.Pages) {
11711183
p := pages[0]
1172-
if p.WordCount() != 75 {
1173-
t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.Plain(), 74, p.WordCount())
1174-
}
1175-
11761184
if p.Summary() != simplePageWithIsCJKLanguageFalseSummary {
11771185
t.Fatalf("[%s] incorrect Summary for content '%s'. expected %v, got %v", ext, p.Plain(),
11781186
simplePageWithIsCJKLanguageFalseSummary, p.Summary())
@@ -1781,9 +1789,9 @@ Summary: In Chinese, 好 means good.
17811789
b.AssertFileContent("public/p2/index.html", "WordCount: 314\nFuzzyWordCount: 400\nReadingTime: 2\nLen Plain: 1569\nLen PlainWords: 314\nTruncated: true\nLen Summary: 25\nLen Content: 1582")
17821790

17831791
b.AssertFileContent("public/p3/index.html", "WordCount: 206\nFuzzyWordCount: 300\nReadingTime: 1\nLen Plain: 638\nLen PlainWords: 7\nTruncated: true\nLen Summary: 43\nLen Content: 651")
1784-
b.AssertFileContent("public/p4/index.html", "WordCount: 7\nFuzzyWordCount: 100\nReadingTime: 1\nLen Plain: 638\nLen PlainWords: 7\nTruncated: true\nLen Summary: 43\nLen Content: 651")
1792+
b.AssertFileContent("public/p4/index.html", "WordCount: 206\nFuzzyWordCount: 300\nReadingTime: 1\nLen Plain: 638\nLen PlainWords: 7\nTruncated: true\nLen Summary: 43\nLen Content: 651")
17851793
b.AssertFileContent("public/p5/index.html", "WordCount: 206\nFuzzyWordCount: 300\nReadingTime: 1\nLen Plain: 638\nLen PlainWords: 7\nTruncated: true\nLen Summary: 229\nLen Content: 652")
1786-
b.AssertFileContent("public/p6/index.html", "WordCount: 7\nFuzzyWordCount: 100\nReadingTime: 1\nLen Plain: 638\nLen PlainWords: 7\nTruncated: false\nLen Summary: 637\nLen Content: 652")
1794+
b.AssertFileContent("public/p6/index.html", "WordCount: 206\nFuzzyWordCount: 300\nReadingTime: 1\nLen Plain: 638\nLen PlainWords: 7\nTruncated: false\nLen Summary: 637\nLen Content: 652")
17871795
}
17881796

17891797
func TestScratch(t *testing.T) {

0 commit comments

Comments
 (0)