Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 15 additions & 5 deletions services/gitdiff/gitdiff.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"path"
"sort"
"strings"
"time"

"code.gitea.io/gitea/models/db"
git_model "code.gitea.io/gitea/models/git"
Expand Down Expand Up @@ -123,8 +124,14 @@ type DiffHTMLOperation struct {
// BlobExcerptChunkSize represents the maximum number of lines of an excerpt
const BlobExcerptChunkSize = 20

// MaxDiffHighlightEntireFileSize is the maximum file size that will be highlighted with "entire file diff"
const MaxDiffHighlightEntireFileSize = 1 * 1024 * 1024
// Chroma seems extremely slow when highlighting large files: it might take dozens or hundreds of milliseconds.
// Fully highlighting a diff that contains a lot of large files could take many seconds, or even dozens of seconds.
// So, don't highlight the entire file if it is too large, or if highlighting has already taken too long.
// When full-file highlighting is skipped, the legacy "line-by-line" highlighting is still applied as the fallback.
const (
MaxFullFileHighlightSizeLimit = 256 * 1024
Comment thread
silverwind marked this conversation as resolved.
MaxFullFileHighlightTimeLimit = 2 * time.Second
Comment thread
silverwind marked this conversation as resolved.
)

// GetType returns the type of DiffLine.
func (d *DiffLine) GetType() int {
Expand Down Expand Up @@ -564,7 +571,7 @@ func getCommitFileLineCountAndLimitedContent(commit *git.Commit, filePath string
if err != nil {
return 0, nil
}
w := &limitByteWriter{limit: MaxDiffHighlightEntireFileSize + 1}
w := &limitByteWriter{limit: MaxFullFileHighlightSizeLimit + 1}
lineCount, err = blob.GetBlobLineCount(w)
if err != nil {
return 0, nil
Expand Down Expand Up @@ -1317,6 +1324,8 @@ func GetDiffForRender(ctx context.Context, repoLink string, gitRepo *git.Reposit
return nil, err
}

startTime := time.Now()

checker, err := attribute.NewBatchChecker(gitRepo, opts.AfterCommitID, []string{attribute.LinguistVendored, attribute.LinguistGenerated, attribute.LinguistLanguage, attribute.GitlabLanguage, attribute.Diff})
if err != nil {
return nil, err
Expand Down Expand Up @@ -1356,7 +1365,8 @@ func GetDiffForRender(ctx context.Context, repoLink string, gitRepo *git.Reposit
diffFile.Sections = append(diffFile.Sections, tailSection)
}

shouldFullFileHighlight := !setting.Git.DisableDiffHighlight && attrDiff.Value() == ""
shouldFullFileHighlight := attrDiff.Value() == "" // only do highlight if no custom diff command
shouldFullFileHighlight = shouldFullFileHighlight && time.Since(startTime) < MaxFullFileHighlightTimeLimit
if shouldFullFileHighlight {
if limitedContent.LeftContent != nil {
diffFile.highlightedLeftLines.value = highlightCodeLinesForDiffFile(diffFile, true /* left */, limitedContent.LeftContent.buf.Bytes())
Expand All @@ -1380,7 +1390,7 @@ func highlightCodeLinesForDiffFile(diffFile *DiffFile, isLeft bool, rawContent [
}

func highlightCodeLines(name, lang string, sections []*DiffSection, isLeft bool, rawContent []byte) map[int]template.HTML {
if setting.Git.DisableDiffHighlight || len(rawContent) > MaxDiffHighlightEntireFileSize {
if setting.Git.DisableDiffHighlight || len(rawContent) > MaxFullFileHighlightSizeLimit {
return nil
}

Expand Down
86 changes: 54 additions & 32 deletions services/gitdiff/highlightdiff.go
Original file line number Diff line number Diff line change
Expand Up @@ -298,15 +298,21 @@ func (hcd *highlightCodeDiff) convertToPlaceholders(htmlContent template.HTML) s
return res.String()
}

func (hcd *highlightCodeDiff) extractNextPlaceholder(buf []byte, lastIdx int) (idx int, placeholder rune, runeLen int, token string) {
for idx = lastIdx; idx < len(buf); {
placeholder, runeLen = utf8.DecodeRune(buf[idx:])
if token = hcd.placeholderTokenMap[placeholder]; token != "" {
break
// recoverOneRune tries to recover one rune
// * if the rune is a placeholder, it will be recovered to the corresponding content
// * otherwise it will be returned as is
func (hcd *highlightCodeDiff) recoverOneRune(buf []byte) (r rune, runeLen int, isSingleTag bool, recovered string) {
r, runeLen = utf8.DecodeRune(buf)
token := hcd.placeholderTokenMap[r]
if token == "" {
return r, runeLen, false, "" // rune itself, not a placeholder
} else if token[0] == '<' {
if token[1] == '<' {
return 0, runeLen, false, token[1 : len(token)-1] // full tag `<<span>content</span>>`, recover to `<span>content</span>`
}
idx += runeLen
return r, runeLen, true, token // single tag
}
return idx, placeholder, runeLen, token
return 0, runeLen, false, token // HTML entity
}

func (hcd *highlightCodeDiff) recoverOneDiff(str string) template.HTML {
Expand All @@ -315,49 +321,65 @@ func (hcd *highlightCodeDiff) recoverOneDiff(str string) template.HTML {
var diffCodeOpenTag string
diffCodeCloseTag := hcd.placeholderTokenMap[hcd.diffCodeClose]
strBytes := util.UnsafeStringToBytes(str)

// this loop is slightly longer than expected, for performance consideration
for idx := 0; idx < len(strBytes); {
newIdx, placeholder, lastRuneLen, token := hcd.extractNextPlaceholder(strBytes, idx)
if newIdx != idx {
// take a look at the next rune
r, runeLen, isSingleTag, recovered := hcd.recoverOneRune(strBytes[idx:])
idx += runeLen

// loop section 1: if it isn't a single tag, then try to find the following runes until the next single tag, and recover them together
if !isSingleTag {
if diffCodeOpenTag != "" {
// start the "added/removed diff tag" if the current token is in the diff part
sb.WriteString(diffCodeOpenTag)
sb.Write(strBytes[idx:newIdx])
sb.WriteString(diffCodeCloseTag)
}
if recovered != "" {
sb.WriteString(recovered)
} else {
sb.Write(strBytes[idx:newIdx])
sb.WriteRune(r)
}
// inner loop to recover following runes until the next single tag
for idx < len(strBytes) {
r, runeLen, isSingleTag, recovered = hcd.recoverOneRune(strBytes[idx:])
idx += runeLen
if isSingleTag {
break
}
if recovered != "" {
sb.WriteString(recovered)
} else {
sb.WriteRune(r)
}
}
if diffCodeOpenTag != "" {
// end the "added/removed diff tag" if the current token is in the diff part
sb.WriteString(diffCodeCloseTag)
}
} // else: no text content before, the current token is a placeholder
if token == "" {
break // reaches the string end, no more placeholder
}
idx = newIdx + lastRuneLen

// for HTML entity
if token[0] == '&' {
sb.WriteString(token)
continue
if !isSingleTag {
break // the inner loop has already consumed all remaining runes, no more single tag found
}

// for various HTML tags
var recovered string
if token[1] == '<' { // full tag `<<span>content</span>>`, recover to `<span>content</span>`
recovered = token[1 : len(token)-1]
if diffCodeOpenTag != "" {
recovered = diffCodeOpenTag + recovered + diffCodeCloseTag
} // else: just use the recovered content
} else if token[1] != '/' { // opening tag
// loop section 2: for opening/closing HTML tags
placeholder := r
if recovered[1] != '/' { // opening tag
if placeholder == hcd.diffCodeAddedOpen || placeholder == hcd.diffCodeRemovedOpen {
diffCodeOpenTag = token
diffCodeOpenTag = recovered
recovered = ""
} else {
recovered = token
tagStack = append(tagStack, recovered)
}
} else { // closing tag
if placeholder == hcd.diffCodeClose {
diffCodeOpenTag = "" // the highlighted diff is closed, no more diff
recovered = ""
} else if len(tagStack) != 0 {
recovered = token
tagStack = tagStack[:len(tagStack)-1]
} // else: if no opening tag in stack yet, skip the closing tag
} else {
recovered = ""
}
}
sb.WriteString(recovered)
}
Expand Down
8 changes: 3 additions & 5 deletions services/gitdiff/highlightdiff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,13 @@ func TestDiffWithHighlight(t *testing.T) {

t.Run("ComplexDiff1", func(t *testing.T) {
oldCode, _ := highlight.RenderCodeFast("a.go", "Go", `xxx || yyy`)
newCode, _ := highlight.RenderCodeFast("a.go", "Go", `bot.xxx || bot.yyy`)
newCode, _ := highlight.RenderCodeFast("a.go", "Go", `bot&xxx || bot&yyy`)
hcd := newHighlightCodeDiff()
out := hcd.diffLineWithHighlight(DiffLineAdd, oldCode, newCode)
assert.Equal(t, strings.ReplaceAll(`
<span class="added-code"><span class="nx">bot</span></span>
<span class="added-code"><span class="p">.</span></span>
<span class="added-code"><span class="nx">bot</span></span><span class="o"><span class="added-code">&amp;</span></span>
<span class="nx">xxx</span><span class="w"> </span><span class="o">||</span><span class="w"> </span>
<span class="added-code"><span class="nx">bot</span></span>
<span class="added-code"><span class="p">.</span></span>
<span class="added-code"><span class="nx">bot</span></span><span class="o"><span class="added-code">&amp;</span></span>
<span class="nx">yyy</span>`, "\n", ""), string(out))
})

Expand Down
Loading