Skip to content

Commit 3e99ae3

Browse files
committed
fix
1 parent 2158cf6 commit 3e99ae3

7 files changed

Lines changed: 139 additions & 97 deletions

File tree

modules/highlight/highlight.go

Lines changed: 99 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -5,30 +5,30 @@
55
package highlight
66

77
import (
8-
"bufio"
98
"bytes"
10-
"fmt"
11-
gohtml "html"
129
"html/template"
13-
"io"
14-
"strings"
10+
"slices"
1511
"sync"
1612

1713
"code.gitea.io/gitea/modules/log"
1814
"code.gitea.io/gitea/modules/setting"
1915
"code.gitea.io/gitea/modules/util"
20-
2116
"github.com/alecthomas/chroma/v2"
17+
2218
"github.com/alecthomas/chroma/v2/formatters/html"
2319
"github.com/alecthomas/chroma/v2/styles"
2420
)
2521

26-
// don't index files larger than this many bytes for performance purposes
27-
const sizeLimit = 1024 * 1024
22+
const (
23+
// don't highlight files larger than this many bytes for performance purposes
24+
sizeLimit = 1024 * 1024
25+
)
2826

2927
type globalVarsType struct {
30-
highlightMapping map[string]string
31-
githubStyles *chroma.Style
28+
highlightMapping map[string]string
29+
githubStyles *chroma.Style
30+
escapeFull []template.HTML
31+
escapeControlChars []template.HTML
3232
}
3333

3434
var (
@@ -44,10 +44,66 @@ func globalVars() *globalVarsType {
4444
globalVarsPtr = &globalVarsType{}
4545
globalVarsPtr.githubStyles = styles.Get("github")
4646
globalVarsPtr.highlightMapping = setting.GetHighlightMapping()
47+
globalVarsPtr.escapeControlChars = make([]template.HTML, 256)
48+
// ASCII Table 0x00 - 0x1F
49+
controlCharNames := []string{
50+
"NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
51+
"BS", "HT", "LF", "VT", "FF", "CR", "SO", "SI",
52+
"DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
53+
"CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US",
54+
}
55+
for i, s := range controlCharNames {
56+
globalVarsPtr.escapeControlChars[i] = template.HTML(`<span class="broken-code-point">` + s + `</span>`)
57+
}
58+
globalVarsPtr.escapeControlChars[0x7f] = `<span class="broken-code-point">DEL</span>`
59+
globalVarsPtr.escapeControlChars['\t'] = ""
60+
globalVarsPtr.escapeControlChars['\n'] = ""
61+
globalVarsPtr.escapeControlChars['\r'] = ""
62+
63+
globalVarsPtr.escapeFull = slices.Clone(globalVarsPtr.escapeControlChars)
64+
// exactly the same as Golang's html.EscapeString
65+
globalVarsPtr.escapeFull['&'] = "&amp;"
66+
globalVarsPtr.escapeFull['\''] = "&#39;"
67+
globalVarsPtr.escapeFull['<'] = "&lt;"
68+
globalVarsPtr.escapeFull['>'] = "&gt;"
69+
globalVarsPtr.escapeFull['"'] = "&#34;"
4770
}
4871
return globalVarsPtr
4972
}
5073

74+
func escapeByMap(code []byte, escapeMap []template.HTML) template.HTML {
75+
firstEscapePos := -1
76+
for i, c := range code {
77+
if escapeMap[c] != "" {
78+
firstEscapePos = i
79+
break
80+
}
81+
}
82+
if firstEscapePos == -1 {
83+
return template.HTML(util.UnsafeBytesToString(code))
84+
}
85+
86+
buf := make([]byte, firstEscapePos, len(code)*2)
87+
copy(buf[:firstEscapePos], code[:firstEscapePos])
88+
for i := firstEscapePos; i < len(code); i++ {
89+
c := code[i]
90+
if esc := escapeMap[c]; esc != "" {
91+
buf = append(buf, esc...)
92+
} else {
93+
buf = append(buf, c)
94+
}
95+
}
96+
return template.HTML(util.UnsafeBytesToString(buf))
97+
}
98+
99+
func escapeFullString(code string) template.HTML {
100+
return escapeByMap(util.UnsafeStringToBytes(code), globalVars().escapeFull)
101+
}
102+
103+
func escapeControlChars(code []byte) template.HTML {
104+
return escapeByMap(code, globalVars().escapeControlChars)
105+
}
106+
51107
// UnsafeSplitHighlightedLines splits highlighted code into lines preserving HTML tags
52108
// It always includes '\n', '\n' can appear at the end of each line or in the middle of HTML tags
53109
// The '\n' is necessary for copying code from web UI to preserve original code lines
@@ -90,7 +146,7 @@ func RenderCodeSlowGuess(fileName, language, code string) (output template.HTML,
90146
}
91147

92148
if len(code) > sizeLimit {
93-
return template.HTML(template.HTMLEscapeString(code)), nil, ""
149+
return escapeFullString(code), nil, ""
94150
}
95151

96152
lexer = detectChromaLexerWithAnalyze(fileName, language, util.UnsafeStringToBytes(code)) // it is also slow
@@ -104,86 +160,61 @@ func RenderCodeByLexer(lexer chroma.Lexer, code string) template.HTML {
104160
html.PreventSurroundingPre(true),
105161
)
106162

107-
htmlbuf := bytes.Buffer{}
108-
htmlw := bufio.NewWriter(&htmlbuf)
109-
110163
iterator, err := lexer.Tokenise(nil, code)
111164
if err != nil {
112165
log.Error("Can't tokenize code: %v", err)
113-
return template.HTML(template.HTMLEscapeString(code))
166+
return escapeFullString(code)
114167
}
168+
169+
htmlBuf := &bytes.Buffer{}
115170
// style not used for live site but need to pass something
116-
err = formatter.Format(htmlw, globalVars().githubStyles, iterator)
171+
err = formatter.Format(htmlBuf, globalVars().githubStyles, iterator)
117172
if err != nil {
118173
log.Error("Can't format code: %v", err)
119-
return template.HTML(template.HTMLEscapeString(code))
174+
return escapeFullString(code)
120175
}
121-
122-
_ = htmlw.Flush()
123-
// Chroma will add newlines for certain lexers in order to highlight them properly
124-
// Once highlighted, strip them here, so they don't cause copy/paste trouble in HTML output
125-
return template.HTML(strings.TrimSuffix(htmlbuf.String(), "\n"))
176+
return template.HTML(htmlBuf.String())
126177
}
127178

128179
// RenderFullFile returns a slice of chroma syntax highlighted HTML lines of code and the matched lexer name
129-
func RenderFullFile(fileName, language string, code []byte) ([]template.HTML, string, error) {
130-
if len(code) > sizeLimit {
131-
return RenderPlainText(code), "", nil
180+
func RenderFullFile(fileName, language string, code []byte) ([]template.HTML, string) {
181+
if language == LanguagePlaintext || len(code) > sizeLimit {
182+
return renderPlainText(code), formatLexerName(LanguagePlaintext)
132183
}
133-
134-
formatter := html.New(html.WithClasses(true),
135-
html.WithLineNumbers(false),
136-
html.PreventSurroundingPre(true),
137-
)
138-
139184
lexer := detectChromaLexerWithAnalyze(fileName, language, code)
140185
lexerName := formatLexerName(lexer.Config().Name)
141-
142-
iterator, err := lexer.Tokenise(nil, string(code))
143-
if err != nil {
144-
return nil, "", fmt.Errorf("can't tokenize code: %w", err)
186+
rendered := RenderCodeByLexer(lexer, util.UnsafeBytesToString(code))
187+
unsafeLines := UnsafeSplitHighlightedLines(rendered)
188+
lines := make([]template.HTML, 0, len(unsafeLines))
189+
for _, lineBytes := range unsafeLines {
190+
line := escapeControlChars(lineBytes)
191+
lines = append(lines, line)
145192
}
146-
147-
tokensLines := chroma.SplitTokensIntoLines(iterator.Tokens())
148-
htmlBuf := &bytes.Buffer{}
149-
150-
lines := make([]template.HTML, 0, len(tokensLines))
151-
for _, tokens := range tokensLines {
152-
iterator = chroma.Literator(tokens...)
153-
err = formatter.Format(htmlBuf, globalVars().githubStyles, iterator)
154-
if err != nil {
155-
return nil, "", fmt.Errorf("can't format code: %w", err)
156-
}
157-
lines = append(lines, template.HTML(htmlBuf.String()))
158-
htmlBuf.Reset()
159-
}
160-
161-
return lines, lexerName, nil
193+
return lines, lexerName
162194
}
163195

164-
// RenderPlainText returns non-highlighted HTML for code
165-
func RenderPlainText(code []byte) []template.HTML {
166-
r := bufio.NewReader(bytes.NewReader(code))
167-
m := make([]template.HTML, 0, bytes.Count(code, []byte{'\n'})+1)
168-
for {
169-
content, err := r.ReadString('\n')
170-
if err != nil && err != io.EOF {
171-
log.Error("failed to read string from buffer: %v", err)
172-
break
196+
// renderPlainText returns non-highlighted HTML for code
197+
func renderPlainText(code []byte) []template.HTML {
198+
lines := make([]template.HTML, 0, bytes.Count(code, []byte{'\n'})+1)
199+
pos := 0
200+
for pos < len(code) {
201+
var content []byte
202+
nextPos := bytes.IndexByte(code[pos:], '\n')
203+
if nextPos == -1 {
204+
content = code[pos:]
205+
pos = len(code)
206+
} else {
207+
content = code[pos : pos+nextPos+1]
208+
pos += nextPos + 1
173209
}
174-
if content == "" && err == io.EOF {
175-
break
176-
}
177-
s := template.HTML(gohtml.EscapeString(content))
178-
m = append(m, s)
210+
lines = append(lines, escapeFullString(util.UnsafeBytesToString(content)))
179211
}
180-
return m
212+
return lines
181213
}
182214

183215
func formatLexerName(name string) string {
184-
if name == "fallback" {
216+
if name == LanguagePlaintext || name == chromaLexerFallback {
185217
return "Plaintext"
186218
}
187-
188219
return util.ToTitleCaseNoLower(name)
189220
}

modules/highlight/highlight_test.go

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,7 @@ c=2
118118

119119
for _, tt := range tests {
120120
t.Run(tt.name, func(t *testing.T) {
121-
out, lexerName, err := RenderFullFile(tt.name, "", []byte(tt.code))
122-
assert.NoError(t, err)
121+
out, lexerName := RenderFullFile(tt.name, "", []byte(tt.code))
123122
assert.Equal(t, tt.want, out)
124123
assert.Equal(t, tt.lexerName, lexerName)
125124
})
@@ -182,7 +181,7 @@ c=2`),
182181

183182
for _, tt := range tests {
184183
t.Run(tt.name, func(t *testing.T) {
185-
out := RenderPlainText([]byte(tt.code))
184+
out := renderPlainText([]byte(tt.code))
186185
assert.Equal(t, tt.want, out)
187186
})
188187
}
@@ -205,3 +204,14 @@ func TestUnsafeSplitHighlightedLines(t *testing.T) {
205204
assert.Equal(t, "<span>a</span>\n", string(ret[0]))
206205
assert.Equal(t, "<span>b\n</span>", string(ret[1]))
207206
}
207+
208+
func TestEscape(t *testing.T) {
209+
assert.Equal(t, template.HTML("\t\r\n<span class=\"broken-code-point\">NUL</span><span class=\"broken-code-point\">US</span>&'\"<>"), escapeControlChars([]byte("\t\r\n\x00\x1f&'\"<>")))
210+
assert.Equal(t, template.HTML("<span class=\"broken-code-point\">NUL</span><span class=\"broken-code-point\">US</span>&amp;&#39;&#34;&lt;&gt;\t\r\n"), escapeFullString("\x00\x1f&'\"<>\t\r\n"))
211+
212+
out, _ := RenderFullFile("a.py", "", []byte("# \x7f<>"))
213+
assert.Equal(t, template.HTML(`<span class="c1"># <span class="broken-code-point">DEL</span>&lt;&gt;</span>`), out[0])
214+
215+
out = renderPlainText([]byte("# \x7f<>"))
216+
assert.Equal(t, template.HTML(`# <span class="broken-code-point">DEL</span>&lt;&gt;`), out[0])
217+
}

modules/highlight/lexerdetect.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,11 @@ import (
1616
"github.com/go-enry/go-enry/v2"
1717
)
1818

19-
const mapKeyLowerPrefix = "lower/"
19+
const (
20+
mapKeyLowerPrefix = "lower/"
21+
LanguagePlaintext = "plaintext"
22+
chromaLexerFallback = "fallback"
23+
)
2024

2125
// chromaLexers is fully managed by us to do fast lookup for chroma lexers by file name or language name
2226
// Don't use lexers.Get because it is very slow in many cases (iterate all rules, filepath glob match, etc.)

routers/web/repo/view_file.go

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -119,12 +119,8 @@ func handleFileViewRenderSource(ctx *context.Context, attrs *attribute.Attribute
119119
}
120120

121121
language := attrs.GetLanguage().Value()
122-
fileContent, lexerName, err := highlight.RenderFullFile(filename, language, buf)
122+
fileContent, lexerName := highlight.RenderFullFile(filename, language, buf)
123123
ctx.Data["LexerName"] = lexerName
124-
if err != nil {
125-
log.Error("highlight.RenderFullFile failed, fallback to plain text: %v", err)
126-
fileContent = highlight.RenderPlainText(buf)
127-
}
128124
status := &charset.EscapeStatus{}
129125
statuses := make([]*charset.EscapeStatus, len(fileContent))
130126
for i, line := range fileContent {

web_src/css/index.css

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
@import "./modules/flexcontainer.css";
3434
@import "./modules/codeeditor.css";
3535
@import "./modules/chroma.css";
36+
@import "./modules/charescape.css";
3637

3738
@import "./shared/flex-list.css";
3839
@import "./shared/milestone.css";

web_src/css/modules/charescape.css

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
.broken-code-point {
2+
border-radius: 4px;
3+
padding: 0 2px;
4+
color: var(--color-body);
5+
background: var(--color-text-light-1);
6+
}
7+
8+
.unicode-escaped .escaped-code-point[data-escaped]::before {
9+
visibility: visible;
10+
content: attr(data-escaped);
11+
color: var(--color-red);
12+
}
13+
14+
.unicode-escaped .escaped-code-point .char {
15+
display: none;
16+
}
17+
18+
.unicode-escaped .ambiguous-code-point {
19+
border: 1px var(--color-yellow) solid;
20+
}

web_src/css/repo.css

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -8,26 +8,6 @@
88
min-width: 40% !important;
99
}
1010

11-
.repository .unicode-escaped .escaped-code-point[data-escaped]::before {
12-
visibility: visible;
13-
content: attr(data-escaped);
14-
font-family: var(--fonts-monospace);
15-
color: var(--color-red);
16-
}
17-
18-
.repository .unicode-escaped .escaped-code-point .char {
19-
display: none;
20-
}
21-
22-
.repository .broken-code-point {
23-
font-family: var(--fonts-monospace);
24-
color: var(--color-blue);
25-
}
26-
27-
.repository .unicode-escaped .ambiguous-code-point {
28-
border: 1px var(--color-yellow) solid;
29-
}
30-
3111
.issue-content {
3212
display: flex;
3313
align-items: flex-start;

0 commit comments

Comments
 (0)