55package highlight
66
77import (
8- "bufio"
98 "bytes"
10- "fmt"
11- gohtml "html"
129 "html/template"
13- "io"
14- "strings"
10+ "slices"
1511 "sync"
1612
1713 "code.gitea.io/gitea/modules/log"
1814 "code.gitea.io/gitea/modules/setting"
1915 "code.gitea.io/gitea/modules/util"
20-
2116 "github.com/alecthomas/chroma/v2"
17+
2218 "github.com/alecthomas/chroma/v2/formatters/html"
2319 "github.com/alecthomas/chroma/v2/styles"
2420)
2521
const (
	// sizeLimit is the maximum input size, in bytes, that will be syntax highlighted.
	// Larger inputs are only HTML-escaped, for performance purposes.
	sizeLimit = 1024 * 1024
)
2826
2927type globalVarsType struct {
30- highlightMapping map [string ]string
31- githubStyles * chroma.Style
28+ highlightMapping map [string ]string
29+ githubStyles * chroma.Style
30+ escapeFull []template.HTML
31+ escapeControlChars []template.HTML
3232}
3333
3434var (
@@ -44,10 +44,66 @@ func globalVars() *globalVarsType {
4444 globalVarsPtr = & globalVarsType {}
4545 globalVarsPtr .githubStyles = styles .Get ("github" )
4646 globalVarsPtr .highlightMapping = setting .GetHighlightMapping ()
47+ globalVarsPtr .escapeControlChars = make ([]template.HTML , 256 )
48+ // ASCII Table 0x00 - 0x1F
49+ controlCharNames := []string {
50+ "NUL" , "SOH" , "STX" , "ETX" , "EOT" , "ENQ" , "ACK" , "BEL" ,
51+ "BS" , "HT" , "LF" , "VT" , "FF" , "CR" , "SO" , "SI" ,
52+ "DLE" , "DC1" , "DC2" , "DC3" , "DC4" , "NAK" , "SYN" , "ETB" ,
53+ "CAN" , "EM" , "SUB" , "ESC" , "FS" , "GS" , "RS" , "US" ,
54+ }
55+ for i , s := range controlCharNames {
56+ globalVarsPtr .escapeControlChars [i ] = template .HTML (`<span class="broken-code-point">` + s + `</span>` )
57+ }
58+ globalVarsPtr .escapeControlChars [0x7f ] = `<span class="broken-code-point">DEL</span>`
59+ globalVarsPtr .escapeControlChars ['\t' ] = ""
60+ globalVarsPtr .escapeControlChars ['\n' ] = ""
61+ globalVarsPtr .escapeControlChars ['\r' ] = ""
62+
63+ globalVarsPtr .escapeFull = slices .Clone (globalVarsPtr .escapeControlChars )
64+ // exactly the same as Golang's html.EscapeString
65+ globalVarsPtr .escapeFull ['&' ] = "&"
66+ globalVarsPtr .escapeFull ['\'' ] = "'"
67+ globalVarsPtr .escapeFull ['<' ] = "<"
68+ globalVarsPtr .escapeFull ['>' ] = ">"
69+ globalVarsPtr .escapeFull ['"' ] = """
4770 }
4871 return globalVarsPtr
4972}
5073
74+ func escapeByMap (code []byte , escapeMap []template.HTML ) template.HTML {
75+ firstEscapePos := - 1
76+ for i , c := range code {
77+ if escapeMap [c ] != "" {
78+ firstEscapePos = i
79+ break
80+ }
81+ }
82+ if firstEscapePos == - 1 {
83+ return template .HTML (util .UnsafeBytesToString (code ))
84+ }
85+
86+ buf := make ([]byte , firstEscapePos , len (code )* 2 )
87+ copy (buf [:firstEscapePos ], code [:firstEscapePos ])
88+ for i := firstEscapePos ; i < len (code ); i ++ {
89+ c := code [i ]
90+ if esc := escapeMap [c ]; esc != "" {
91+ buf = append (buf , esc ... )
92+ } else {
93+ buf = append (buf , c )
94+ }
95+ }
96+ return template .HTML (util .UnsafeBytesToString (buf ))
97+ }
98+
99+ func escapeFullString (code string ) template.HTML {
100+ return escapeByMap (util .UnsafeStringToBytes (code ), globalVars ().escapeFull )
101+ }
102+
103+ func escapeControlChars (code []byte ) template.HTML {
104+ return escapeByMap (code , globalVars ().escapeControlChars )
105+ }
106+
51107// UnsafeSplitHighlightedLines splits highlighted code into lines preserving HTML tags
52108// It always includes '\n', '\n' can appear at the end of each line or in the middle of HTML tags
53109// The '\n' is necessary for copying code from web UI to preserve original code lines
@@ -90,7 +146,7 @@ func RenderCodeSlowGuess(fileName, language, code string) (output template.HTML,
90146 }
91147
92148 if len (code ) > sizeLimit {
93- return template . HTML ( template . HTMLEscapeString ( code ) ), nil , ""
149+ return escapeFullString ( code ), nil , ""
94150 }
95151
96152 lexer = detectChromaLexerWithAnalyze (fileName , language , util .UnsafeStringToBytes (code )) // it is also slow
@@ -104,86 +160,61 @@ func RenderCodeByLexer(lexer chroma.Lexer, code string) template.HTML {
104160 html .PreventSurroundingPre (true ),
105161 )
106162
107- htmlbuf := bytes.Buffer {}
108- htmlw := bufio .NewWriter (& htmlbuf )
109-
110163 iterator , err := lexer .Tokenise (nil , code )
111164 if err != nil {
112165 log .Error ("Can't tokenize code: %v" , err )
113- return template . HTML ( template . HTMLEscapeString ( code ) )
166+ return escapeFullString ( code )
114167 }
168+
169+ htmlBuf := & bytes.Buffer {}
115170 // style not used for live site but need to pass something
116- err = formatter .Format (htmlw , globalVars ().githubStyles , iterator )
171+ err = formatter .Format (htmlBuf , globalVars ().githubStyles , iterator )
117172 if err != nil {
118173 log .Error ("Can't format code: %v" , err )
119- return template . HTML ( template . HTMLEscapeString ( code ) )
174+ return escapeFullString ( code )
120175 }
121-
122- _ = htmlw .Flush ()
123- // Chroma will add newlines for certain lexers in order to highlight them properly
124- // Once highlighted, strip them here, so they don't cause copy/paste trouble in HTML output
125- return template .HTML (strings .TrimSuffix (htmlbuf .String (), "\n " ))
176+ return template .HTML (htmlBuf .String ())
126177}
127178
128179// RenderFullFile returns a slice of chroma syntax highlighted HTML lines of code and the matched lexer name
129- func RenderFullFile (fileName , language string , code []byte ) ([]template.HTML , string , error ) {
130- if len (code ) > sizeLimit {
131- return RenderPlainText (code ), "" , nil
180+ func RenderFullFile (fileName , language string , code []byte ) ([]template.HTML , string ) {
181+ if language == LanguagePlaintext || len (code ) > sizeLimit {
182+ return renderPlainText (code ), formatLexerName ( LanguagePlaintext )
132183 }
133-
134- formatter := html .New (html .WithClasses (true ),
135- html .WithLineNumbers (false ),
136- html .PreventSurroundingPre (true ),
137- )
138-
139184 lexer := detectChromaLexerWithAnalyze (fileName , language , code )
140185 lexerName := formatLexerName (lexer .Config ().Name )
141-
142- iterator , err := lexer .Tokenise (nil , string (code ))
143- if err != nil {
144- return nil , "" , fmt .Errorf ("can't tokenize code: %w" , err )
186+ rendered := RenderCodeByLexer (lexer , util .UnsafeBytesToString (code ))
187+ unsafeLines := UnsafeSplitHighlightedLines (rendered )
188+ lines := make ([]template.HTML , 0 , len (unsafeLines ))
189+ for _ , lineBytes := range unsafeLines {
190+ line := escapeControlChars (lineBytes )
191+ lines = append (lines , line )
145192 }
146-
147- tokensLines := chroma .SplitTokensIntoLines (iterator .Tokens ())
148- htmlBuf := & bytes.Buffer {}
149-
150- lines := make ([]template.HTML , 0 , len (tokensLines ))
151- for _ , tokens := range tokensLines {
152- iterator = chroma .Literator (tokens ... )
153- err = formatter .Format (htmlBuf , globalVars ().githubStyles , iterator )
154- if err != nil {
155- return nil , "" , fmt .Errorf ("can't format code: %w" , err )
156- }
157- lines = append (lines , template .HTML (htmlBuf .String ()))
158- htmlBuf .Reset ()
159- }
160-
161- return lines , lexerName , nil
193+ return lines , lexerName
162194}
163195
164- // RenderPlainText returns non-highlighted HTML for code
165- func RenderPlainText (code []byte ) []template.HTML {
166- r := bufio .NewReader (bytes .NewReader (code ))
167- m := make ([]template.HTML , 0 , bytes .Count (code , []byte {'\n' })+ 1 )
168- for {
169- content , err := r .ReadString ('\n' )
170- if err != nil && err != io .EOF {
171- log .Error ("failed to read string from buffer: %v" , err )
172- break
196+ // renderPlainText returns non-highlighted HTML for code
197+ func renderPlainText (code []byte ) []template.HTML {
198+ lines := make ([]template.HTML , 0 , bytes .Count (code , []byte {'\n' })+ 1 )
199+ pos := 0
200+ for pos < len (code ) {
201+ var content []byte
202+ nextPos := bytes .IndexByte (code [pos :], '\n' )
203+ if nextPos == - 1 {
204+ content = code [pos :]
205+ pos = len (code )
206+ } else {
207+ content = code [pos : pos + nextPos + 1 ]
208+ pos += nextPos + 1
173209 }
174- if content == "" && err == io .EOF {
175- break
176- }
177- s := template .HTML (gohtml .EscapeString (content ))
178- m = append (m , s )
210+ lines = append (lines , escapeFullString (util .UnsafeBytesToString (content )))
179211 }
180- return m
212+ return lines
181213}
182214
183215func formatLexerName (name string ) string {
184- if name == "fallback" {
216+ if name == LanguagePlaintext || name == chromaLexerFallback {
185217 return "Plaintext"
186218 }
187-
188219 return util .ToTitleCaseNoLower (name )
189220}
0 commit comments