@@ -5,7 +5,8 @@ package util
55
66import (
77 "bytes"
8- "unicode"
8+ "net"
9+ "strings"
910)
1011
1112type sanitizedError struct {
@@ -25,48 +26,103 @@ func SanitizeErrorCredentialURLs(err error) error {
2526 return sanitizedError {err : err }
2627}
2728
28- const userPlaceholder = "sanitized-credential"
29-
3029var schemeSep = []byte ("://" )
3130
32- // SanitizeCredentialURLs remove all credentials in URLs (starting with "scheme://") for the input string: "https://user:pass@domain.com" => "https://sanitized-credential@domain.com"
31+ const userInfoPlaceholder = "(masked)"
32+
33+ // SanitizeCredentialURLs removes all credentials in URLs for the input string:
34+ // * "https://userinfo@domain.com" => "https://***@domain.com"
35+ // * "user:pass@domain.com" => "***@domain.com"
36+ // "***" stands for an internal placeholder string; its exact value is not guaranteed.
3337func SanitizeCredentialURLs (s string ) string {
34- bs := UnsafeStringToBytes (s )
35- schemeSepPos := bytes .Index (bs , schemeSep )
36- if schemeSepPos == - 1 || bytes .IndexByte (bs [schemeSepPos :], '@' ) == - 1 {
37- return s // fast return if there is no URL scheme or no userinfo
38+ sepColPos := strings .Index (s , ":" )
39+ if sepColPos == - 1 {
40+ return s // fast path: no colon, unlikely to contain any URL credential
3841 }
39- out := make ([]byte , 0 , len (bs )+ len (userPlaceholder ))
40- for schemeSepPos != - 1 {
41- schemeSepPos += 3 // skip the "://"
42- sepAtPos := - 1 // the possible '@' position: "https://foo@[^here]host"
43- sepEndPos := schemeSepPos // the possible end position: "The https://host[^here] in log for test"
44- sepLoop:
45- for ; sepEndPos < len (bs ); sepEndPos ++ {
46- c := bs [sepEndPos ]
47- if ('A' <= c && c <= 'Z' ) || ('a' <= c && c <= 'z' ) || ('0' <= c && c <= '9' ) {
48- continue
49- }
42+ sepAtPos := strings .Index (s [sepColPos + 1 :], "@" )
43+ for sepAtPos == - 1 {
44+ return s // fast path: no "@" after colon, unlikely to contain any URL credential
45+ }
46+ sepAtPos += sepColPos + 1
47+
48+ res := make ([]byte , 0 , len (s )+ len (userInfoPlaceholder )) // a best guess to avoid too many re-allocations
49+ bs := UnsafeStringToBytes (s )
50+ for {
51+ // left part (before "@") is likely to be the "userinfo" (single username, or "username:password")
52+ leftPos := sepAtPos - 1
53+ leftLoop:
54+ for leftPos >= 0 {
55+ c := bs [leftPos ]
5056 switch c {
51- case '@' :
52- sepAtPos = sepEndPos
5357 case '-' , '.' , '_' , '~' , '!' , '$' , '&' , '\'' , '(' , ')' , '*' , '+' , ',' , ';' , '=' , ':' , '%' :
54- continue // due to RFC 3986, userinfo can contain - . _ ~ ! $ & ' ( ) * + , ; = : and any percent-encoded chars
58+ // RFC 3986, userinfo can contain - . _ ~ ! $ & ' ( ) * + , ; = : and any percent-encoded chars
59+ default :
60+ valid := 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9'
61+ if ! valid {
62+ break leftLoop
63+ }
64+ }
65+ leftPos --
66+ }
67+ // left pos should point to the beginning of the left part, this pos is always valid in the buffer
68+ leftPos ++
69+
70+ // right part is likely to be the host (domain name, ip address)
71+ rightPos := sepAtPos + 1
72+ rightLoop:
73+ for rightPos < len (bs ) {
74+ c := bs [rightPos ]
75+ switch c {
76+ case '.' , '-' :
77+ // valid host char
78+ case '[' :
79+ // ipv6 begin
80+ if rightPos != sepAtPos + 1 {
81+ break rightLoop
82+ }
83+ case ']' :
84+ // ipv6 end
85+ rightPos ++
86+ break rightLoop
5587 default :
56- break sepLoop // if it is an invalid char for URL (eg: space, '/', and others), stop the loop
88+ valid := 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9'
89+ if bs [sepAtPos + 1 ] == '[' {
90+ // ipv6 host
91+ valid = 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' || '0' <= c && c <= '9' || c == ':'
92+ }
93+ if ! valid {
94+ break rightLoop
95+ }
5796 }
97+ rightPos ++
5898 }
59- // if there is '@', and the string is like "s://u@h", then hide the "u" part
60- if sepAtPos != - 1 && (schemeSepPos >= 4 && unicode .IsLetter (rune (bs [schemeSepPos - 4 ]))) && sepAtPos - schemeSepPos > 0 && sepEndPos - sepAtPos > 0 {
61- out = append (out , bs [:schemeSepPos ]... )
62- out = append (out , userPlaceholder ... )
63- out = append (out , bs [sepAtPos :sepEndPos ]... )
99+
100+ leading , leftPart , rightPart := bs [:leftPos ], bs [leftPos :sepAtPos ], bs [sepAtPos + 1 :rightPos ]
101+
102+ // Either:
103+ // * git log message: "user:pass@host" (it contains a colon in userinfo), ignore "git@host" pattern
104+ // * http like URL: "https://userinfo@host.com" (it has "://" before the userinfo)
105+ needSanitize := bytes .IndexByte (leftPart , ':' ) >= 0 || bytes .HasSuffix (leading , schemeSep )
106+ needSanitize = needSanitize && len (leftPart ) > 0 && len (rightPart ) > 0
107+ // TODO: can also do more checks for right part
108+ // for example: ipv6 quick check
109+ if needSanitize && rightPart [0 ] == '[' {
110+ needSanitize = rightPart [len (rightPart )- 1 ] == ']' && net .ParseIP (UnsafeBytesToString (rightPart [1 :len (rightPart )- 1 ])) != nil
111+ }
112+ if needSanitize {
113+ res = append (res , leading ... )
114+ res = append (res , userInfoPlaceholder ... )
115+ res = append (res , '@' )
116+ res = append (res , rightPart ... )
64117 } else {
65- out = append (out , bs [:sepEndPos ]... )
118+ res = append (res , bs [:rightPos ]... )
119+ }
120+ bs = bs [rightPos :]
121+ sepAtPos = bytes .IndexByte (bs , '@' )
122+ if sepAtPos == - 1 {
123+ break
66124 }
67- bs = bs [sepEndPos :]
68- schemeSepPos = bytes .Index (bs , schemeSep )
69125 }
70- out = append (out , bs ... )
71- return UnsafeBytesToString (out )
126+ res = append (res , bs ... )
127+ return UnsafeBytesToString (res )
72128}
0 commit comments