Skip to content

Commit 4ee74d7

Browse files
GiteaBotbircnisilverwindclaudewxiaoguang
authored
FIX: URL sanitization to handle schemeless credentials (#37440) (#37471)
Backport #37440 by @bircni Fixes #37435 Co-authored-by: Nicolas <bircni@icloud.com> Co-authored-by: silverwind <me@silverwind.io> Co-authored-by: Claude (Opus 4.7) <noreply@anthropic.com> Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
1 parent c4a1ff7 commit 4ee74d7

5 files changed

Lines changed: 139 additions & 49 deletions

File tree

modules/git/gitcmd/command.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,14 +57,12 @@ type Command struct {
5757
}
5858

5959
func logArgSanitize(arg string) string {
60-
if strings.Contains(arg, "://") && strings.Contains(arg, "@") {
61-
return util.SanitizeCredentialURLs(arg)
62-
} else if filepath.IsAbs(arg) {
60+
if filepath.IsAbs(arg) {
6361
base := filepath.Base(arg)
6462
dir := filepath.Dir(arg)
6563
return ".../" + filepath.Join(filepath.Base(dir), base)
6664
}
67-
return arg
65+
return util.SanitizeCredentialURLs(arg)
6866
}
6967

7068
func (c *Command) LogString() string {

modules/git/gitcmd/command_test.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,10 @@ func TestCommandString(t *testing.T) {
109109
assert.Equal(t, cmd.prog+` a "-m msg" "it's a test" "say \"hello\""`, cmd.LogString())
110110

111111
cmd = NewCommand("url: https://a:b@c/", "/root/dir-a/dir-b")
112-
assert.Equal(t, cmd.prog+` "url: https://sanitized-credential@c/" .../dir-a/dir-b`, cmd.LogString())
112+
assert.Equal(t, cmd.prog+` "url: https://(masked)@c/" .../dir-a/dir-b`, cmd.LogString())
113+
114+
cmd = NewCommand("url: a:b@c/", "/root/dir-a/dir-b")
115+
assert.Equal(t, cmd.prog+` "url: (masked)@c/" .../dir-a/dir-b`, cmd.LogString())
113116
}
114117

115118
func TestRunStdError(t *testing.T) {

modules/util/sanitize.go

Lines changed: 89 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@ package util
55

66
import (
77
"bytes"
8-
"unicode"
8+
"net"
9+
"strings"
910
)
1011

1112
type sanitizedError struct {
@@ -25,48 +26,103 @@ func SanitizeErrorCredentialURLs(err error) error {
2526
return sanitizedError{err: err}
2627
}
2728

28-
const userPlaceholder = "sanitized-credential"
29-
3029
var schemeSep = []byte("://")
3130

32-
// SanitizeCredentialURLs remove all credentials in URLs (starting with "scheme://") for the input string: "https://user:pass@domain.com" => "https://sanitized-credential@domain.com"
31+
const userInfoPlaceholder = "(masked)"
32+
33+
// SanitizeCredentialURLs remove all credentials in URLs for the input string:
34+
// * "https://userinfo@domain.com" => "https://***@domain.com"
35+
// * "user:pass@domain.com" => "***@domain.com"
36+
// "***" is a magic string internally used, doesn't guarantee to be anything.
3337
func SanitizeCredentialURLs(s string) string {
34-
bs := UnsafeStringToBytes(s)
35-
schemeSepPos := bytes.Index(bs, schemeSep)
36-
if schemeSepPos == -1 || bytes.IndexByte(bs[schemeSepPos:], '@') == -1 {
37-
return s // fast return if there is no URL scheme or no userinfo
38+
sepColPos := strings.Index(s, ":")
39+
if sepColPos == -1 {
40+
return s // fast path: no colon, unlikely contain any URL credential
3841
}
39-
out := make([]byte, 0, len(bs)+len(userPlaceholder))
40-
for schemeSepPos != -1 {
41-
schemeSepPos += 3 // skip the "://"
42-
sepAtPos := -1 // the possible '@' position: "https://foo@[^here]host"
43-
sepEndPos := schemeSepPos // the possible end position: "The https://host[^here] in log for test"
44-
sepLoop:
45-
for ; sepEndPos < len(bs); sepEndPos++ {
46-
c := bs[sepEndPos]
47-
if ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9') {
48-
continue
49-
}
42+
sepAtPos := strings.Index(s[sepColPos+1:], "@")
43+
for sepAtPos == -1 {
44+
return s // fast path: no "@" after colon, unlikely contain any URL credential
45+
}
46+
sepAtPos += sepColPos + 1
47+
48+
res := make([]byte, 0, len(s)+len(userInfoPlaceholder)) // a best guess to avoid too many re-allocations
49+
bs := UnsafeStringToBytes(s)
50+
for {
51+
// left part (before "@") is likely to be the "userinfo" (single username, or "username:password")
52+
leftPos := sepAtPos - 1
53+
leftLoop:
54+
for leftPos >= 0 {
55+
c := bs[leftPos]
5056
switch c {
51-
case '@':
52-
sepAtPos = sepEndPos
5357
case '-', '.', '_', '~', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '%':
54-
continue // due to RFC 3986, userinfo can contain - . _ ~ ! $ & ' ( ) * + , ; = : and any percent-encoded chars
58+
// RFC 3986, userinfo can contain - . _ ~ ! $ & ' ( ) * + , ; = : and any percent-encoded chars
59+
default:
60+
valid := 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9'
61+
if !valid {
62+
break leftLoop
63+
}
64+
}
65+
leftPos--
66+
}
67+
// left pos should point to the beginning of the left part, this pos is always valid in the buffer
68+
leftPos++
69+
70+
// right part is likely to be the host (domain name, ip address)
71+
rightPos := sepAtPos + 1
72+
rightLoop:
73+
for rightPos < len(bs) {
74+
c := bs[rightPos]
75+
switch c {
76+
case '.', '-':
77+
// valid host char
78+
case '[':
79+
// ipv6 begin
80+
if rightPos != sepAtPos+1 {
81+
break rightLoop
82+
}
83+
case ']':
84+
// ipv6 end
85+
rightPos++
86+
break rightLoop
5587
default:
56-
break sepLoop // if it is an invalid char for URL (eg: space, '/', and others), stop the loop
88+
valid := 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9'
89+
if bs[sepAtPos+1] == '[' {
90+
// ipv6 host
91+
valid = 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' || '0' <= c && c <= '9' || c == ':'
92+
}
93+
if !valid {
94+
break rightLoop
95+
}
5796
}
97+
rightPos++
5898
}
59-
// if there is '@', and the string is like "s://u@h", then hide the "u" part
60-
if sepAtPos != -1 && (schemeSepPos >= 4 && unicode.IsLetter(rune(bs[schemeSepPos-4]))) && sepAtPos-schemeSepPos > 0 && sepEndPos-sepAtPos > 0 {
61-
out = append(out, bs[:schemeSepPos]...)
62-
out = append(out, userPlaceholder...)
63-
out = append(out, bs[sepAtPos:sepEndPos]...)
99+
100+
leading, leftPart, rightPart := bs[:leftPos], bs[leftPos:sepAtPos], bs[sepAtPos+1:rightPos]
101+
102+
// Either:
103+
// * git log message: "user:pass@host" (it contains a colon in userinfo), ignore "git@host" pattern
104+
// * http like URL: "https://userinfo@host.com" (it has "://" before the userinfo)
105+
needSanitize := bytes.IndexByte(leftPart, ':') >= 0 || bytes.HasSuffix(leading, schemeSep)
106+
needSanitize = needSanitize && len(leftPart) > 0 && len(rightPart) > 0
107+
// TODO: can also do more checks for right part
108+
// for example: ipv6 quick check
109+
if needSanitize && rightPart[0] == '[' {
110+
needSanitize = rightPart[len(rightPart)-1] == ']' && net.ParseIP(UnsafeBytesToString(rightPart[1:len(rightPart)-1])) != nil
111+
}
112+
if needSanitize {
113+
res = append(res, leading...)
114+
res = append(res, userInfoPlaceholder...)
115+
res = append(res, '@')
116+
res = append(res, rightPart...)
64117
} else {
65-
out = append(out, bs[:sepEndPos]...)
118+
res = append(res, bs[:rightPos]...)
119+
}
120+
bs = bs[rightPos:]
121+
sepAtPos = bytes.IndexByte(bs, '@')
122+
if sepAtPos == -1 {
123+
break
66124
}
67-
bs = bs[sepEndPos:]
68-
schemeSepPos = bytes.Index(bs, schemeSep)
69125
}
70-
out = append(out, bs...)
71-
return UnsafeBytesToString(out)
126+
res = append(res, bs...)
127+
return UnsafeBytesToString(res)
72128
}

modules/util/sanitize_test.go

Lines changed: 43 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ import (
1313
func TestSanitizeErrorCredentialURLs(t *testing.T) {
1414
err := errors.New("error with https://a@b.com")
1515
se := SanitizeErrorCredentialURLs(err)
16-
assert.Equal(t, "error with https://"+userPlaceholder+"@b.com", se.Error())
16+
assert.Equal(t, "error with https://"+userInfoPlaceholder+"@b.com", se.Error())
1717
}
1818

1919
func TestSanitizeCredentialURLs(t *testing.T) {
@@ -27,43 +27,76 @@ func TestSanitizeCredentialURLs(t *testing.T) {
2727
},
2828
{
2929
"https://mytoken@github.com/go-gitea/test_repo.git",
30-
"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git",
30+
"https://" + userInfoPlaceholder + "@github.com/go-gitea/test_repo.git",
3131
},
3232
{
3333
"https://user:password@github.com/go-gitea/test_repo.git",
34-
"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git",
34+
"https://" + userInfoPlaceholder + "@github.com/go-gitea/test_repo.git",
3535
},
3636
{
37+
"https://user:password@[::]/go-gitea/test_repo.git",
38+
"https://" + userInfoPlaceholder + "@[::]/go-gitea/test_repo.git",
39+
},
40+
{
41+
"https://user:password@[2001:db8::1]:8080/go-gitea/test_repo.git",
42+
"https://" + userInfoPlaceholder + "@[2001:db8::1]:8080/go-gitea/test_repo.git",
43+
},
44+
{
45+
"see https://u:p@[::1]/x and https://u2:p2@h2",
46+
"see https://" + userInfoPlaceholder + "@[::1]/x and https://" + userInfoPlaceholder + "@h2",
47+
},
48+
{
49+
"https://user:secret@[unclosed-ipv6",
50+
"https://user:secret@[unclosed-ipv6",
51+
},
52+
{
53+
"https://user:secret@[invalid-ipv6]",
54+
"https://user:secret@[invalid-ipv6]",
55+
},
56+
{
57+
"ftp://x@",
3758
"ftp://x@",
38-
"ftp://" + userPlaceholder + "@",
3959
},
4060
{
4161
"ftp://x/@",
4262
"ftp://x/@",
4363
},
4464
{
4565
"ftp://u@x/@", // test multiple @ chars
46-
"ftp://" + userPlaceholder + "@x/@",
66+
"ftp://" + userInfoPlaceholder + "@x/@",
4767
},
4868
{
4969
"😊ftp://u@x😊", // test unicode
50-
"😊ftp://" + userPlaceholder + "@x😊",
70+
"😊ftp://" + userInfoPlaceholder + "@x😊",
5171
},
5272
{
5373
"://@",
5474
"://@",
5575
},
5676
{
57-
"//u:p@h", // do not process URLs without explicit scheme, they are not treated as "valid" URLs because there is no scheme context in string
5877
"//u:p@h",
78+
"//" + userInfoPlaceholder + "@h",
5979
},
6080
{
61-
"s://u@h", // the minimal pattern to be sanitized
62-
"s://" + userPlaceholder + "@h",
81+
"s://u@h",
82+
"s://" + userInfoPlaceholder + "@h",
6383
},
6484
{
6585
"URLs in log https://u:b@h and https://u:b@h:80/, with https://h.com and u@h.com",
66-
"URLs in log https://" + userPlaceholder + "@h and https://" + userPlaceholder + "@h:80/, with https://h.com and u@h.com",
86+
"URLs in log https://" + userInfoPlaceholder + "@h and https://" + userInfoPlaceholder + "@h:80/, with https://h.com and u@h.com",
87+
},
88+
{
89+
"fatal: unable to look up username:token@github.com (port 9418)",
90+
"fatal: unable to look up " + userInfoPlaceholder + "@github.com (port 9418)",
91+
},
92+
{
93+
"git failed for user:token@github.com/go-gitea/test_repo.git",
94+
"git failed for " + userInfoPlaceholder + "@github.com/go-gitea/test_repo.git",
95+
},
96+
{
97+
// SSH-form git URL ("git@host:path") must not let a later credential URL through
98+
"failed remote git@github.com:foo, retried via https://user:tok@github.com/foo",
99+
"failed remote git@github.com:foo, retried via https://" + userInfoPlaceholder + "@github.com/foo",
67100
},
68101
}
69102

services/migrations/migrate.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,7 @@ func migrateRepository(ctx context.Context, doer *user_model.User, downloader ba
218218
// We don't actually need to check the OriginalURL as it isn't used anywhere
219219
}
220220

221-
log.Trace("migrating git data from %s", repo.CloneURL)
221+
log.Trace("migrating git data from %s", util.SanitizeCredentialURLs(repo.CloneURL))
222222
messenger("repo.migrate.migrating_git")
223223
if err = uploader.CreateRepo(ctx, repo, opts); err != nil {
224224
return err

0 commit comments

Comments
 (0)