go-gitea · zeripath · Aug 23, 2020
diff --git a/modules/charset/charset_test.go b/modules/charset/charset_test.go
@@ -5,13 +5,30 @@
 package charset
 
 import (
+	"strings"
 	"testing"
 
 	"code.gitea.io/gitea/modules/setting"
 
 	"github.com/stretchr/testify/assert"
 )
 
+// resetDefaultCharsetsOrder rebuilds setting.Repository.DetectedCharsetScore
+// from setting.Repository.DetectedCharsetsOrder. Every configured name is
+// trimmed and lower-cased; the first occurrence of a name receives the next
+// rank (starting at 0) and later duplicates are ignored.
+func resetDefaultCharsetsOrder() {
+	order := setting.Repository.DetectedCharsetsOrder
+	scores := make(map[string]int, len(order))
+	for _, name := range order {
+		key := strings.ToLower(strings.TrimSpace(name))
+		if _, seen := scores[key]; seen {
+			continue
+		}
+		// The count of distinct names recorded so far is the next free rank.
+		scores[key] = len(scores)
+	}
+	setting.Repository.DetectedCharsetScore = scores
+}
+
 func TestRemoveBOMIfPresent(t *testing.T) {
 	res := RemoveBOMIfPresent([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba})
 	assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res)
@@ -21,6 +38,7 @@ func TestRemoveBOMIfPresent(t *testing.T) {
 }
 
 func TestToUTF8WithErr(t *testing.T) {
+	resetDefaultCharsetsOrder()
 	var res string
 	var err error
 
@@ -76,6 +94,7 @@ func TestToUTF8WithErr(t *testing.T) {
 }
 
 func TestToUTF8WithFallback(t *testing.T) {
+	resetDefaultCharsetsOrder()
 	// "ABC"
 	res := ToUTF8WithFallback([]byte{0x41, 0x42, 0x43})
 	assert.Equal(t, []byte{0x41, 0x42, 0x43}, res)
@@ -116,7 +135,7 @@ func TestToUTF8WithFallback(t *testing.T) {
 }
 
 func TestToUTF8(t *testing.T) {
-
+	resetDefaultCharsetsOrder()
 	// Note: golang compiler seems so behave differently depending on the current
 	// locale, so some conversions might behave differently. For that reason, we don't
 	// depend on particular conversions but in expected behaviors.
@@ -165,6 +184,7 @@ func TestToUTF8(t *testing.T) {
 }
 
 func TestToUTF8DropErrors(t *testing.T) {
+	resetDefaultCharsetsOrder()
 	// "ABC"
 	res := ToUTF8DropErrors([]byte{0x41, 0x42, 0x43})
 	assert.Equal(t, []byte{0x41, 0x42, 0x43}, res)
@@ -204,6 +224,7 @@ func TestToUTF8DropErrors(t *testing.T) {
 }
 
 func TestDetectEncoding(t *testing.T) {
+	resetDefaultCharsetsOrder()
 	testSuccess := func(b []byte, expected string) {
 		encoding, err := DetectEncoding(b)
 		assert.NoError(t, err)
@@ -225,10 +246,7 @@ func TestDetectEncoding(t *testing.T) {
 	b = []byte{0x44, 0xe9, 0x63, 0x6f, 0x72, 0x0a}
 	encoding, err := DetectEncoding(b)
 	assert.NoError(t, err)
-	// due to a race condition in `chardet` library, it could either detect
-	// "ISO-8859-1" or "IS0-8859-2" here. Technically either is correct, so
-	// we accept either.
-	assert.Contains(t, encoding, "ISO-8859")
+	assert.Contains(t, encoding, "ISO-8859-1")
 
 	setting.Repository.AnsiCharset = "placeholder"
 	testSuccess(b, "placeholder")