diff --git a/CHANGELOG.md b/CHANGELOG.md index 6280b2c66..f008a5773 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ Changes: New Features: + - Setting the connection collation is possible with the `collation` DSN parameter. This parameter should be preferred over the `charset` parameter - Logging of critical errors is configurable with `SetLogger` - Google CloudSQL support diff --git a/README.md b/README.md index cd64bfdb6..fd1a5154f 100644 --- a/README.md +++ b/README.md @@ -141,8 +141,22 @@ Valid Values: Default: none ``` -Sets the charset used for client-server interaction (`"SET NAMES "`). If multiple charsets are set (separated by a comma), the following charset is used if setting the charset failes. This enables support for `utf8mb4` ([introduced in MySQL 5.5.3](http://dev.mysql.com/doc/refman/5.5/en/charset-unicode-utf8mb4.html)) with fallback to `utf8` for older servers (`charset=utf8mb4,utf8`). +Sets the charset used for client-server interaction (`"SET NAMES "`). If multiple charsets are set (separated by a comma), the following charset is used if setting the charset failes. This enables for example support for `utf8mb4` ([introduced in MySQL 5.5.3](http://dev.mysql.com/doc/refman/5.5/en/charset-unicode-utf8mb4.html)) with fallback to `utf8` for older servers (`charset=utf8mb4,utf8`). +Usage of the `charset` parameter is discouraged because it issues additional queries to the server. +Unless you need the fallback behavior, please use `collation` instead. + +##### `collation` + +``` +Type: string +Valid Values: +Default: utf8_general_ci +``` + +Sets the collation used for client-server interaction on connection. In contrast to `charset`, `collation` does not issue additional queries. If the specified collation is unavailable on the target server, the connection will fail. + +A list of valid charsets for a server is retrievable with `SHOW COLLATION`. ##### `clientFoundRows` @@ -243,7 +257,7 @@ user:password@/dbname?strict=true&sql_notes=false TCP via IPv6: ``` -user:password@tcp([de:ad:be:ef::ca:fe]:80)/dbname?timeout=90s +user:password@tcp([de:ad:be:ef::ca:fe]:80)/dbname?timeout=90s&collation=utf8mb4_unicode_ci ``` TCP on a remote host, e.g. Amazon RDS: @@ -295,7 +309,11 @@ Alternatively you can use the [`NullTime`](http://godoc.org/github.com/go-sql-dr ### Unicode support -Since version 1.1 Go-MySQL-Driver automatically uses the collation `utf8_general_ci` by default. Adding `&charset=utf8` (alias for `SET NAMES utf8`) to the DSN is not necessary anymore in most cases. +Since version 1.1 Go-MySQL-Driver automatically uses the collation `utf8_general_ci` by default. + +Other collations / charsets can be set using the [`collation`](#collation) DSN parameter. + +Version 1.0 of the driver recommended adding `&charset=utf8` (alias for `SET NAMES utf8`) to the DSN to enable proper UTF-8 support. This is not necessary anymore. The [`collation`](#collation) parameter should be preferred to set another collation / charset than the default. See http://dev.mysql.com/doc/refman/5.7/en/charset-unicode.html for more details on MySQL's Unicode support. diff --git a/collations.go b/collations.go new file mode 100644 index 000000000..aabe0055d --- /dev/null +++ b/collations.go @@ -0,0 +1,236 @@ +// Go MySQL Driver - A MySQL-Driver for Go's database/sql package +// +// Copyright 2014 The Go-MySQL-Driver Authors. All rights reserved. +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at http://mozilla.org/MPL/2.0/. + +package mysql + +const defaultCollation byte = 33 // utf8_general_ci + +// A list of available collations mapped to the internal ID. +// To update this map use the following MySQL query: +// SELECT COLLATION_NAME, ID FROM information_schema.COLLATIONS +var collations = map[string]byte{ + "big5_chinese_ci": 1, + "latin2_czech_cs": 2, + "dec8_swedish_ci": 3, + "cp850_general_ci": 4, + "latin1_german1_ci": 5, + "hp8_english_ci": 6, + "koi8r_general_ci": 7, + "latin1_swedish_ci": 8, + "latin2_general_ci": 9, + "swe7_swedish_ci": 10, + "ascii_general_ci": 11, + "ujis_japanese_ci": 12, + "sjis_japanese_ci": 13, + "cp1251_bulgarian_ci": 14, + "latin1_danish_ci": 15, + "hebrew_general_ci": 16, + "tis620_thai_ci": 18, + "euckr_korean_ci": 19, + "latin7_estonian_cs": 20, + "latin2_hungarian_ci": 21, + "koi8u_general_ci": 22, + "cp1251_ukrainian_ci": 23, + "gb2312_chinese_ci": 24, + "greek_general_ci": 25, + "cp1250_general_ci": 26, + "latin2_croatian_ci": 27, + "gbk_chinese_ci": 28, + "cp1257_lithuanian_ci": 29, + "latin5_turkish_ci": 30, + "latin1_german2_ci": 31, + "armscii8_general_ci": 32, + "utf8_general_ci": 33, + "cp1250_czech_cs": 34, + "ucs2_general_ci": 35, + "cp866_general_ci": 36, + "keybcs2_general_ci": 37, + "macce_general_ci": 38, + "macroman_general_ci": 39, + "cp852_general_ci": 40, + "latin7_general_ci": 41, + "latin7_general_cs": 42, + "macce_bin": 43, + "cp1250_croatian_ci": 44, + "utf8mb4_general_ci": 45, + "utf8mb4_bin": 46, + "latin1_bin": 47, + "latin1_general_ci": 48, + "latin1_general_cs": 49, + "cp1251_bin": 50, + "cp1251_general_ci": 51, + "cp1251_general_cs": 52, + "macroman_bin": 53, + "utf16_general_ci": 54, + "utf16_bin": 55, + "utf16le_general_ci": 56, + "cp1256_general_ci": 57, + "cp1257_bin": 58, + "cp1257_general_ci": 59, + "utf32_general_ci": 60, + "utf32_bin": 61, + "utf16le_bin": 62, + "binary": 63, + "armscii8_bin": 64, + "ascii_bin": 65, + "cp1250_bin": 66, + "cp1256_bin": 67, + "cp866_bin": 68, + "dec8_bin": 69, + "greek_bin": 70, + "hebrew_bin": 71, + "hp8_bin": 72, + "keybcs2_bin": 73, + "koi8r_bin": 74, + "koi8u_bin": 75, + "latin2_bin": 77, + "latin5_bin": 78, + "latin7_bin": 79, + "cp850_bin": 80, + "cp852_bin": 81, + "swe7_bin": 82, + "utf8_bin": 83, + "big5_bin": 84, + "euckr_bin": 85, + "gb2312_bin": 86, + "gbk_bin": 87, + "sjis_bin": 88, + "tis620_bin": 89, + "ucs2_bin": 90, + "ujis_bin": 91, + "geostd8_general_ci": 92, + "geostd8_bin": 93, + "latin1_spanish_ci": 94, + "cp932_japanese_ci": 95, + "cp932_bin": 96, + "eucjpms_japanese_ci": 97, + "eucjpms_bin": 98, + "cp1250_polish_ci": 99, + "utf16_unicode_ci": 101, + "utf16_icelandic_ci": 102, + "utf16_latvian_ci": 103, + "utf16_romanian_ci": 104, + "utf16_slovenian_ci": 105, + "utf16_polish_ci": 106, + "utf16_estonian_ci": 107, + "utf16_spanish_ci": 108, + "utf16_swedish_ci": 109, + "utf16_turkish_ci": 110, + "utf16_czech_ci": 111, + "utf16_danish_ci": 112, + "utf16_lithuanian_ci": 113, + "utf16_slovak_ci": 114, + "utf16_spanish2_ci": 115, + "utf16_roman_ci": 116, + "utf16_persian_ci": 117, + "utf16_esperanto_ci": 118, + "utf16_hungarian_ci": 119, + "utf16_sinhala_ci": 120, + "utf16_german2_ci": 121, + "utf16_croatian_ci": 122, + "utf16_unicode_520_ci": 123, + "utf16_vietnamese_ci": 124, + "ucs2_unicode_ci": 128, + "ucs2_icelandic_ci": 129, + "ucs2_latvian_ci": 130, + "ucs2_romanian_ci": 131, + "ucs2_slovenian_ci": 132, + "ucs2_polish_ci": 133, + "ucs2_estonian_ci": 134, + "ucs2_spanish_ci": 135, + "ucs2_swedish_ci": 136, + "ucs2_turkish_ci": 137, + "ucs2_czech_ci": 138, + "ucs2_danish_ci": 139, + "ucs2_lithuanian_ci": 140, + "ucs2_slovak_ci": 141, + "ucs2_spanish2_ci": 142, + "ucs2_roman_ci": 143, + "ucs2_persian_ci": 144, + "ucs2_esperanto_ci": 145, + "ucs2_hungarian_ci": 146, + "ucs2_sinhala_ci": 147, + "ucs2_german2_ci": 148, + "ucs2_croatian_ci": 149, + "ucs2_unicode_520_ci": 150, + "ucs2_vietnamese_ci": 151, + "ucs2_general_mysql500_ci": 159, + "utf32_unicode_ci": 160, + "utf32_icelandic_ci": 161, + "utf32_latvian_ci": 162, + "utf32_romanian_ci": 163, + "utf32_slovenian_ci": 164, + "utf32_polish_ci": 165, + "utf32_estonian_ci": 166, + "utf32_spanish_ci": 167, + "utf32_swedish_ci": 168, + "utf32_turkish_ci": 169, + "utf32_czech_ci": 170, + "utf32_danish_ci": 171, + "utf32_lithuanian_ci": 172, + "utf32_slovak_ci": 173, + "utf32_spanish2_ci": 174, + "utf32_roman_ci": 175, + "utf32_persian_ci": 176, + "utf32_esperanto_ci": 177, + "utf32_hungarian_ci": 178, + "utf32_sinhala_ci": 179, + "utf32_german2_ci": 180, + "utf32_croatian_ci": 181, + "utf32_unicode_520_ci": 182, + "utf32_vietnamese_ci": 183, + "utf8_unicode_ci": 192, + "utf8_icelandic_ci": 193, + "utf8_latvian_ci": 194, + "utf8_romanian_ci": 195, + "utf8_slovenian_ci": 196, + "utf8_polish_ci": 197, + "utf8_estonian_ci": 198, + "utf8_spanish_ci": 199, + "utf8_swedish_ci": 200, + "utf8_turkish_ci": 201, + "utf8_czech_ci": 202, + "utf8_danish_ci": 203, + "utf8_lithuanian_ci": 204, + "utf8_slovak_ci": 205, + "utf8_spanish2_ci": 206, + "utf8_roman_ci": 207, + "utf8_persian_ci": 208, + "utf8_esperanto_ci": 209, + "utf8_hungarian_ci": 210, + "utf8_sinhala_ci": 211, + "utf8_german2_ci": 212, + "utf8_croatian_ci": 213, + "utf8_unicode_520_ci": 214, + "utf8_vietnamese_ci": 215, + "utf8_general_mysql500_ci": 223, + "utf8mb4_unicode_ci": 224, + "utf8mb4_icelandic_ci": 225, + "utf8mb4_latvian_ci": 226, + "utf8mb4_romanian_ci": 227, + "utf8mb4_slovenian_ci": 228, + "utf8mb4_polish_ci": 229, + "utf8mb4_estonian_ci": 230, + "utf8mb4_spanish_ci": 231, + "utf8mb4_swedish_ci": 232, + "utf8mb4_turkish_ci": 233, + "utf8mb4_czech_ci": 234, + "utf8mb4_danish_ci": 235, + "utf8mb4_lithuanian_ci": 236, + "utf8mb4_slovak_ci": 237, + "utf8mb4_spanish2_ci": 238, + "utf8mb4_roman_ci": 239, + "utf8mb4_persian_ci": 240, + "utf8mb4_esperanto_ci": 241, + "utf8mb4_hungarian_ci": 242, + "utf8mb4_sinhala_ci": 243, + "utf8mb4_german2_ci": 244, + "utf8mb4_croatian_ci": 245, + "utf8mb4_unicode_520_ci": 246, + "utf8mb4_vietnamese_ci": 247, +} diff --git a/connection.go b/connection.go index 53b106a74..45467afb7 100644 --- a/connection.go +++ b/connection.go @@ -39,14 +39,15 @@ type config struct { dbname string params map[string]string loc *time.Location - timeout time.Duration tls *tls.Config + timeout time.Duration + collation uint8 allowAllFiles bool allowOldPasswords bool clientFoundRows bool } -// Handles parameters set in DSN +// Handles parameters set in DSN after the connection is established func (mc *mysqlConn) handleParams() (err error) { for param, val := range mc.cfg.params { switch param { diff --git a/const.go b/const.go index eb014d57d..379eabec1 100644 --- a/const.go +++ b/const.go @@ -130,13 +130,3 @@ const ( flagUnknown3 flagUnknown4 ) - -const ( - collation_ascii_general_ci byte = 11 - collation_utf8_general_ci byte = 33 - collation_utf8mb4_general_ci byte = 45 - collation_utf8mb4_bin byte = 46 - collation_latin1_general_ci byte = 48 - collation_binary byte = 63 - collation_utf8mb4_unicode_ci byte = 224 -) diff --git a/driver_test.go b/driver_test.go index 115aa05ac..59469785f 100644 --- a/driver_test.go +++ b/driver_test.go @@ -938,6 +938,44 @@ func TestFailingCharset(t *testing.T) { }) } +func TestCollation(t *testing.T) { + if !available { + t.Skipf("MySQL-Server not running on %s", netAddr) + } + + defaultCollation := "utf8_general_ci" + testCollations := []string{ + "", // do not set + defaultCollation, // driver default + "latin1_general_ci", + "binary", + "utf8_unicode_ci", + "utf8mb4_general_ci", + } + + for _, collation := range testCollations { + var expected, tdsn string + if collation != "" { + tdsn = dsn + "&collation=" + collation + expected = collation + } else { + tdsn = dsn + expected = defaultCollation + } + + runTests(t, tdsn, func(dbt *DBTest) { + var got string + if err := dbt.db.QueryRow("SELECT @@collation_connection").Scan(&got); err != nil { + dbt.Fatal(err) + } + + if got != expected { + dbt.Fatalf("Expected connection collation %s but got %s", expected, got) + } + }) + } +} + func TestRawBytesResultExceedsBuffer(t *testing.T) { runTests(t, dsn, func(dbt *DBTest) { // defaultBufSize from buffer.go diff --git a/packets.go b/packets.go index 8d98d9253..618098146 100644 --- a/packets.go +++ b/packets.go @@ -257,7 +257,7 @@ func (mc *mysqlConn) writeAuthPacket(cipher []byte) error { data[11] = 0x00 // Charset [1 byte] - data[12] = collation_utf8_general_ci + data[12] = mc.cfg.collation // SSL Connection Request Packet // http://dev.mysql.com/doc/internals/en/connection-phase-packets.html#packet-Protocol::SSLRequest diff --git a/utils.go b/utils.go index b5100e465..b6f200389 100644 --- a/utils.go +++ b/utils.go @@ -72,7 +72,11 @@ func DeregisterTLSConfig(key string) { // parseDSN parses the DSN string to a config func parseDSN(dsn string) (cfg *config, err error) { - cfg = new(config) + // New config with some default values + cfg = &config{ + loc: time.UTC, + collation: defaultCollation, + } // TODO: use strings.IndexByte when we can depend on Go 1.2 @@ -160,11 +164,6 @@ func parseDSN(dsn string) (cfg *config, err error) { } - // Set default location if empty - if cfg.loc == nil { - cfg.loc = time.UTC - } - return } @@ -188,22 +187,35 @@ func parseDSNParams(cfg *config, params string) (err error) { return fmt.Errorf("Invalid Bool value: %s", value) } - // Switch "rowsAffected" mode - case "clientFoundRows": + // Use old authentication mode (pre MySQL 4.1) + case "allowOldPasswords": var isBool bool - cfg.clientFoundRows, isBool = readBool(value) + cfg.allowOldPasswords, isBool = readBool(value) if !isBool { return fmt.Errorf("Invalid Bool value: %s", value) } - // Use old authentication mode (pre MySQL 4.1) - case "allowOldPasswords": + // Switch "rowsAffected" mode + case "clientFoundRows": var isBool bool - cfg.allowOldPasswords, isBool = readBool(value) + cfg.clientFoundRows, isBool = readBool(value) if !isBool { return fmt.Errorf("Invalid Bool value: %s", value) } + // Collation + case "collation": + collation, ok := collations[value] + if !ok { + // Note possibility for false negatives: + // could be triggered although the collation is valid if the + // collations map does not contain entries the server supports. + err = errors.New("unknown collation") + return + } + cfg.collation = collation + break + // Time Location case "loc": if value, err = url.QueryUnescape(value); err != nil { diff --git a/utils_test.go b/utils_test.go index baf2b8c26..301d81a62 100644 --- a/utils_test.go +++ b/utils_test.go @@ -21,18 +21,18 @@ var testDSNs = []struct { out string loc *time.Location }{ - {"username:password@protocol(address)/dbname?param=value", "&{user:username passwd:password net:protocol addr:address dbname:dbname params:map[param:value] loc:%p timeout:0 tls: allowAllFiles:false allowOldPasswords:false clientFoundRows:false}", time.UTC}, - {"user@unix(/path/to/socket)/dbname?charset=utf8", "&{user:user passwd: net:unix addr:/path/to/socket dbname:dbname params:map[charset:utf8] loc:%p timeout:0 tls: allowAllFiles:false allowOldPasswords:false clientFoundRows:false}", time.UTC}, - {"user:password@tcp(localhost:5555)/dbname?charset=utf8&tls=true", "&{user:user passwd:password net:tcp addr:localhost:5555 dbname:dbname params:map[charset:utf8] loc:%p timeout:0 tls: allowAllFiles:false allowOldPasswords:false clientFoundRows:false}", time.UTC}, - {"user:password@tcp(localhost:5555)/dbname?charset=utf8mb4,utf8&tls=skip-verify", "&{user:user passwd:password net:tcp addr:localhost:5555 dbname:dbname params:map[charset:utf8mb4,utf8] loc:%p timeout:0 tls: allowAllFiles:false allowOldPasswords:false clientFoundRows:false}", time.UTC}, - {"user:password@/dbname?loc=UTC&timeout=30s&allowAllFiles=1&clientFoundRows=true&allowOldPasswords=TRUE", "&{user:user passwd:password net:tcp addr:127.0.0.1:3306 dbname:dbname params:map[] loc:%p timeout:30000000000 tls: allowAllFiles:true allowOldPasswords:true clientFoundRows:true}", time.UTC}, - {"user:p@ss(word)@tcp([de:ad:be:ef::ca:fe]:80)/dbname?loc=Local", "&{user:user passwd:p@ss(word) net:tcp addr:[de:ad:be:ef::ca:fe]:80 dbname:dbname params:map[] loc:%p timeout:0 tls: allowAllFiles:false allowOldPasswords:false clientFoundRows:false}", time.Local}, - {"/dbname", "&{user: passwd: net:tcp addr:127.0.0.1:3306 dbname:dbname params:map[] loc:%p timeout:0 tls: allowAllFiles:false allowOldPasswords:false clientFoundRows:false}", time.UTC}, - {"@/", "&{user: passwd: net:tcp addr:127.0.0.1:3306 dbname: params:map[] loc:%p timeout:0 tls: allowAllFiles:false allowOldPasswords:false clientFoundRows:false}", time.UTC}, - {"/", "&{user: passwd: net:tcp addr:127.0.0.1:3306 dbname: params:map[] loc:%p timeout:0 tls: allowAllFiles:false allowOldPasswords:false clientFoundRows:false}", time.UTC}, - {"", "&{user: passwd: net:tcp addr:127.0.0.1:3306 dbname: params:map[] loc:%p timeout:0 tls: allowAllFiles:false allowOldPasswords:false clientFoundRows:false}", time.UTC}, - {"user:p@/ssword@/", "&{user:user passwd:p@/ssword net:tcp addr:127.0.0.1:3306 dbname: params:map[] loc:%p timeout:0 tls: allowAllFiles:false allowOldPasswords:false clientFoundRows:false}", time.UTC}, - {"unix/?arg=%2Fsome%2Fpath.ext", "&{user: passwd: net:unix addr:/tmp/mysql.sock dbname: params:map[arg:/some/path.ext] loc:%p timeout:0 tls: allowAllFiles:false allowOldPasswords:false clientFoundRows:false}", time.UTC}, + {"username:password@protocol(address)/dbname?param=value", "&{user:username passwd:password net:protocol addr:address dbname:dbname params:map[param:value] loc:%p tls: timeout:0 collation:33 allowAllFiles:false allowOldPasswords:false clientFoundRows:false}", time.UTC}, + {"user@unix(/path/to/socket)/dbname?charset=utf8", "&{user:user passwd: net:unix addr:/path/to/socket dbname:dbname params:map[charset:utf8] loc:%p tls: timeout:0 collation:33 allowAllFiles:false allowOldPasswords:false clientFoundRows:false}", time.UTC}, + {"user:password@tcp(localhost:5555)/dbname?charset=utf8&tls=true", "&{user:user passwd:password net:tcp addr:localhost:5555 dbname:dbname params:map[charset:utf8] loc:%p tls: timeout:0 collation:33 allowAllFiles:false allowOldPasswords:false clientFoundRows:false}", time.UTC}, + {"user:password@tcp(localhost:5555)/dbname?charset=utf8mb4,utf8&tls=skip-verify", "&{user:user passwd:password net:tcp addr:localhost:5555 dbname:dbname params:map[charset:utf8mb4,utf8] loc:%p tls: timeout:0 collation:33 allowAllFiles:false allowOldPasswords:false clientFoundRows:false}", time.UTC}, + {"user:password@/dbname?loc=UTC&timeout=30s&allowAllFiles=1&clientFoundRows=true&allowOldPasswords=TRUE&collation=utf8mb4_unicode_ci", "&{user:user passwd:password net:tcp addr:127.0.0.1:3306 dbname:dbname params:map[] loc:%p tls: timeout:30000000000 collation:224 allowAllFiles:true allowOldPasswords:true clientFoundRows:true}", time.UTC}, + {"user:p@ss(word)@tcp([de:ad:be:ef::ca:fe]:80)/dbname?loc=Local", "&{user:user passwd:p@ss(word) net:tcp addr:[de:ad:be:ef::ca:fe]:80 dbname:dbname params:map[] loc:%p tls: timeout:0 collation:33 allowAllFiles:false allowOldPasswords:false clientFoundRows:false}", time.Local}, + {"/dbname", "&{user: passwd: net:tcp addr:127.0.0.1:3306 dbname:dbname params:map[] loc:%p tls: timeout:0 collation:33 allowAllFiles:false allowOldPasswords:false clientFoundRows:false}", time.UTC}, + {"@/", "&{user: passwd: net:tcp addr:127.0.0.1:3306 dbname: params:map[] loc:%p tls: timeout:0 collation:33 allowAllFiles:false allowOldPasswords:false clientFoundRows:false}", time.UTC}, + {"/", "&{user: passwd: net:tcp addr:127.0.0.1:3306 dbname: params:map[] loc:%p tls: timeout:0 collation:33 allowAllFiles:false allowOldPasswords:false clientFoundRows:false}", time.UTC}, + {"", "&{user: passwd: net:tcp addr:127.0.0.1:3306 dbname: params:map[] loc:%p tls: timeout:0 collation:33 allowAllFiles:false allowOldPasswords:false clientFoundRows:false}", time.UTC}, + {"user:p@/ssword@/", "&{user:user passwd:p@/ssword net:tcp addr:127.0.0.1:3306 dbname: params:map[] loc:%p tls: timeout:0 collation:33 allowAllFiles:false allowOldPasswords:false clientFoundRows:false}", time.UTC}, + {"unix/?arg=%2Fsome%2Fpath.ext", "&{user: passwd: net:unix addr:/tmp/mysql.sock dbname: params:map[arg:/some/path.ext] loc:%p tls: timeout:0 collation:33 allowAllFiles:false allowOldPasswords:false clientFoundRows:false}", time.UTC}, } func TestDSNParser(t *testing.T) {