Add UTF-32 hack for iODBC + Teradata.

mkleehammer · mkleehammer · commit 297bfd9c0f0d · 2017-02-16T15:50:32.000-06:00
I'm not sure whether the issue is that iODBC (which Apple doesn't ship anymore) is using a 4-byte SQLWCHAR or that Teradata is returning UCS4/UTF-32LE for column names in SQLDescribeColW. Either way, I've added a hack that assumes 4-byte characters if the decoding for SQL_WMETADATA (a pyodbc-specific constant) is set to any of the UTF-32 encodings. The configured encoding is then actually used to decode the column names. Example: cnxn.setdecoding(pyodbc.SQL_WMETADATA, encoding='utf-32le'). Fixes #194
diff --git a/src/connection.cpp b/src/connection.cpp
@@ -1146,6 +1146,21 @@ static bool SetTextEncCommon(TextEnc& enc, const char* encoding, int ctype, bool
         enc.optenc = OPTENC_UTF16LE;
         enc.ctype  = (SQLSMALLINT)(ctype ? ctype : SQL_C_WCHAR);
     }
+    else if (strstr("|utf-32|utf32|", lower))
+    {
+        enc.optenc = OPTENC_UTF32;
+        enc.ctype  = (SQLSMALLINT)(ctype ? ctype : SQL_C_WCHAR);
+    }
+    else if (strstr("|utf-32-be|utf-32be|utf32be|", lower))
+    {
+        enc.optenc = OPTENC_UTF32BE;
+        enc.ctype  = (SQLSMALLINT)(ctype ? ctype : SQL_C_WCHAR);
+    }
+    else if (strstr("|utf-32-le|utf-32le|utf32le|", lower))
+    {
+        enc.optenc = OPTENC_UTF32LE;
+        enc.ctype  = (SQLSMALLINT)(ctype ? ctype : SQL_C_WCHAR);
+    }
     else if (strstr("|latin-1|latin1|iso-8859-1|iso8859-1|", lower))
     {
         enc.optenc = OPTENC_LATIN1;
diff --git a/src/cursor.cpp b/src/cursor.cpp
@@ -178,10 +178,28 @@ static bool create_name_map(Cursor* cur, SQLSMALLINT field_count, bool lower)
             goto done;
         }
 
+        const TextEnc& enc = cur->cnxn->metadata_enc;
+
+        // HACK: I don't know the exact issue, but iODBC + Teradata results in either UCS4 data
+        // or 4-byte SQLWCHAR.  I'm going to use UTF-32 as an indication that's what we have.
+
+        Py_ssize_t cbName = cchName;
+        switch (enc.optenc)
+        {
+        case OPTENC_UTF32:
+        case OPTENC_UTF32LE:
+        case OPTENC_UTF32BE:
+            cbName *= 4;
+            break;
+        default:
+            if (enc.ctype == SQL_C_WCHAR)
+                cbName *= 2;
+            break;
+        }
+
         TRACE("Col %d: type=%s (%d) colsize=%d\n", (i+1), SqlTypeName(nDataType), (int)nDataType, (int)nColSize);
 
-        const TextEnc& enc = cur->cnxn->metadata_enc;
-        Object name(TextBufferToObject(enc, szName, (Py_ssize_t)(cchName * sizeof(ODBCCHAR))));
+        Object name(TextBufferToObject(enc, szName, cbName));
 
         if (!name)
             goto done;
diff --git a/src/textenc.h b/src/textenc.h
@@ -14,6 +14,9 @@ enum {
     OPTENC_UTF16BE = 4,
     OPTENC_UTF16LE = 5,
     OPTENC_LATIN1  = 6,
+    OPTENC_UTF32   = 7,
+    OPTENC_UTF32LE = 8,
+    OPTENC_UTF32BE = 9,
 
 #if PY_MAJOR_VERSION < 3
     TO_UNICODE = 1,