Skip to content

bpo-46659: Fix the MBCS codec alias on Windows #31218

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions Lib/encodings/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,14 @@ def search_function(encoding):
# Return the registry entry
return entry

# Register the search_function in the Python codec registry
codecs.register(search_function)

if sys.platform == 'win32':
# bpo-671666, bpo-46668: If Python does not implement a codec for the current
# Windows ANSI code page, use the "mbcs" codec instead:
# WideCharToMultiByte() and MultiByteToWideChar() functions with CP_ACP.
# Python does not support custom code pages.
def _alias_mbcs(encoding):
try:
import _winapi
Expand All @@ -164,8 +171,4 @@ def _alias_mbcs(encoding):
# Imports may fail while we are shutting down
pass

# It must be registered before search_function()
codecs.register(_alias_mbcs)

# Register the search_function in the Python codec registry
codecs.register(search_function)
17 changes: 10 additions & 7 deletions Lib/test/test_codecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3191,13 +3191,16 @@ def test_incremental(self):
self.assertEqual(decoded, ('abc', 3))

def test_mbcs_alias(self):
# On Windows, the encoding name must be the ANSI code page
encoding = locale.getpreferredencoding(False)
self.assertTrue(encoding.startswith('cp'), encoding)

# The encodings module creates an "mbcs" alias to the ANSI code page
codec = codecs.lookup(encoding)
self.assertEqual(codec.name, "mbcs")
# Check that looking up our 'default' codepage will return
# mbcs when we don't have a more specific one available
code_page = 99_999
name = f'cp{code_page}'
with mock.patch('_winapi.GetACP', return_value=code_page):
try:
codec = codecs.lookup(name)
self.assertEqual(codec.name, 'mbcs')
finally:
codecs.unregister(name)

@support.bigmemtest(size=2**31, memuse=7, dry_run=False)
def test_large_input(self, size):
Expand Down