Skip to content

Commit 400a1ce

Browse files
gh-108590: Fix sqlite3.iterdump for invalid Unicode in TEXT columns (#108657)
Co-authored-by: Erlend E. Aasland <[email protected]>
1 parent 210a5d7 commit 400a1ce

File tree

3 files changed

+41
-2
lines changed

3 files changed

+41
-2
lines changed

Lib/sqlite3/dump.py

+25-2
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@
77
# future enhancements, you should normally quote any identifier that
88
# is an English language word, even if you do not have to."
99

10+
11+
from contextlib import contextmanager
12+
13+
1014
def _quote_name(name):
1115
return '"{0}"'.format(name.replace('"', '""'))
1216

@@ -15,6 +19,24 @@ def _quote_value(value):
1519
return "'{0}'".format(value.replace("'", "''"))
1620

1721

22+
def _force_decode(bs, *args, **kwargs):
23+
# gh-108590: Don't fail if the database contains invalid Unicode data.
24+
try:
25+
return bs.decode(*args, **kwargs)
26+
except UnicodeDecodeError:
27+
return "".join([chr(c) for c in bs])
28+
29+
30+
@contextmanager
31+
def _text_factory(con, factory):
32+
saved_factory = con.text_factory
33+
con.text_factory = factory
34+
try:
35+
yield
36+
finally:
37+
con.text_factory = saved_factory
38+
39+
1840
def _iterdump(connection):
1941
"""
2042
Returns an iterator to the dump of the database in an SQL text format.
@@ -74,8 +96,9 @@ def _iterdump(connection):
7496
)
7597
)
7698
query_res = cu.execute(q)
77-
for row in query_res:
78-
yield("{0};".format(row[0]))
99+
with _text_factory(connection, bytes):
100+
for row in query_res:
101+
yield("{0};".format(_force_decode(row[0])))
79102

80103
# Now when the type is 'index', 'trigger', or 'view'
81104
q = """

Lib/test/test_sqlite3/test_dump.py

+15
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,21 @@ def test_dump_virtual_tables(self):
133133
actual = list(self.cx.iterdump())
134134
self.assertEqual(expected, actual)
135135

136+
def test_dump_unicode_invalid(self):
    # gh-108590
    # The dump must complete, with each undecodable byte of the TEXT
    # value appearing as the code point of the same numeric value.
    expected = [
        "BEGIN TRANSACTION;",
        "CREATE TABLE foo (data TEXT);",
        "INSERT INTO \"foo\" VALUES('a\x9f');",
        "COMMIT;",
    ]
    # X'619f' is the byte pair 0x61 ('a') followed by 0x9f, which is not
    # valid UTF-8; the CAST stores it in the TEXT column regardless.
    self.cu.executescript("""
        CREATE TABLE foo (data TEXT);
        INSERT INTO foo VALUES (CAST(X'619f' AS TEXT));
    """)
    actual = list(self.cx.iterdump())
    self.assertEqual(expected, actual)
150+
136151

137152
if __name__ == "__main__":
138153
unittest.main()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fixed an issue where :meth:`sqlite3.Connection.iterdump` would fail and leave an incomplete SQL dump if a table includes invalid Unicode sequences. Patch by Corvin McPherson.

0 commit comments

Comments
 (0)