Skip to content

Parametrized values containing non-ascii bytes break cache #1031

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Sep 23, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@
- Fix issue #766 by removing documentation references to distutils.
Thanks Russel Winder.

- Fix issue #1030: now byte-strings are escaped to produce item node ids
to make them always serializable.
Thanks Andy Freeland for the report and Bruno Oliveira for the PR.

- Python 2: if unicode parametrized values are convertible to ascii, their
ascii representation is used for the node id.

- Fix issue #411: Add __eq__ method to assertion comparison example.
Thanks Ben Webb.

Expand Down
44 changes: 43 additions & 1 deletion _pytest/python.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@
# The type of re.compile objects is not exposed in Python.
REGEX_TYPE = type(re.compile(''))

_PY3 = sys.version_info > (3, 0)
_PY2 = not _PY3


if hasattr(inspect, 'signature'):
def _format_args(func):
Expand Down Expand Up @@ -1037,6 +1040,35 @@ def addcall(self, funcargs=None, id=_notexists, param=_notexists):
self._calls.append(cs)


if _PY3:
def _escape_bytes(val):
"""
If val is pure ascii, returns it as a str(), otherwise escapes
into a sequence of escaped bytes:
b'\xc3\xb4\xc5\xd6' -> u'\\xc3\\xb4\\xc5\\xd6'

note:
the obvious "v.decode('unicode-escape')" will return
valid utf-8 unicode if it finds them in the string, but we
want to return escaped bytes for any byte, even if they match
a utf-8 string.
"""
# source: http://goo.gl/bGsnwC
import codecs
encoded_bytes, _ = codecs.escape_encode(val)
return encoded_bytes.decode('ascii')
else:
def _escape_bytes(val):
"""
In py2 bytes and str are the same, so return it unchanged if it
is a full ascii string, otherwise escape it into its binary form.
"""
try:
return val.encode('ascii')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nicoddemus encode/decode mismatch

also the kind of code that begs for unit-tests to run across all code paths

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

True, thanks for tracking this down.

Some unit-tests were written for this (take a look at test_idmaker_native_strings), you think adding more values in there that demonstrate this problem is enough, or some other tests should be written?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

directly for _escape_bytes, the idea is to properly run trough all code-paths of that function
so we don't have to exercise the code paths via the idmaker

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I reached the same conclusion after I posted my comment and thought about it a little more. I will start working on this now. 😅

except UnicodeDecodeError:
return val.encode('string-escape')


def _idval(val, argname, idx, idfn):
if idfn:
try:
Expand All @@ -1046,14 +1078,24 @@ def _idval(val, argname, idx, idfn):
except Exception:
pass

if isinstance(val, (float, int, str, bool, NoneType)):
if isinstance(val, bytes):
return _escape_bytes(val)
elif isinstance(val, (float, int, str, bool, NoneType)):
return str(val)
elif isinstance(val, REGEX_TYPE):
return val.pattern
elif enum is not None and isinstance(val, enum.Enum):
return str(val)
elif isclass(val) and hasattr(val, '__name__'):
return val.__name__
elif _PY2 and isinstance(val, unicode):
# special case for python 2: if a unicode string is
# convertible to ascii, return it as an str() object instead
try:
return str(val)
except UnicodeDecodeError:
# fallthrough
pass
return str(argname)+str(idx)

def _idvalset(idx, valset, argnames, idfn):
Expand Down
16 changes: 10 additions & 6 deletions testing/python/metafunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,11 +129,12 @@ def test_idmaker_autoname(self):
(object(), object())])
assert result == ["a0-1.0", "a1-b1"]
# unicode mixing, issue250
result = idmaker((py.builtin._totext("a"), "b"), [({}, '\xc3\xb4')])
assert result == ['a0-\xc3\xb4']
result = idmaker((py.builtin._totext("a"), "b"), [({}, b'\xc3\xb4')])
assert result == ['a0-\\xc3\\xb4']

def test_idmaker_native_strings(self):
from _pytest.python import idmaker
totext = py.builtin._totext
result = idmaker(("a", "b"), [(1.0, -1.1),
(2, -202),
("three", "three hundred"),
Expand All @@ -143,7 +144,9 @@ def test_idmaker_native_strings(self):
(str, int),
(list("six"), [66, 66]),
(set([7]), set("seven")),
(tuple("eight"), (8, -8, 8))
(tuple("eight"), (8, -8, 8)),
(b'\xc3\xb4', b"name"),
(b'\xc3\xb4', totext("other")),
])
assert result == ["1.0--1.1",
"2--202",
Expand All @@ -154,7 +157,10 @@ def test_idmaker_native_strings(self):
"str-int",
"a7-b7",
"a8-b8",
"a9-b9"]
"a9-b9",
"\\xc3\\xb4-name",
"\\xc3\\xb4-other",
]

def test_idmaker_enum(self):
from _pytest.python import idmaker
Expand Down Expand Up @@ -312,7 +318,6 @@ def test_simple(x):
"*uses no fixture 'y'*",
])

@pytest.mark.xfail
@pytest.mark.issue714
def test_parametrize_uses_no_fixture_error_indirect_true(self, testdir):
testdir.makepyfile("""
Expand All @@ -333,7 +338,6 @@ def test_simple(x):
"*uses no fixture 'y'*",
])

@pytest.mark.xfail
@pytest.mark.issue714
def test_parametrize_indirect_uses_no_fixture_error_indirect_list(self, testdir):
testdir.makepyfile("""
Expand Down
16 changes: 16 additions & 0 deletions testing/test_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,22 @@ def test_hello():
lastfailed = config.cache.get("cache/lastfailed", -1)
assert not lastfailed

def test_non_serializable_parametrize(self, testdir):
"""Test that failed parametrized tests with unmarshable parameters
don't break pytest-cache.
"""
testdir.makepyfile(r"""
import pytest

@pytest.mark.parametrize('val', [
b'\xac\x10\x02G',
])
def test_fail(val):
assert False
""")
result = testdir.runpytest()
result.stdout.fnmatch_lines('*1 failed in*')

def test_lastfailed_collectfailure(self, testdir, monkeypatch):

testdir.makepyfile(test_maybe="""
Expand Down