From fa35f650b53821baa85c78caf116fd9e4522c408 Mon Sep 17 00:00:00 2001 From: Daniel Hahler Date: Tue, 6 Nov 2018 18:47:19 +0100 Subject: [PATCH 1/7] Fix handling of duplicate args with regard to Python packages Fixes https://github.com/pytest-dev/pytest/issues/4310. --- changelog/4310.bugfix.rst | 1 + src/_pytest/main.py | 29 +++++++++++++++-------------- src/_pytest/python.py | 15 ++++++++++++++- testing/test_collection.py | 32 ++++++++++++++++++++++++-------- 4 files changed, 54 insertions(+), 23 deletions(-) create mode 100644 changelog/4310.bugfix.rst diff --git a/changelog/4310.bugfix.rst b/changelog/4310.bugfix.rst new file mode 100644 index 00000000000..24e177c2ea1 --- /dev/null +++ b/changelog/4310.bugfix.rst @@ -0,0 +1 @@ +Fix duplicate collection due to multiple args matching the same packages. diff --git a/src/_pytest/main.py b/src/_pytest/main.py index 3c908ec4c30..46228f82467 100644 --- a/src/_pytest/main.py +++ b/src/_pytest/main.py @@ -387,6 +387,7 @@ def __init__(self, config): self._initialpaths = frozenset() # Keep track of any collected nodes in here, so we don't duplicate fixtures self._node_cache = {} + self._collect_seen_pkgdirs = set() self.config.pluginmanager.register(self, name="session") @@ -496,18 +497,19 @@ def _collect(self, arg): # and stack all Packages found on the way. # No point in finding packages when collecting doctests if not self.config.option.doctestmodules: + pm = self.config.pluginmanager for parent in argpath.parts(): - pm = self.config.pluginmanager if pm._confcutdir and pm._confcutdir.relto(parent): continue if parent.isdir(): pkginit = parent.join("__init__.py") if pkginit.isfile(): + self._collect_seen_pkgdirs.add(parent) if pkginit in self._node_cache: root = self._node_cache[pkginit][0] else: - col = root._collectfile(pkginit) + col = root._collectfile(pkginit, handle_dupes=False) if col: if isinstance(col[0], Package): root = col[0] @@ -529,13 +531,12 @@ def filter_(f): def filter_(f): return f.check(file=1) - seen_dirs = set() for path in argpath.visit( fil=filter_, rec=self._recurse, bf=True, sort=True ): dirpath = path.dirpath() - if dirpath not in seen_dirs: - seen_dirs.add(dirpath) + if dirpath not in self._collect_seen_pkgdirs: + self._collect_seen_pkgdirs.add(dirpath) pkginit = dirpath.join("__init__.py") if pkginit.exists() and parts(pkginit.strpath).isdisjoint(paths): for x in root._collectfile(pkginit): @@ -570,20 +571,20 @@ def filter_(f): for y in m: yield y - def _collectfile(self, path): + def _collectfile(self, path, handle_dupes=True): ihook = self.gethookproxy(path) if not self.isinitpath(path): if ihook.pytest_ignore_collect(path=path, config=self.config): return () - # Skip duplicate paths. - keepduplicates = self.config.getoption("keepduplicates") - if not keepduplicates: - duplicate_paths = self.config.pluginmanager._duplicatepaths - if path in duplicate_paths: - return () - else: - duplicate_paths.add(path) + if handle_dupes: + keepduplicates = self.config.getoption("keepduplicates") + if not keepduplicates: + duplicate_paths = self.config.pluginmanager._duplicatepaths + if path in duplicate_paths: + return () + else: + duplicate_paths.add(path) return ihook.pytest_collect_file(path=path, parent=self) diff --git a/src/_pytest/python.py b/src/_pytest/python.py index 03a9fe03105..d360e2c8f3e 100644 --- a/src/_pytest/python.py +++ b/src/_pytest/python.py @@ -545,11 +545,24 @@ def gethookproxy(self, fspath): proxy = self.config.hook return proxy - def _collectfile(self, path): + def _collectfile(self, path, handle_dupes=True): ihook = self.gethookproxy(path) if not self.isinitpath(path): if ihook.pytest_ignore_collect(path=path, config=self.config): return () + + if handle_dupes: + keepduplicates = self.config.getoption("keepduplicates") + if not keepduplicates: + duplicate_paths = self.config.pluginmanager._duplicatepaths + if path in duplicate_paths: + return () + else: + duplicate_paths.add(path) + + if self.fspath == path: # __init__.py + return [self] + return ihook.pytest_collect_file(path=path, parent=self) def isinitpath(self, path): diff --git a/testing/test_collection.py b/testing/test_collection.py index 3860cf9f906..dd8ecb1af3e 100644 --- a/testing/test_collection.py +++ b/testing/test_collection.py @@ -951,19 +951,35 @@ def test_collect_init_tests(testdir): result = testdir.runpytest(p, "--collect-only") result.stdout.fnmatch_lines( [ - "*", - "*", - "*", - "*", + "collected 2 items", + "", + " ", + " ", + " ", ] ) result = testdir.runpytest("./tests", "--collect-only") result.stdout.fnmatch_lines( [ - "*", - "*", - "*", - "*", + "collected 2 items", + "", + " ", + " ", + " ", + ] + ) + # Ignores duplicates with "." and pkginit (#4310). + result = testdir.runpytest("./tests", ".", "--collect-only") + result.stdout.fnmatch_lines( + [ + "collected 2 items", + "", + " ", + " ", + " ", ] ) result = testdir.runpytest("./tests/test_foo.py", "--collect-only") From 134b103605ffbe50ea5175cfeecb8d033458fbd0 Mon Sep 17 00:00:00 2001 From: Daniel Hahler Date: Wed, 7 Nov 2018 11:01:39 +0100 Subject: [PATCH 2/7] XXX: revert _collect_seen_pkgdirs --- src/_pytest/main.py | 7 +++---- testing/test_collection.py | 13 +++++++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/_pytest/main.py b/src/_pytest/main.py index 46228f82467..a2b27d9fa73 100644 --- a/src/_pytest/main.py +++ b/src/_pytest/main.py @@ -387,7 +387,6 @@ def __init__(self, config): self._initialpaths = frozenset() # Keep track of any collected nodes in here, so we don't duplicate fixtures self._node_cache = {} - self._collect_seen_pkgdirs = set() self.config.pluginmanager.register(self, name="session") @@ -505,7 +504,6 @@ def _collect(self, arg): if parent.isdir(): pkginit = parent.join("__init__.py") if pkginit.isfile(): - self._collect_seen_pkgdirs.add(parent) if pkginit in self._node_cache: root = self._node_cache[pkginit][0] else: @@ -531,12 +529,13 @@ def filter_(f): def filter_(f): return f.check(file=1) + seen_dirs = set() for path in argpath.visit( fil=filter_, rec=self._recurse, bf=True, sort=True ): dirpath = path.dirpath() - if dirpath not in self._collect_seen_pkgdirs: - self._collect_seen_pkgdirs.add(dirpath) + if dirpath not in seen_dirs: + seen_dirs.add(dirpath) pkginit = dirpath.join("__init__.py") if pkginit.exists() and parts(pkginit.strpath).isdisjoint(paths): for x in root._collectfile(pkginit): diff --git a/testing/test_collection.py b/testing/test_collection.py index dd8ecb1af3e..c7cde090e35 100644 --- a/testing/test_collection.py +++ b/testing/test_collection.py @@ -978,6 +978,19 @@ def test_collect_init_tests(testdir): "", " ", + "", + " ", + ] + ) + # XXX: Same as before, but different order. + result = testdir.runpytest(".", "tests", "--collect-only") + result.stdout.fnmatch_lines( + [ + "collected 2 items", + "", + " ", + "", " ", ] From f840521854d15288b57315c56b620001dd534d12 Mon Sep 17 00:00:00 2001 From: Daniel Hahler Date: Wed, 7 Nov 2018 19:29:55 +0100 Subject: [PATCH 3/7] harden test_collect_init_tests --- testing/test_collection.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/testing/test_collection.py b/testing/test_collection.py index c7cde090e35..18033b9c006 100644 --- a/testing/test_collection.py +++ b/testing/test_collection.py @@ -975,31 +975,34 @@ def test_collect_init_tests(testdir): result.stdout.fnmatch_lines( [ "collected 2 items", - "", " ", " ", - "", - " ", + " ", + " ", ] ) - # XXX: Same as before, but different order. + # Same as before, but different order. result = testdir.runpytest(".", "tests", "--collect-only") result.stdout.fnmatch_lines( [ "collected 2 items", - "", " ", " ", - "", " ", ] ) result = testdir.runpytest("./tests/test_foo.py", "--collect-only") - result.stdout.fnmatch_lines(["*", "*"]) + result.stdout.fnmatch_lines( + ["", " ", " "] + ) assert "test_init" not in result.stdout.str() result = testdir.runpytest("./tests/__init__.py", "--collect-only") - result.stdout.fnmatch_lines(["*", "*"]) + result.stdout.fnmatch_lines( + ["", " ", " "] + ) assert "test_foo" not in result.stdout.str() From f8b944dee0b59bdc483ce505738ea1cb3a57a5b4 Mon Sep 17 00:00:00 2001 From: Daniel Hahler Date: Wed, 7 Nov 2018 19:33:22 +0100 Subject: [PATCH 4/7] pkg_roots --- src/_pytest/main.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/_pytest/main.py b/src/_pytest/main.py index a2b27d9fa73..59e2b6d4dac 100644 --- a/src/_pytest/main.py +++ b/src/_pytest/main.py @@ -490,6 +490,7 @@ def _collect(self, arg): names = self._parsearg(arg) argpath = names.pop(0).realpath() paths = set() + pkg_roots = {} root = self # Start with a Session root, and delve to argpath item (dir or file) @@ -510,9 +511,9 @@ def _collect(self, arg): col = root._collectfile(pkginit, handle_dupes=False) if col: if isinstance(col[0], Package): - root = col[0] + pkg_roots[parent] = col[0] # always store a list in the cache, matchnodes expects it - self._node_cache[root.fspath] = [root] + self._node_cache[col[0].fspath] = [col[0]] # If it's a directory argument, recurse and look for any Subpackages. # Let the Package collector deal with subnodes, don't collect here. @@ -534,16 +535,19 @@ def filter_(f): fil=filter_, rec=self._recurse, bf=True, sort=True ): dirpath = path.dirpath() + collect_root = pkg_roots.get(dirpath, root) if dirpath not in seen_dirs: seen_dirs.add(dirpath) pkginit = dirpath.join("__init__.py") if pkginit.exists() and parts(pkginit.strpath).isdisjoint(paths): - for x in root._collectfile(pkginit): + for x in collect_root._collectfile(pkginit): yield x + if isinstance(x, Package): + pkg_roots[dirpath] = x paths.add(x.fspath.dirpath()) - if parts(path.strpath).isdisjoint(paths): - for x in root._collectfile(path): + if True or parts(path.strpath).isdisjoint(paths): + for x in collect_root._collectfile(path): key = (type(x), x.fspath) if key in self._node_cache: yield self._node_cache[key] @@ -556,7 +560,8 @@ def filter_(f): if argpath in self._node_cache: col = self._node_cache[argpath] else: - col = root._collectfile(argpath) + collect_root = pkg_roots.get(argpath.dirname, root) + col = collect_root._collectfile(argpath) if col: self._node_cache[argpath] = col m = self.matchnodes(col, names) From bbb9d72c1315e73d9acc6a9b8a9488479db76f0b Mon Sep 17 00:00:00 2001 From: Daniel Hahler Date: Wed, 7 Nov 2018 19:36:19 +0100 Subject: [PATCH 5/7] remove paths/parts --- src/_pytest/main.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/src/_pytest/main.py b/src/_pytest/main.py index 59e2b6d4dac..50bd109794e 100644 --- a/src/_pytest/main.py +++ b/src/_pytest/main.py @@ -18,7 +18,6 @@ from _pytest.config import hookimpl from _pytest.config import UsageError from _pytest.outcomes import exit -from _pytest.pathlib import parts from _pytest.runner import collect_one_node @@ -489,7 +488,6 @@ def _collect(self, arg): names = self._parsearg(arg) argpath = names.pop(0).realpath() - paths = set() pkg_roots = {} root = self @@ -539,21 +537,19 @@ def filter_(f): if dirpath not in seen_dirs: seen_dirs.add(dirpath) pkginit = dirpath.join("__init__.py") - if pkginit.exists() and parts(pkginit.strpath).isdisjoint(paths): + if pkginit.exists(): for x in collect_root._collectfile(pkginit): yield x if isinstance(x, Package): pkg_roots[dirpath] = x - paths.add(x.fspath.dirpath()) - if True or parts(path.strpath).isdisjoint(paths): - for x in collect_root._collectfile(path): - key = (type(x), x.fspath) - if key in self._node_cache: - yield self._node_cache[key] - else: - self._node_cache[key] = x - yield x + for x in collect_root._collectfile(path): + key = (type(x), x.fspath) + if key in self._node_cache: + yield self._node_cache[key] + else: + self._node_cache[key] = x + yield x else: assert argpath.check(file=1) From 6fce1f0ac7a9a569429efa740f5cdd04805b4a6c Mon Sep 17 00:00:00 2001 From: Daniel Hahler Date: Wed, 7 Nov 2018 20:06:35 +0100 Subject: [PATCH 6/7] pkg_roots per session --- src/_pytest/main.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/_pytest/main.py b/src/_pytest/main.py index 50bd109794e..5d5d02a239b 100644 --- a/src/_pytest/main.py +++ b/src/_pytest/main.py @@ -386,6 +386,7 @@ def __init__(self, config): self._initialpaths = frozenset() # Keep track of any collected nodes in here, so we don't duplicate fixtures self._node_cache = {} + self._pkg_roots = {} self.config.pluginmanager.register(self, name="session") @@ -488,7 +489,6 @@ def _collect(self, arg): names = self._parsearg(arg) argpath = names.pop(0).realpath() - pkg_roots = {} root = self # Start with a Session root, and delve to argpath item (dir or file) @@ -509,7 +509,7 @@ def _collect(self, arg): col = root._collectfile(pkginit, handle_dupes=False) if col: if isinstance(col[0], Package): - pkg_roots[parent] = col[0] + self._pkg_roots[parent] = col[0] # always store a list in the cache, matchnodes expects it self._node_cache[col[0].fspath] = [col[0]] @@ -533,15 +533,25 @@ def filter_(f): fil=filter_, rec=self._recurse, bf=True, sort=True ): dirpath = path.dirpath() - collect_root = pkg_roots.get(dirpath, root) + collect_root = self._pkg_roots.get(dirpath, root) if dirpath not in seen_dirs: + # Collect packages first. seen_dirs.add(dirpath) pkginit = dirpath.join("__init__.py") if pkginit.exists(): + got_pkg = False for x in collect_root._collectfile(pkginit): yield x if isinstance(x, Package): - pkg_roots[dirpath] = x + self._pkg_roots[dirpath] = x + got_pkg = True + if got_pkg: + continue + if path.basename == "__init__.py": + continue + + if dirpath in self._pkg_roots: + continue for x in collect_root._collectfile(path): key = (type(x), x.fspath) @@ -556,7 +566,7 @@ def filter_(f): if argpath in self._node_cache: col = self._node_cache[argpath] else: - collect_root = pkg_roots.get(argpath.dirname, root) + collect_root = self._pkg_roots.get(argpath.dirname, root) col = collect_root._collectfile(argpath) if col: self._node_cache[argpath] = col From 827573c0494f58e7a2454633fe3d1584a8416325 Mon Sep 17 00:00:00 2001 From: Daniel Hahler Date: Wed, 7 Nov 2018 20:14:07 +0100 Subject: [PATCH 7/7] cleanup, TODO: use _node_cache --- src/_pytest/main.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/src/_pytest/main.py b/src/_pytest/main.py index 5d5d02a239b..1de5f656fd0 100644 --- a/src/_pytest/main.py +++ b/src/_pytest/main.py @@ -490,7 +490,6 @@ def _collect(self, arg): names = self._parsearg(arg) argpath = names.pop(0).realpath() - root = self # Start with a Session root, and delve to argpath item (dir or file) # and stack all Packages found on the way. # No point in finding packages when collecting doctests @@ -503,10 +502,8 @@ def _collect(self, arg): if parent.isdir(): pkginit = parent.join("__init__.py") if pkginit.isfile(): - if pkginit in self._node_cache: - root = self._node_cache[pkginit][0] - else: - col = root._collectfile(pkginit, handle_dupes=False) + if pkginit not in self._node_cache: + col = self._collectfile(pkginit, handle_dupes=False) if col: if isinstance(col[0], Package): self._pkg_roots[parent] = col[0] @@ -533,27 +530,21 @@ def filter_(f): fil=filter_, rec=self._recurse, bf=True, sort=True ): dirpath = path.dirpath() - collect_root = self._pkg_roots.get(dirpath, root) if dirpath not in seen_dirs: # Collect packages first. seen_dirs.add(dirpath) pkginit = dirpath.join("__init__.py") if pkginit.exists(): - got_pkg = False + collect_root = self._pkg_roots.get(dirpath, self) for x in collect_root._collectfile(pkginit): yield x if isinstance(x, Package): self._pkg_roots[dirpath] = x - got_pkg = True - if got_pkg: - continue - if path.basename == "__init__.py": - continue - if dirpath in self._pkg_roots: + # Do not collect packages here. continue - for x in collect_root._collectfile(path): + for x in self._collectfile(path): key = (type(x), x.fspath) if key in self._node_cache: yield self._node_cache[key] @@ -566,7 +557,7 @@ def filter_(f): if argpath in self._node_cache: col = self._node_cache[argpath] else: - collect_root = self._pkg_roots.get(argpath.dirname, root) + collect_root = self._pkg_roots.get(argpath.dirname, self) col = collect_root._collectfile(argpath) if col: self._node_cache[argpath] = col