From 3292ec382ee5bc9962167f45152b6fe29df90417 Mon Sep 17 00:00:00 2001 From: swiant Date: Tue, 26 May 2020 15:57:17 -0600 Subject: [PATCH 1/7] create scheduler to randomize work distribution to nodes --- src/xdist/scheduler/__init__.py | 1 + src/xdist/scheduler/loadscopeshuffled.py | 58 ++++++++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 src/xdist/scheduler/loadscopeshuffled.py diff --git a/src/xdist/scheduler/__init__.py b/src/xdist/scheduler/__init__.py index 06ba6b7b..203b2b6d 100644 --- a/src/xdist/scheduler/__init__.py +++ b/src/xdist/scheduler/__init__.py @@ -2,3 +2,4 @@ from xdist.scheduler.load import LoadScheduling # noqa from xdist.scheduler.loadfile import LoadFileScheduling # noqa from xdist.scheduler.loadscope import LoadScopeScheduling # noqa +from xdist.scheduler.loadscopeshuffled import LoadScopeShuffledScheduling # noqa diff --git a/src/xdist/scheduler/loadscopeshuffled.py b/src/xdist/scheduler/loadscopeshuffled.py new file mode 100644 index 00000000..19c2e873 --- /dev/null +++ b/src/xdist/scheduler/loadscopeshuffled.py @@ -0,0 +1,58 @@ +from . import LoadScopeScheduling + +from collections import OrderedDict +import random +from py.log import Producer + + +class LoadScopeShuffledScheduling(LoadScopeScheduling): + """Implement load scheduling across nodes, grouping test by scope + + This distributes the tests collected across all nodes so each test is run + just once. All nodes collect and submit the list of tests and when all + collections are received it is verified they are identical collections. + Then the collection gets divided up in work units, grouped by test scope, + and those work units get submitted to nodes. The work units are sampled via + random.choice and the tests within the workunit are shuffled prior to being + submitted to nodes. Whenever a node finishes an item, it calls + ``.mark_test_complete()`` which will trigger the scheduler + to assign more work units if the number of pending tests for the node falls + below a low-watermark. + + When created, ``numnodes`` defines how many nodes are expected to submit a + collection. This is used to know when all nodes have finished collection. + + This class behaves very much like LoadScopeScheduling, but with modified work assignment + """ + def __init__(self, config, log=None): + super(LoadScopeShuffledScheduling, self).__init__(config, log) + if log is None: + self.log = Producer("loadscopeshuffledsched") + else: + self.log = log.loadscopeshuffledsched + + def _assign_work_unit(self, node): + """Assign a randomly selected and shuffled work unit to a node.""" + assert self.workqueue + + # Grab a random unit of work + try: + scope = random.choice(list(self.workqueue)) + except IndexError: + # match LoadScopeScheduler error mode - OrderedDict().popitem() + raise KeyError('dictionary is empty') + work_unit = self.workqueue.pop(scope) + + # Keep track of the assigned work + assigned_to_node = self.assigned_work.setdefault(node, default=OrderedDict()) + assigned_to_node[scope] = work_unit + + # Ask the node to execute the workload + worker_collection = self.registered_collections[node] + nodeids_indexes = [ + worker_collection.index(nodeid) + for nodeid, completed in work_unit.items() + if not completed + ] + random.shuffle(nodeids_indexes) # re-order indexes within a workload + node.send_runtest_some(nodeids_indexes) From 6e83f030a06801284b1a0238ae32a8dfeeece33f Mon Sep 17 00:00:00 2001 From: swiant Date: Tue, 26 May 2020 15:57:40 -0600 Subject: [PATCH 2/7] integrate into pytest plugin --- src/xdist/dsession.py | 2 ++ src/xdist/plugin.py | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/xdist/dsession.py b/src/xdist/dsession.py index 7ac5f577..00edc9a9 100644 --- a/src/xdist/dsession.py +++ b/src/xdist/dsession.py @@ -7,6 +7,7 @@ LoadScheduling, LoadScopeScheduling, LoadFileScheduling, + LoadScopeShuffledScheduling ) @@ -98,6 +99,7 @@ def pytest_xdist_make_scheduler(self, config, log): "load": LoadScheduling, "loadscope": LoadScopeScheduling, "loadfile": LoadFileScheduling, + "loadscopeshuffled": LoadScopeShuffledScheduling, } return schedulers[dist](config, log) diff --git a/src/xdist/plugin.py b/src/xdist/plugin.py index 51651263..6a0b979c 100644 --- a/src/xdist/plugin.py +++ b/src/xdist/plugin.py @@ -75,7 +75,7 @@ def pytest_addoption(parser): "--dist", metavar="distmode", action="store", - choices=["each", "load", "loadscope", "loadfile", "no"], + choices=["each", "load", "loadscope", "loadfile", "no", "loadscopeshuffled"], dest="dist", default="no", help=( @@ -87,6 +87,8 @@ def pytest_addoption(parser): " the same scope to any available environment.\n\n" "loadfile: load balance by sending test grouped by file" " to any available environment.\n\n" + "loadscopeshuffled: load balance by sending pending groups of tests in" + " the same scope to any available environment in a random order.\n\n" "(default) no: run tests inprocess, don't distribute." ), ) From 9cc3542feae10504bf9f0886ae448cfe3c0d0e94 Mon Sep 17 00:00:00 2001 From: swiant Date: Tue, 26 May 2020 16:04:19 -0600 Subject: [PATCH 3/7] LoadScopeShuffled Tests --- testing/acceptance_test.py | 74 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 72 insertions(+), 2 deletions(-) diff --git a/testing/acceptance_test.py b/testing/acceptance_test.py index b6a4a949..09858f79 100644 --- a/testing/acceptance_test.py +++ b/testing/acceptance_test.py @@ -1359,13 +1359,17 @@ def test_c(self): (_test_content * 4) % ("A", "B", "C", "D") ) - @pytest.mark.parametrize("scope", ["each", "load", "loadscope", "loadfile", "no"]) + @pytest.mark.parametrize( + "scope", ["each", "load", "loadscope", "loadfile", "no", "loadscopeshuffled"] + ) def test_single_file(self, testdir, scope): testdir.makepyfile(test_a=self.test_file1) result = testdir.runpytest("-n2", "--dist=%s" % scope, "-v") result.assert_outcomes(passed=(12 if scope != "each" else 12 * 2)) - @pytest.mark.parametrize("scope", ["each", "load", "loadscope", "loadfile", "no"]) + @pytest.mark.parametrize( + "scope", ["each", "load", "loadscope", "loadfile", "no", "loadscopeshuffled"] + ) def test_multi_file(self, testdir, scope): testdir.makepyfile( test_a=self.test_file1, @@ -1404,3 +1408,69 @@ def get_workers_and_test_count_by_prefix(prefix, lines, expected_status="PASSED" if expected_status == status and nodeid.startswith(prefix): result[worker] = result.get(worker, 0) + 1 return result + + +class TestLoadScopeShuffled: + def test_by_module(self, testdir): + test_file = """ + import pytest + @pytest.mark.parametrize('i', range(10)) + def test(i): + pass + """ + testdir.makepyfile(test_a=test_file, test_b=test_file) + result = testdir.runpytest("-n2", "--dist=loadscopeshuffled", "-v") + assert get_workers_and_test_count_by_prefix( + "test_a.py::test", result.outlines + ) in ({"gw0": 10}, {"gw1": 10}) + assert get_workers_and_test_count_by_prefix( + "test_b.py::test", result.outlines + ) in ({"gw0": 10}, {"gw1": 10}) + + def test_by_class(self, testdir): + testdir.makepyfile( + test_a=""" + import pytest + class TestA: + @pytest.mark.parametrize('i', range(10)) + def test(self, i): + pass + + class TestB: + @pytest.mark.parametrize('i', range(10)) + def test(self, i): + pass + """ + ) + result = testdir.runpytest("-n2", "--dist=loadscopeshuffled", "-v") + assert get_workers_and_test_count_by_prefix( + "test_a.py::TestA", result.outlines + ) in ({"gw0": 10}, {"gw1": 10}) + assert get_workers_and_test_count_by_prefix( + "test_a.py::TestB", result.outlines + ) in ({"gw0": 10}, {"gw1": 10}) + + def test_module_single_start(self, testdir): + """Fix test suite never finishing in case all workers start with a single test (#277).""" + test_file1 = """ + import pytest + def test(): + pass + """ + test_file2 = """ + import pytest + def test_1(): + pass + def test_2(): + pass + """ + testdir.makepyfile(test_a=test_file1, test_b=test_file1, test_c=test_file2) + result = testdir.runpytest("-n2", "--dist=loadscopeshuffled", "-v") + a = get_workers_and_test_count_by_prefix("test_a.py::test", result.outlines) + b = get_workers_and_test_count_by_prefix("test_b.py::test", result.outlines) + c1 = get_workers_and_test_count_by_prefix("test_c.py::test_1", result.outlines) + c2 = get_workers_and_test_count_by_prefix("test_c.py::test_2", result.outlines) + assert a in ({"gw0": 1}, {"gw1": 1}) + assert b in ({"gw0": 1}, {"gw1": 1}) + assert a.items() != b.items() + assert c1 == c2 From 5a52b215b47924da542f3bd47054b00de2f8447f Mon Sep 17 00:00:00 2001 From: swiant Date: Tue, 26 May 2020 16:16:47 -0600 Subject: [PATCH 4/7] Refactor LoadScopeShuffled to parameterized LoadScopeTests --- testing/acceptance_test.py | 86 ++++++-------------------------------- 1 file changed, 13 insertions(+), 73 deletions(-) diff --git a/testing/acceptance_test.py b/testing/acceptance_test.py index 09858f79..5d49ca31 100644 --- a/testing/acceptance_test.py +++ b/testing/acceptance_test.py @@ -1171,8 +1171,12 @@ def test_aaa1(crasher): assert "INTERNALERROR" not in result.stderr.str() -class TestLoadScope: - def test_by_module(self, testdir): +class TestLoadScopes: + """ + Tests for LoadScope and LoadScopeShuffled + """ + @pytest.mark.parametrize("scope", ["loadscope", "loadscopeshuffled"]) + def test_by_module(self, testdir, scope): test_file = """ import pytest @pytest.mark.parametrize('i', range(10)) @@ -1180,7 +1184,7 @@ def test(i): pass """ testdir.makepyfile(test_a=test_file, test_b=test_file) - result = testdir.runpytest("-n2", "--dist=loadscope", "-v") + result = testdir.runpytest("-n2", "--dist=%s" % scope, "-v") assert get_workers_and_test_count_by_prefix( "test_a.py::test", result.outlines ) in ({"gw0": 10}, {"gw1": 10}) @@ -1188,7 +1192,8 @@ def test(i): "test_b.py::test", result.outlines ) in ({"gw0": 10}, {"gw1": 10}) - def test_by_class(self, testdir): + @pytest.mark.parametrize("scope", ["loadscope", "loadscopeshuffled"]) + def test_by_class(self, testdir, scope): testdir.makepyfile( test_a=""" import pytest @@ -1203,7 +1208,7 @@ def test(self, i): pass """ ) - result = testdir.runpytest("-n2", "--dist=loadscope", "-v") + result = testdir.runpytest("-n2", "--dist=%s" % scope, "-v") assert get_workers_and_test_count_by_prefix( "test_a.py::TestA", result.outlines ) in ({"gw0": 10}, {"gw1": 10}) @@ -1211,7 +1216,8 @@ def test(self, i): "test_a.py::TestB", result.outlines ) in ({"gw0": 10}, {"gw1": 10}) - def test_module_single_start(self, testdir): + @pytest.mark.parametrize("scope", ["loadscope", "loadscopeshuffled"]) + def test_module_single_start(self, testdir, scope): """Fix test suite never finishing in case all workers start with a single test (#277).""" test_file1 = """ import pytest @@ -1226,7 +1232,7 @@ def test_2(): pass """ testdir.makepyfile(test_a=test_file1, test_b=test_file1, test_c=test_file2) - result = testdir.runpytest("-n2", "--dist=loadscope", "-v") + result = testdir.runpytest("-n2", "--dist=%s" % scope, "-v") a = get_workers_and_test_count_by_prefix("test_a.py::test", result.outlines) b = get_workers_and_test_count_by_prefix("test_b.py::test", result.outlines) c1 = get_workers_and_test_count_by_prefix("test_c.py::test_1", result.outlines) @@ -1408,69 +1414,3 @@ def get_workers_and_test_count_by_prefix(prefix, lines, expected_status="PASSED" if expected_status == status and nodeid.startswith(prefix): result[worker] = result.get(worker, 0) + 1 return result - - -class TestLoadScopeShuffled: - def test_by_module(self, testdir): - test_file = """ - import pytest - @pytest.mark.parametrize('i', range(10)) - def test(i): - pass - """ - testdir.makepyfile(test_a=test_file, test_b=test_file) - result = testdir.runpytest("-n2", "--dist=loadscopeshuffled", "-v") - assert get_workers_and_test_count_by_prefix( - "test_a.py::test", result.outlines - ) in ({"gw0": 10}, {"gw1": 10}) - assert get_workers_and_test_count_by_prefix( - "test_b.py::test", result.outlines - ) in ({"gw0": 10}, {"gw1": 10}) - - def test_by_class(self, testdir): - testdir.makepyfile( - test_a=""" - import pytest - class TestA: - @pytest.mark.parametrize('i', range(10)) - def test(self, i): - pass - - class TestB: - @pytest.mark.parametrize('i', range(10)) - def test(self, i): - pass - """ - ) - result = testdir.runpytest("-n2", "--dist=loadscopeshuffled", "-v") - assert get_workers_and_test_count_by_prefix( - "test_a.py::TestA", result.outlines - ) in ({"gw0": 10}, {"gw1": 10}) - assert get_workers_and_test_count_by_prefix( - "test_a.py::TestB", result.outlines - ) in ({"gw0": 10}, {"gw1": 10}) - - def test_module_single_start(self, testdir): - """Fix test suite never finishing in case all workers start with a single test (#277).""" - test_file1 = """ - import pytest - def test(): - pass - """ - test_file2 = """ - import pytest - def test_1(): - pass - def test_2(): - pass - """ - testdir.makepyfile(test_a=test_file1, test_b=test_file1, test_c=test_file2) - result = testdir.runpytest("-n2", "--dist=loadscopeshuffled", "-v") - a = get_workers_and_test_count_by_prefix("test_a.py::test", result.outlines) - b = get_workers_and_test_count_by_prefix("test_b.py::test", result.outlines) - c1 = get_workers_and_test_count_by_prefix("test_c.py::test_1", result.outlines) - c2 = get_workers_and_test_count_by_prefix("test_c.py::test_2", result.outlines) - assert a in ({"gw0": 1}, {"gw1": 1}) - assert b in ({"gw0": 1}, {"gw1": 1}) - assert a.items() != b.items() - assert c1 == c2 From bd0dfb4d791f4fa8949870c4a884badfe82f400d Mon Sep 17 00:00:00 2001 From: swiant Date: Tue, 26 May 2020 16:22:27 -0600 Subject: [PATCH 5/7] towncrier news fragment --- changelog/229.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog/229.feature diff --git a/changelog/229.feature b/changelog/229.feature new file mode 100644 index 00000000..0efe2928 --- /dev/null +++ b/changelog/229.feature @@ -0,0 +1 @@ +Extend loadscope scheduling to randomize the distribution of test groups and the order of tests within the group. \ No newline at end of file From bda83889d0813ca8b27c021aefea467dc23d9c29 Mon Sep 17 00:00:00 2001 From: swiant Date: Tue, 26 May 2020 16:28:16 -0600 Subject: [PATCH 6/7] Add reference to ReadMe for loadscopeshuffled dist option --- README.rst | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 4c66d8b5..a7942879 100644 --- a/README.rst +++ b/README.rst @@ -87,13 +87,18 @@ any guaranteed order, but you can control this with these options: by **class** for *test methods*, then each group will be sent to an available worker, guaranteeing that all tests in a group run in the same process. This can be useful if you have expensive module-level or class-level fixtures. Currently the groupings can't be customized, - with grouping by class takes priority over grouping by module. + with grouping by class taking priority over grouping by module. Use loadscopeshuffled for + randomizing the groupings execution order. This feature was added in version ``1.19``. * ``--dist=loadfile``: tests will be grouped by file name, and then will be sent to an available worker, guaranteeing that all tests in a group run in the same worker. This feature was added in version ``1.21``. +* ``--dist=loadscopeshuffled``: tests will be grouped as they are by loadscope, then the groups + will be randomly selected to run on a worker. The tests within the group will be shuffled. This + can be useful if your tests use module or class fixtures but are otherwise expected to be independent. + Making session-scoped fixtures execute only once ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From c3a3ca721d6a415999021332d98b8775e95dd162 Mon Sep 17 00:00:00 2001 From: swiant Date: Wed, 27 May 2020 08:15:20 -0600 Subject: [PATCH 7/7] Xfail flaky test --- testing/acceptance_test.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/testing/acceptance_test.py b/testing/acceptance_test.py index 5d49ca31..274f49c8 100644 --- a/testing/acceptance_test.py +++ b/testing/acceptance_test.py @@ -1216,7 +1216,18 @@ def test(self, i): "test_a.py::TestB", result.outlines ) in ({"gw0": 10}, {"gw1": 10}) - @pytest.mark.parametrize("scope", ["loadscope", "loadscopeshuffled"]) + @pytest.mark.parametrize( + "scope", + [ + pytest.param("loadscope"), + pytest.param( + "loadscopeshuffled", + marks=pytest.mark.xfail( + reason="Flaky due to work distribution randomization i.e. test a, b are not always sent first and therefore do not always end up on gw0 and gw1" + ) + ) + ] + ) def test_module_single_start(self, testdir, scope): """Fix test suite never finishing in case all workers start with a single test (#277).""" test_file1 = """