Skip to content

Randomize load scope scheduler #536

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -87,13 +87,18 @@ any guaranteed order, but you can control this with these options:
by **class** for *test methods*, then each group will be sent to an available worker,
guaranteeing that all tests in a group run in the same process. This can be useful if you have
expensive module-level or class-level fixtures. Currently the groupings can't be customized,
with grouping by class takes priority over grouping by module.
with grouping by class taking priority over grouping by module. Use ``--dist=loadscopeshuffled``
to randomize the order in which the groups are executed.
This feature was added in version ``1.19``.

* ``--dist=loadfile``: tests will be grouped by file name, and then will be sent to an available
worker, guaranteeing that all tests in a group run in the same worker. This feature was added
in version ``1.21``.

* ``--dist=loadscopeshuffled``: tests will be grouped exactly as with ``--dist=loadscope``, but the
groups will be handed to available workers in random order and the tests within each group will be
shuffled. This can be useful if your tests use module or class fixtures but are otherwise expected
to be independent.


Making session-scoped fixtures execute only once
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Expand Down
1 change: 1 addition & 0 deletions changelog/229.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Extend loadscope scheduling to randomize the distribution of test groups and the order of tests within the group.
2 changes: 2 additions & 0 deletions src/xdist/dsession.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
LoadScheduling,
LoadScopeScheduling,
LoadFileScheduling,
LoadScopeShuffledScheduling
)


Expand Down Expand Up @@ -98,6 +99,7 @@ def pytest_xdist_make_scheduler(self, config, log):
"load": LoadScheduling,
"loadscope": LoadScopeScheduling,
"loadfile": LoadFileScheduling,
"loadscopeshuffled": LoadScopeShuffledScheduling,
}
return schedulers[dist](config, log)

Expand Down
4 changes: 3 additions & 1 deletion src/xdist/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def pytest_addoption(parser):
"--dist",
metavar="distmode",
action="store",
choices=["each", "load", "loadscope", "loadfile", "no"],
choices=["each", "load", "loadscope", "loadfile", "no", "loadscopeshuffled"],
dest="dist",
default="no",
help=(
Expand All @@ -87,6 +87,8 @@ def pytest_addoption(parser):
" the same scope to any available environment.\n\n"
"loadfile: load balance by sending test grouped by file"
" to any available environment.\n\n"
"loadscopeshuffled: load balance by sending pending groups of tests in"
" the same scope to any available environment in a random order.\n\n"
"(default) no: run tests inprocess, don't distribute."
),
)
Expand Down
1 change: 1 addition & 0 deletions src/xdist/scheduler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
from xdist.scheduler.load import LoadScheduling # noqa
from xdist.scheduler.loadfile import LoadFileScheduling # noqa
from xdist.scheduler.loadscope import LoadScopeScheduling # noqa
from xdist.scheduler.loadscopeshuffled import LoadScopeShuffledScheduling # noqa
58 changes: 58 additions & 0 deletions src/xdist/scheduler/loadscopeshuffled.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
from . import LoadScopeScheduling

from collections import OrderedDict
import random
from py.log import Producer


class LoadScopeShuffledScheduling(LoadScopeScheduling):
    """Implement load scheduling across nodes, grouping tests by scope and
    randomizing the execution order.

    This distributes the tests collected across all nodes so each test is run
    just once. All nodes collect and submit the list of tests and when all
    collections are received it is verified they are identical collections.
    Then the collection gets divided up in work units, grouped by test scope,
    and those work units get submitted to nodes. The work units are sampled via
    ``random.choice`` and the tests within the work unit are shuffled prior to
    being submitted to nodes. Whenever a node finishes an item, it calls
    ``.mark_test_complete()`` which will trigger the scheduler to assign more
    work units if the number of pending tests for the node falls below a
    low-watermark.

    When created, ``numnodes`` defines how many nodes are expected to submit a
    collection. This is used to know when all nodes have finished collection.

    This class behaves very much like LoadScopeScheduling, but with modified
    work assignment.
    """

    def __init__(self, config, log=None):
        """Initialize the scheduler and bind a scheduler-specific logger.

        :param config: passed through unchanged to the base class initializer.
        :param log: optional parent logger; when given, its
            ``loadscopeshuffledsched`` attribute is used, otherwise a new
            ``Producer("loadscopeshuffledsched")`` is created.
        """
        super(LoadScopeShuffledScheduling, self).__init__(config, log)
        # Assigned after the base initializer has run, so this value is the
        # one that ends up on the instance.
        if log is None:
            self.log = Producer("loadscopeshuffledsched")
        else:
            self.log = log.loadscopeshuffledsched

    def _assign_work_unit(self, node):
        """Assign a randomly selected and shuffled work unit to a node.

        A scope is picked at random from ``self.workqueue``, removed from the
        queue, recorded under ``self.assigned_work[node]`` and the indexes of
        its not-yet-completed tests are sent to ``node`` in shuffled order.

        :param node: the worker node that should receive the work unit.
        :raises AssertionError: if the work queue is empty (assertions on).
        :raises KeyError: if the work queue is empty and assertions are
            disabled.
        """
        assert self.workqueue

        # Grab a random unit of work
        try:
            scope = random.choice(list(self.workqueue))
        except IndexError:
            # match LoadScopeScheduler error mode - OrderedDict().popitem()
            raise KeyError('dictionary is empty')
        work_unit = self.workqueue.pop(scope)

        # Keep track of the assigned work.
        # The default is passed positionally: plain ``dict.setdefault`` does
        # not accept keyword arguments, so this form works for both ``dict``
        # and ``OrderedDict`` containers.
        assigned_to_node = self.assigned_work.setdefault(node, OrderedDict())
        assigned_to_node[scope] = work_unit

        # Ask the node to execute the workload
        worker_collection = self.registered_collections[node]
        nodeids_indexes = [
            worker_collection.index(nodeid)
            for nodeid, completed in work_unit.items()
            if not completed
        ]
        random.shuffle(nodeids_indexes)  # re-order indexes within a workload
        node.send_runtest_some(nodeids_indexes)
39 changes: 30 additions & 9 deletions testing/acceptance_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1171,24 +1171,29 @@ def test_aaa1(crasher):
assert "INTERNALERROR" not in result.stderr.str()


class TestLoadScope:
def test_by_module(self, testdir):
class TestLoadScopes:
"""
Tests for LoadScope and LoadScopeShuffled
"""
@pytest.mark.parametrize("scope", ["loadscope", "loadscopeshuffled"])
def test_by_module(self, testdir, scope):
test_file = """
import pytest
@pytest.mark.parametrize('i', range(10))
def test(i):
pass
"""
testdir.makepyfile(test_a=test_file, test_b=test_file)
result = testdir.runpytest("-n2", "--dist=loadscope", "-v")
result = testdir.runpytest("-n2", "--dist=%s" % scope, "-v")
assert get_workers_and_test_count_by_prefix(
"test_a.py::test", result.outlines
) in ({"gw0": 10}, {"gw1": 10})
assert get_workers_and_test_count_by_prefix(
"test_b.py::test", result.outlines
) in ({"gw0": 10}, {"gw1": 10})

def test_by_class(self, testdir):
@pytest.mark.parametrize("scope", ["loadscope", "loadscopeshuffled"])
def test_by_class(self, testdir, scope):
testdir.makepyfile(
test_a="""
import pytest
Expand All @@ -1203,15 +1208,27 @@ def test(self, i):
pass
"""
)
result = testdir.runpytest("-n2", "--dist=loadscope", "-v")
result = testdir.runpytest("-n2", "--dist=%s" % scope, "-v")
assert get_workers_and_test_count_by_prefix(
"test_a.py::TestA", result.outlines
) in ({"gw0": 10}, {"gw1": 10})
assert get_workers_and_test_count_by_prefix(
"test_a.py::TestB", result.outlines
) in ({"gw0": 10}, {"gw1": 10})

def test_module_single_start(self, testdir):
@pytest.mark.parametrize(
"scope",
[
pytest.param("loadscope"),
pytest.param(
"loadscopeshuffled",
marks=pytest.mark.xfail(
reason="Flaky due to work distribution randomization i.e. test a, b are not always sent first and therefore do not always end up on gw0 and gw1"
)
)
]
)
def test_module_single_start(self, testdir, scope):
"""Fix test suite never finishing in case all workers start with a single test (#277)."""
test_file1 = """
import pytest
Expand All @@ -1226,7 +1243,7 @@ def test_2():
pass
"""
testdir.makepyfile(test_a=test_file1, test_b=test_file1, test_c=test_file2)
result = testdir.runpytest("-n2", "--dist=loadscope", "-v")
result = testdir.runpytest("-n2", "--dist=%s" % scope, "-v")
a = get_workers_and_test_count_by_prefix("test_a.py::test", result.outlines)
b = get_workers_and_test_count_by_prefix("test_b.py::test", result.outlines)
c1 = get_workers_and_test_count_by_prefix("test_c.py::test_1", result.outlines)
Expand Down Expand Up @@ -1359,13 +1376,17 @@ def test_c(self):
(_test_content * 4) % ("A", "B", "C", "D")
)

@pytest.mark.parametrize("scope", ["each", "load", "loadscope", "loadfile", "no"])
@pytest.mark.parametrize(
"scope", ["each", "load", "loadscope", "loadfile", "no", "loadscopeshuffled"]
)
def test_single_file(self, testdir, scope):
testdir.makepyfile(test_a=self.test_file1)
result = testdir.runpytest("-n2", "--dist=%s" % scope, "-v")
result.assert_outcomes(passed=(12 if scope != "each" else 12 * 2))

@pytest.mark.parametrize("scope", ["each", "load", "loadscope", "loadfile", "no"])
@pytest.mark.parametrize(
"scope", ["each", "load", "loadscope", "loadfile", "no", "loadscopeshuffled"]
)
def test_multi_file(self, testdir, scope):
testdir.makepyfile(
test_a=self.test_file1,
Expand Down