Skip to content

gh-129200: Add locking to the iOS testbed startup sequence. #130564

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile.pre.in
Original file line number Diff line number Diff line change
Expand Up @@ -2149,7 +2149,7 @@ testuniversal: all
# a full Xcode install that has an iPhone SE (3rd edition) simulator available.
# This must be run *after* a `make install` has completed the build. The
# `--with-framework-name` argument *cannot* be used when configuring the build.
XCFOLDER:=iOSTestbed.$(MULTIARCH).$(shell date +%s)
XCFOLDER:=iOSTestbed.$(MULTIARCH).$(shell date +%s).$$PPID
.PHONY: testios
testios:
@if test "$(MACHDEP)" != "ios"; then \
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Multiple iOS testbed runners can now be started at the same time without
introducing an ambiguity over simulator ownership.
70 changes: 63 additions & 7 deletions iOS/testbed/__main__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import argparse
import asyncio
import fcntl
import json
import os
import plistlib
import re
import shutil
import subprocess
import sys
import tempfile
from contextlib import asynccontextmanager
from datetime import datetime
from pathlib import Path
Expand Down Expand Up @@ -36,6 +39,46 @@ class MySystemExit(Exception):
pass


class SimulatorLock:
# An fcntl-based filesystem lock that can be used to ensure that
def __init__(self, timeout):
self.filename = Path(tempfile.gettempdir()) / "python-ios-testbed"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does the lock need to be system-wide rather than just for a particular user? If so, the use of tempfile.gettempdir() can be problematic here. As documented, it checks first for a TMPDIR environment variable and it appears that usually on macOS TMPDIR is set to a user-specific temp directory, for example:

>>> tempfile.gettempdir()
'/var/folders/sn/0m4rnbyj2z1byjs68sz838300000gn/T'

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As best as I can make out, the lock only needs to be per user. If I start a simulator on one user's account, then use Fast User Switching to move to a second user's account, the simulator on the first user's account doesn't show on the second.

self.timeout = timeout

self.fd = None

async def acquire(self):
# Ensure the lockfile exists
self.filename.touch(exist_ok=True)

# Try `timeout` times to acquire the lock file, with a 1 second pause
# between each attempt. Report status every 10 seconds.
for i in range(0, self.timeout):
try:
fd = os.open(self.filename, os.O_RDWR | os.O_TRUNC, 0o644)
fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
except OSError:
os.close(fd)
if i % 10 == 0:
print("... waiting", flush=True)
await asyncio.sleep(1)
else:
self.fd = fd
return

# If we reach the end of the loop, we've exceeded the allowed number of
# attempts.
raise ValueError("Unable to obtain lock on iOS simulator creation")

def release(self):
# If a lock is held, release it.
if self.fd is not None:
# Release the lock.
fcntl.flock(self.fd, fcntl.LOCK_UN)
os.close(self.fd)
self.fd = None


# All subprocesses are executed through this context manager so that no matter
# what happens, they can always be cancelled from another task, and they will
# always be cleaned up on exit.
Expand Down Expand Up @@ -107,23 +150,24 @@ async def list_devices():
raise


async def find_device(initial_devices):
async def find_device(initial_devices, lock):
while True:
new_devices = set(await list_devices()).difference(initial_devices)
if len(new_devices) == 0:
await asyncio.sleep(1)
elif len(new_devices) == 1:
udid = new_devices.pop()
print(f"{datetime.now():%Y-%m-%d %H:%M:%S}: New test simulator detected")
print(f"UDID: {udid}")
print(f"UDID: {udid}", flush=True)
lock.release()
return udid
else:
exit(f"Found more than one new device: {new_devices}")


async def log_stream_task(initial_devices):
async def log_stream_task(initial_devices, lock):
# Wait up to 5 minutes for the build to complete and the simulator to boot.
udid = await asyncio.wait_for(find_device(initial_devices), 5 * 60)
udid = await asyncio.wait_for(find_device(initial_devices, lock), 5 * 60)

# Stream the iOS device's logs, filtering out messages that come from the
# XCTest test suite (catching NSLog messages from the test method), or
Expand Down Expand Up @@ -171,7 +215,7 @@ async def log_stream_task(initial_devices):

async def xcode_test(location, simulator, verbose):
# Run the test suite on the named simulator
print("Starting xcodebuild...")
print("Starting xcodebuild...", flush=True)
args = [
"xcodebuild",
"test",
Expand Down Expand Up @@ -331,7 +375,17 @@ async def run_testbed(simulator: str, args: list[str], verbose: bool=False):
location = Path(__file__).parent
print("Updating plist...", end="", flush=True)
update_plist(location, args)
print(" done.")
print(" done.", flush=True)

# We need to get an exclusive lock on simulator creation, to avoid issues
# with multiple simulators starting and being unable to tell which
# simulator is due to which testbed instance. See
# https://github.com/python/cpython/issues/130294 for details. Wait up to
# 10 minutes for a simulator to boot.
print("Obtaining lock on simulator creation...", flush=True)
simulator_lock = SimulatorLock(timeout=10*60)
await simulator_lock.acquire()
print("Simulator lock acquired.", flush=True)

# Get the list of devices that are booted at the start of the test run.
# The simulator started by the test suite will be detected as the new
Expand All @@ -340,13 +394,15 @@ async def run_testbed(simulator: str, args: list[str], verbose: bool=False):

try:
async with asyncio.TaskGroup() as tg:
tg.create_task(log_stream_task(initial_devices))
tg.create_task(log_stream_task(initial_devices, simulator_lock))
tg.create_task(xcode_test(location, simulator=simulator, verbose=verbose))
except* MySystemExit as e:
raise SystemExit(*e.exceptions[0].args) from None
except* subprocess.CalledProcessError as e:
# Extract it from the ExceptionGroup so it can be handled by `main`.
raise e.exceptions[0]
finally:
simulator_lock.release()


def main():
Expand Down
Loading