Skip to content

Commit 3cdbf33

Browse files
committed
fix(gateway): don't dead-end setup wizard when only system-scope unit is installed
The setup wizard dropped non-root users at a bare shell prompt when trying to start a system-scope gateway service. Previously _require_root_for_system_service called sys.exit(1), which the wizard's `except Exception` guards cannot catch (SystemExit is a BaseException). Users with a pre-existing /etc/systemd/system unit (e.g. from an earlier `sudo hermes setup` run) hit this whenever they re-ran `hermes setup` as a regular user. - Convert _require_root_for_system_service to raise a typed SystemScopeRequiresRootError (RuntimeError subclass) instead of sys.exit(1). The direct CLI path (`hermes gateway install|start|stop| restart|uninstall` without sudo) still exits 1 cleanly via a new catch at the top of gateway_command, matching the existing UserSystemdUnavailableError pattern. - Add _system_scope_wizard_would_need_root() pre-check and _print_system_scope_remediation() helper. Both setup wizards (hermes_cli/setup.py and hermes_cli/gateway.py::gateway_setup) now detect the dead-end before prompting and print actionable guidance: either `sudo systemctl start <service>` this time, or uninstall the system unit and install a per-user one. - Defense-in-depth: all 5 wizard prompt sites also catch SystemScopeRequiresRootError and fall back to the remediation helper if the pre-check is bypassed (race, etc.). Tests: 12 new tests in TestSystemScopeRequiresRootError, TestSystemScopeWizardPreCheck, TestSystemScopeRemediationOutput, and TestGatewayCommandCatchesSystemScopeError covering the exception contract, pre-check matrix (root vs non-root, system-only vs user-present vs none vs explicit system=True), remediation output for each action, and the direct-CLI exit-1 path.
1 parent 04cf478 commit 3cdbf33

3 files changed

Lines changed: 285 additions & 7 deletions

File tree

hermes_cli/gateway.py

Lines changed: 95 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -967,6 +967,27 @@ class UserSystemdUnavailableError(RuntimeError):
967967
"""
968968

969969

970+
class SystemScopeRequiresRootError(RuntimeError):
971+
"""Raised when a system-scope gateway operation is attempted as non-root.
972+
973+
System-scope units live in ``/etc/systemd/system/`` and require root for
974+
install / uninstall / start / stop / restart via ``systemctl``. The
975+
previous behavior was ``sys.exit(1)`` which blew past the wizard's
976+
``except Exception`` guards and dumped the user at a bare shell prompt
977+
with no guidance. Raising a typed exception lets callers that can
978+
recover (the setup wizard) print actionable remediation instead, while
979+
``gateway_command`` still exits 1 with the same message for the direct
980+
CLI path.
981+
982+
``args[0]`` carries the user-facing message, ``args[1]`` the action name.
983+
``str(e)`` returns only the message (not the tuple repr) so format
984+
strings like ``f"Failed: {e}"`` render cleanly.
985+
"""
986+
987+
def __str__(self) -> str:
988+
return self.args[0] if self.args else ""
989+
990+
970991
def _user_dbus_socket_path() -> Path:
971992
"""Return the expected per-user D-Bus socket path (regardless of existence)."""
972993
xdg = os.environ.get("XDG_RUNTIME_DIR") or f"/run/user/{os.getuid()}"
@@ -1382,8 +1403,10 @@ def print_systemd_scope_conflict_warning() -> None:
13821403

13831404
def _require_root_for_system_service(action: str) -> None:
13841405
if os.geteuid() != 0:
1385-
print(f"System gateway {action} requires root. Re-run with sudo.")
1386-
sys.exit(1)
1406+
raise SystemScopeRequiresRootError(
1407+
f"System gateway {action} requires root. Re-run with sudo.",
1408+
action,
1409+
)
13871410

13881411

13891412
def _system_service_identity(run_as_user: str | None = None) -> tuple[str, str, str]:
@@ -1930,6 +1953,47 @@ def _select_systemd_scope(system: bool = False) -> bool:
19301953
return get_systemd_unit_path(system=True).exists() and not get_systemd_unit_path(system=False).exists()
19311954

19321955

1956+
def _system_scope_wizard_would_need_root(system: bool = False) -> bool:
1957+
"""True when the setup wizard is about to trigger a system-scope operation
1958+
as a non-root user.
1959+
1960+
Replicates the decision ``_select_systemd_scope`` makes inside
1961+
``systemd_start`` / ``systemd_restart`` / ``systemd_stop`` so the wizard
1962+
can detect the dead-end BEFORE prompting, rather than letting
1963+
``SystemScopeRequiresRootError`` propagate out and leave the user
1964+
staring at a bare shell.
1965+
"""
1966+
if os.geteuid() == 0:
1967+
return False
1968+
return _select_systemd_scope(system=system)
1969+
1970+
1971+
def _print_system_scope_remediation(action: str) -> None:
1972+
"""Print actionable remediation when the wizard skips a system-scope
1973+
prompt because the user isn't root. Keeps the wizard flowing instead of
1974+
aborting.
1975+
"""
1976+
svc = get_service_name()
1977+
print_warning(
1978+
f"Gateway is installed as a system-wide service — "
1979+
f"{action} requires root."
1980+
)
1981+
print_info(" Options:")
1982+
print_info(f" 1. {action.capitalize()} it this time:")
1983+
if action == "start":
1984+
print_info(f" sudo systemctl start {svc}")
1985+
elif action == "stop":
1986+
print_info(f" sudo systemctl stop {svc}")
1987+
elif action == "restart":
1988+
print_info(f" sudo systemctl restart {svc}")
1989+
else:
1990+
print_info(f" sudo systemctl {action} {svc}")
1991+
print_info(" 2. Switch to a per-user service (recommended for personal use):")
1992+
print_info(" sudo hermes gateway uninstall --system")
1993+
print_info(" hermes gateway install")
1994+
print_info(" hermes gateway start")
1995+
1996+
19331997
def _get_restart_drain_timeout() -> float:
19341998
"""Return the configured gateway restart drain timeout in seconds."""
19351999
raw = os.getenv("HERMES_RESTART_DRAIN_TIMEOUT", "").strip()
@@ -4115,7 +4179,9 @@ def gateway_setup():
41154179
print_success("Gateway service is installed and running.")
41164180
elif service_installed:
41174181
print_warning("Gateway service is installed but not running.")
4118-
if prompt_yes_no(" Start it now?", True):
4182+
if supports_systemd_services() and _system_scope_wizard_would_need_root():
4183+
_print_system_scope_remediation("start")
4184+
elif prompt_yes_no(" Start it now?", True):
41194185
try:
41204186
if supports_systemd_services():
41214187
systemd_start()
@@ -4125,6 +4191,12 @@ def gateway_setup():
41254191
print_error(" Failed to start — user systemd not reachable:")
41264192
for line in str(e).splitlines():
41274193
print(f" {line}")
4194+
except SystemScopeRequiresRootError as e:
4195+
# Defense in depth: the pre-check above should have caught
4196+
# this, but handle the race/edge case gracefully instead of
4197+
# letting the exception escape the wizard.
4198+
print_error(f" Failed to start: {e}")
4199+
_print_system_scope_remediation("start")
41284200
except subprocess.CalledProcessError as e:
41294201
print_error(f" Failed to start: {e}")
41304202
else:
@@ -4174,7 +4246,9 @@ def _is_progress(status: str) -> bool:
41744246
service_running = _is_service_running()
41754247

41764248
if service_running:
4177-
if prompt_yes_no(" Restart the gateway to pick up changes?", True):
4249+
if supports_systemd_services() and _system_scope_wizard_would_need_root():
4250+
_print_system_scope_remediation("restart")
4251+
elif prompt_yes_no(" Restart the gateway to pick up changes?", True):
41784252
try:
41794253
if supports_systemd_services():
41804254
systemd_restart()
@@ -4187,10 +4261,15 @@ def _is_progress(status: str) -> bool:
41874261
print_error(" Restart failed — user systemd not reachable:")
41884262
for line in str(e).splitlines():
41894263
print(f" {line}")
4264+
except SystemScopeRequiresRootError as e:
4265+
print_error(f" Restart failed: {e}")
4266+
_print_system_scope_remediation("restart")
41904267
except subprocess.CalledProcessError as e:
41914268
print_error(f" Restart failed: {e}")
41924269
elif service_installed:
4193-
if prompt_yes_no(" Start the gateway service?", True):
4270+
if supports_systemd_services() and _system_scope_wizard_would_need_root():
4271+
_print_system_scope_remediation("start")
4272+
elif prompt_yes_no(" Start the gateway service?", True):
41944273
try:
41954274
if supports_systemd_services():
41964275
systemd_start()
@@ -4200,6 +4279,9 @@ def _is_progress(status: str) -> bool:
42004279
print_error(" Start failed — user systemd not reachable:")
42014280
for line in str(e).splitlines():
42024281
print(f" {line}")
4282+
except SystemScopeRequiresRootError as e:
4283+
print_error(f" Start failed: {e}")
4284+
_print_system_scope_remediation("start")
42034285
except subprocess.CalledProcessError as e:
42044286
print_error(f" Start failed: {e}")
42054287
else:
@@ -4273,6 +4355,14 @@ def gateway_command(args):
42734355
for line in str(e).splitlines():
42744356
print(f" {line}")
42754357
sys.exit(1)
4358+
except SystemScopeRequiresRootError as e:
4359+
# The direct ``hermes gateway install|uninstall|start|stop|restart``
4360+
# path lands here when the user typed a system-scope action without
4361+
# sudo. Same exit code as before — just gives the wizard a way to
4362+
# intercept the same condition with friendlier guidance before the
4363+
# error is raised.
4364+
print(str(e))
4365+
sys.exit(1)
42764366

42774367

42784368
def _gateway_command_inner(args):

hermes_cli/setup.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2462,6 +2462,9 @@ def _is_progress(status: str) -> bool:
24622462
launchd_start,
24632463
launchd_restart,
24642464
UserSystemdUnavailableError,
2465+
SystemScopeRequiresRootError,
2466+
_system_scope_wizard_would_need_root,
2467+
_print_system_scope_remediation,
24652468
)
24662469

24672470
service_installed = _is_service_installed()
@@ -2479,7 +2482,9 @@ def _is_progress(status: str) -> bool:
24792482
print()
24802483

24812484
if service_running:
2482-
if prompt_yes_no(" Restart the gateway to pick up changes?", True):
2485+
if supports_systemd and _system_scope_wizard_would_need_root():
2486+
_print_system_scope_remediation("restart")
2487+
elif prompt_yes_no(" Restart the gateway to pick up changes?", True):
24832488
try:
24842489
if supports_systemd:
24852490
systemd_restart()
@@ -2489,10 +2494,19 @@ def _is_progress(status: str) -> bool:
24892494
print_error(" Restart failed — user systemd not reachable:")
24902495
for line in str(e).splitlines():
24912496
print(f" {line}")
2497+
except SystemScopeRequiresRootError as e:
2498+
# Defense in depth: the pre-check above should have
2499+
# caught this, but a race (unit file appearing mid-run)
2500+
# could still land here. Previously this exited the
2501+
# whole wizard via sys.exit(1).
2502+
print_error(f" Restart failed: {e}")
2503+
_print_system_scope_remediation("restart")
24922504
except Exception as e:
24932505
print_error(f" Restart failed: {e}")
24942506
elif service_installed:
2495-
if prompt_yes_no(" Start the gateway service?", True):
2507+
if supports_systemd and _system_scope_wizard_would_need_root():
2508+
_print_system_scope_remediation("start")
2509+
elif prompt_yes_no(" Start the gateway service?", True):
24962510
try:
24972511
if supports_systemd:
24982512
systemd_start()
@@ -2502,6 +2516,9 @@ def _is_progress(status: str) -> bool:
25022516
print_error(" Start failed — user systemd not reachable:")
25032517
for line in str(e).splitlines():
25042518
print(f" {line}")
2519+
except SystemScopeRequiresRootError as e:
2520+
print_error(f" Start failed: {e}")
2521+
_print_system_scope_remediation("start")
25052522
except Exception as e:
25062523
print_error(f" Start failed: {e}")
25072524
elif supports_service_manager:
@@ -2529,6 +2546,9 @@ def _is_progress(status: str) -> bool:
25292546
print_error(" Start failed — user systemd not reachable:")
25302547
for line in str(e).splitlines():
25312548
print(f" {line}")
2549+
except SystemScopeRequiresRootError as e:
2550+
print_error(f" Start failed: {e}")
2551+
_print_system_scope_remediation("start")
25322552
except Exception as e:
25332553
print_error(f" Start failed: {e}")
25342554
except Exception as e:

tests/hermes_cli/test_gateway_service.py

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2177,3 +2177,171 @@ def fake_remove(interactive=True, dry_run=False):
21772177

21782178
assert prompt_called["count"] == 0
21792179
assert remove_called["invoked"] is False
2180+
2181+
2182+
class TestSystemScopeRequiresRootError:
2183+
"""Tests for the SystemScopeRequiresRootError replacement of sys.exit(1).
2184+
2185+
Before this change, ``_require_root_for_system_service`` called
2186+
``sys.exit(1)`` when non-root code tried a system-scope systemd
2187+
operation. The wizard's ``except Exception`` guards don't catch
2188+
``SystemExit`` (it's a ``BaseException`` subclass), so the user was
2189+
dumped at a bare shell prompt mid-setup. The fix raises a typed
2190+
exception instead, which the wizard intercepts and handles with
2191+
actionable remediation.
2192+
"""
2193+
2194+
def test_require_root_raises_when_non_root(self, monkeypatch):
2195+
monkeypatch.setattr(gateway_cli.os, "geteuid", lambda: 1000)
2196+
2197+
with pytest.raises(gateway_cli.SystemScopeRequiresRootError) as excinfo:
2198+
gateway_cli._require_root_for_system_service("start")
2199+
2200+
assert excinfo.value.args[0] == "System gateway start requires root. Re-run with sudo."
2201+
assert excinfo.value.args[1] == "start"
2202+
# str(e) renders only the message, not the tuple repr, so that
2203+
# wizard format strings like f"Failed: {e}" print cleanly.
2204+
assert str(excinfo.value) == "System gateway start requires root. Re-run with sudo."
2205+
assert f"Failed: {excinfo.value}" == "Failed: System gateway start requires root. Re-run with sudo."
2206+
2207+
def test_require_root_noop_when_root(self, monkeypatch):
2208+
monkeypatch.setattr(gateway_cli.os, "geteuid", lambda: 0)
2209+
2210+
# Should not raise, should not exit
2211+
gateway_cli._require_root_for_system_service("start")
2212+
2213+
def test_error_is_runtime_error_subclass(self):
2214+
"""Wizards use ``except Exception`` guards — the error must be a
2215+
``RuntimeError`` (catchable by ``Exception``), NOT a ``SystemExit``
2216+
(``BaseException``), so the wizard can recover from it.
2217+
"""
2218+
err = gateway_cli.SystemScopeRequiresRootError("msg", "start")
2219+
assert isinstance(err, RuntimeError)
2220+
assert isinstance(err, Exception)
2221+
assert not isinstance(err, SystemExit)
2222+
2223+
2224+
class TestSystemScopeWizardPreCheck:
2225+
"""Tests for _system_scope_wizard_would_need_root — the guard the
2226+
wizard uses to detect the dead-end BEFORE prompting the user to start
2227+
a service that will fail without sudo.
2228+
"""
2229+
2230+
@staticmethod
2231+
def _setup_units(tmp_path, monkeypatch, system_present: bool, user_present: bool):
2232+
sys_dir = tmp_path / "sys"
2233+
usr_dir = tmp_path / "usr"
2234+
sys_dir.mkdir()
2235+
usr_dir.mkdir()
2236+
if system_present:
2237+
(sys_dir / "hermes-gateway.service").write_text("[Unit]\n")
2238+
if user_present:
2239+
(usr_dir / "hermes-gateway.service").write_text("[Unit]\n")
2240+
monkeypatch.setattr(
2241+
gateway_cli,
2242+
"get_systemd_unit_path",
2243+
lambda system=False: (sys_dir if system else usr_dir) / "hermes-gateway.service",
2244+
)
2245+
2246+
def test_non_root_with_only_system_unit_returns_true(self, tmp_path, monkeypatch):
2247+
self._setup_units(tmp_path, monkeypatch, system_present=True, user_present=False)
2248+
monkeypatch.setattr(gateway_cli.os, "geteuid", lambda: 1000)
2249+
2250+
assert gateway_cli._system_scope_wizard_would_need_root() is True
2251+
2252+
def test_root_never_needs_root(self, tmp_path, monkeypatch):
2253+
self._setup_units(tmp_path, monkeypatch, system_present=True, user_present=False)
2254+
monkeypatch.setattr(gateway_cli.os, "geteuid", lambda: 0)
2255+
2256+
assert gateway_cli._system_scope_wizard_would_need_root() is False
2257+
2258+
def test_non_root_with_user_unit_present_returns_false(self, tmp_path, monkeypatch):
2259+
# User-scope unit present — user can start it themselves, no sudo needed.
2260+
self._setup_units(tmp_path, monkeypatch, system_present=True, user_present=True)
2261+
monkeypatch.setattr(gateway_cli.os, "geteuid", lambda: 1000)
2262+
2263+
assert gateway_cli._system_scope_wizard_would_need_root() is False
2264+
2265+
def test_non_root_with_no_units_returns_false(self, tmp_path, monkeypatch):
2266+
self._setup_units(tmp_path, monkeypatch, system_present=False, user_present=False)
2267+
monkeypatch.setattr(gateway_cli.os, "geteuid", lambda: 1000)
2268+
2269+
assert gateway_cli._system_scope_wizard_would_need_root() is False
2270+
2271+
def test_non_root_with_explicit_system_arg_returns_true(self, tmp_path, monkeypatch):
2272+
# Caller passed system=True explicitly (e.g. ``hermes gateway start --system``).
2273+
self._setup_units(tmp_path, monkeypatch, system_present=False, user_present=False)
2274+
monkeypatch.setattr(gateway_cli.os, "geteuid", lambda: 1000)
2275+
2276+
assert gateway_cli._system_scope_wizard_would_need_root(system=True) is True
2277+
2278+
2279+
class TestSystemScopeRemediationOutput:
2280+
"""Tests for _print_system_scope_remediation — the actionable guidance
2281+
shown when the wizard detects a system-scope-only setup as non-root.
2282+
"""
2283+
2284+
def test_start_remediation_mentions_sudo_systemctl_and_uninstall(self, capsys, monkeypatch):
2285+
monkeypatch.setattr(gateway_cli, "get_service_name", lambda: "hermes-gateway")
2286+
2287+
gateway_cli._print_system_scope_remediation("start")
2288+
out = capsys.readouterr().out
2289+
2290+
assert "system-wide service" in out
2291+
assert "start requires root" in out
2292+
assert "sudo systemctl start hermes-gateway" in out
2293+
assert "sudo hermes gateway uninstall --system" in out
2294+
assert "hermes gateway install" in out
2295+
2296+
def test_restart_remediation_uses_systemctl_restart(self, capsys, monkeypatch):
2297+
monkeypatch.setattr(gateway_cli, "get_service_name", lambda: "hermes-gateway")
2298+
2299+
gateway_cli._print_system_scope_remediation("restart")
2300+
out = capsys.readouterr().out
2301+
2302+
assert "restart requires root" in out
2303+
assert "sudo systemctl restart hermes-gateway" in out
2304+
2305+
def test_stop_remediation_uses_systemctl_stop(self, capsys, monkeypatch):
2306+
monkeypatch.setattr(gateway_cli, "get_service_name", lambda: "hermes-gateway")
2307+
2308+
gateway_cli._print_system_scope_remediation("stop")
2309+
out = capsys.readouterr().out
2310+
2311+
assert "stop requires root" in out
2312+
assert "sudo systemctl stop hermes-gateway" in out
2313+
2314+
2315+
class TestGatewayCommandCatchesSystemScopeError:
2316+
"""The direct CLI path (``hermes gateway start --system`` etc.) must
2317+
still exit 1 with a clean message when non-root. The top-level
2318+
``gateway_command`` catches ``SystemScopeRequiresRootError`` and
2319+
converts it back to ``sys.exit(1)``, preserving existing CLI behavior.
2320+
"""
2321+
2322+
def test_non_root_system_start_exits_one_with_clean_message(self, tmp_path, monkeypatch, capsys):
2323+
sys_dir = tmp_path / "sys"
2324+
usr_dir = tmp_path / "usr"
2325+
sys_dir.mkdir()
2326+
usr_dir.mkdir()
2327+
(sys_dir / "hermes-gateway.service").write_text("[Unit]\n")
2328+
monkeypatch.setattr(
2329+
gateway_cli,
2330+
"get_systemd_unit_path",
2331+
lambda system=False: (sys_dir if system else usr_dir) / "hermes-gateway.service",
2332+
)
2333+
monkeypatch.setattr(gateway_cli.os, "geteuid", lambda: 1000)
2334+
monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
2335+
monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
2336+
monkeypatch.setattr(gateway_cli, "kill_gateway_processes", lambda **kw: 0)
2337+
2338+
args = SimpleNamespace(gateway_command="start", system=True, all=False)
2339+
2340+
with pytest.raises(SystemExit) as excinfo:
2341+
gateway_cli.gateway_command(args)
2342+
2343+
assert excinfo.value.code == 1
2344+
out = capsys.readouterr().out
2345+
# Renders the message, NOT the ``('msg', 'action')`` tuple repr
2346+
assert "System gateway start requires root. Re-run with sudo." in out
2347+
assert "('" not in out # no tuple repr leaking through

0 commit comments

Comments
 (0)