Skip to content

Commit 7fa0f8b

Browse files
authored
Merge pull request #1450 from lsst-sqre/t/DM-54896
DM-54896: Avoid alert spam on database desynchronization
2 parents 718996a + bd632d7 commit 7fa0f8b

7 files changed

Lines changed: 166 additions & 31 deletions

File tree

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
### Bug fixes
2+
3+
- Rather than raising uncaught exceptions when a token does not exist in the SQL database when creating a child token, log an exception and return a 401 error (for ingresses) or a 400 error (for OpenID Connect authentication). This desynchronization will be caught by the nightly audit `CronJob` and doesn't need to spam alerts.

src/gafaelfawr/handlers/ingress.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -406,9 +406,12 @@ async def get_auth(
406406
response.headers.update(rate_status.to_http_headers())
407407

408408
# Construct the response headers.
409-
headers = await build_success_headers(
410-
context, auth_config, token_data, user_info
411-
)
409+
try:
410+
headers = await build_success_headers(
411+
context, auth_config, token_data, user_info
412+
)
413+
except InvalidTokenError as e:
414+
raise generate_challenge(context, auth_config.auth_type, e) from e
412415
for key, value in headers:
413416
response.headers.append(key, value)
414417

@@ -866,12 +869,6 @@ async def build_success_headers(
866869
-------
867870
headers
868871
Headers to include in the response.
869-
870-
Raises
871-
------
872-
fastapi.HTTPException
873-
Raised if user information could not be retrieved from external
874-
systems.
875872
"""
876873
headers = [("X-Auth-Request-User", token_data.username)]
877874
if user_info.email:

src/gafaelfawr/handlers/oidc.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ async def post_token(
267267
ip_address=context.ip_address,
268268
)
269269
except OAuthError as e:
270-
context.logger.warning("%s", e.message, error=str(e))
270+
context.logger.warning(e.message, error=str(e))
271271
content = {
272272
"error": e.error,
273273
"error_description": e.message if e.hide_error else str(e),

src/gafaelfawr/services/token.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from safir.database import CountedPaginatedList
99
from safir.datetime import format_datetime_for_logging
1010
from safir.redis import DeserializeError
11+
from sqlalchemy.exc import IntegrityError
1112
from sqlalchemy.ext.asyncio import AsyncSession
1213
from structlog.stdlib import BoundLogger
1314

@@ -26,6 +27,7 @@
2627
InvalidExpiresError,
2728
InvalidIPAddressError,
2829
InvalidScopesError,
30+
InvalidTokenError,
2931
PermissionDeniedError,
3032
)
3133
from ..models.enums import TokenChange, TokenType
@@ -266,6 +268,9 @@ async def create_oidc_token(
266268
------
267269
InvalidGrantError
268270
Raised if the underlying authentication token has expired.
271+
InvalidTokenError
272+
Raised if the parent token is invalid, probably due to a
273+
desynchronization between Redis and the SQL database.
269274
"""
270275
token = Token()
271276
created = datetime.now(tz=UTC).replace(microsecond=0)
@@ -302,6 +307,11 @@ async def create_oidc_token(
302307
async with self._session.begin():
303308
await self._token_db_store.add(data, parent=parent)
304309
await self._token_change_store.add(history_entry)
310+
except IntegrityError as e:
311+
msg = "Integrity error creating OIDC token"
312+
self._logger.exception(msg, error=str(e))
313+
await self._token_redis_store.delete(data.token.key)
314+
raise InvalidTokenError(msg) from e
305315
except Exception:
306316
await self._token_redis_store.delete(data.token.key)
307317
raise

src/gafaelfawr/services/token_cache.py

Lines changed: 50 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,13 @@
44

55
import sentry_sdk
66
from safir.datetime import format_datetime_for_logging
7+
from sqlalchemy.exc import IntegrityError
78
from sqlalchemy.ext.asyncio import AsyncSession
89
from structlog.stdlib import BoundLogger
910

1011
from ..cache import InternalTokenCache, NotebookTokenCache
1112
from ..config import Config
13+
from ..exceptions import InvalidTokenError
1214
from ..models.enums import TokenChange, TokenType
1315
from ..models.history import TokenChangeHistoryEntry
1416
from ..models.token import Token, TokenData
@@ -141,10 +143,9 @@ async def get_internal_token(
141143
valid = await self._is_token_valid(token, minimum_lifetime, scopes)
142144
if token and valid:
143145
return token
144-
async with self._session.begin():
145-
token = await self._create_internal_token(
146-
token_data, service, scopes, ip_address, minimum_lifetime
147-
)
146+
token = await self._create_internal_token(
147+
token_data, service, scopes, ip_address, minimum_lifetime
148+
)
148149
self._internal_cache.store(token_data, service, scopes, token)
149150
return token
150151

@@ -187,10 +188,9 @@ async def get_notebook_token(
187188
token = self._notebook_cache.get(token_data)
188189
if token and await self._is_token_valid(token, minimum_lifetime):
189190
return token
190-
async with self._session.begin():
191-
token = await self._create_notebook_token(
192-
token_data, ip_address, minimum_lifetime
193-
)
191+
token = await self._create_notebook_token(
192+
token_data, ip_address, minimum_lifetime
193+
)
194194
self._notebook_cache.store(token_data, token)
195195
return token
196196

@@ -225,14 +225,21 @@ async def _create_internal_token(
225225
-------
226226
Token
227227
Retrieved or newly-created internal token.
228+
229+
Raises
230+
------
231+
InvalidTokenError
232+
Raised if the parent token is invalid, probably due to a
233+
desynchronization between Redis and the SQL database.
228234
"""
229235
# See if there's already a matching internal token.
230-
key = await self._token_db_store.get_internal_token_key(
231-
token_data,
232-
service,
233-
scopes,
234-
self._minimum_expiration(token_data, minimum_lifetime),
235-
)
236+
async with self._session.begin():
237+
key = await self._token_db_store.get_internal_token_key(
238+
token_data,
239+
service,
240+
scopes,
241+
self._minimum_expiration(token_data, minimum_lifetime),
242+
)
236243
if key:
237244
data = await self._token_redis_store.get_data_by_key(key)
238245
if data:
@@ -272,10 +279,17 @@ async def _create_internal_token(
272279
event_time=created,
273280
)
274281

282+
parent = token_data.token.key
275283
await self._token_redis_store.store_data(data)
276284
try:
277-
await self._token_db_store.add(data, parent=token_data.token.key)
278-
await self._token_change_store.add(history_entry)
285+
async with self._session.begin():
286+
await self._token_db_store.add(data, parent=parent)
287+
await self._token_change_store.add(history_entry)
288+
except IntegrityError as e:
289+
msg = "Integrity error creating internal token"
290+
self._logger.exception(msg, error=str(e))
291+
await self._token_redis_store.delete(data.token.key)
292+
raise InvalidTokenError(msg) from e
279293
except Exception:
280294
await self._token_redis_store.delete(data.token.key)
281295
raise
@@ -316,11 +330,19 @@ async def _create_notebook_token(
316330
-------
317331
Token
318332
The retrieved or newly-created notebook token.
333+
334+
Raises
335+
------
336+
InvalidTokenError
337+
Raised if the parent token is invalid, probably due to a
338+
desynchronization between Redis and the SQL database.
319339
"""
320340
# See if there's already a matching notebook token.
321-
key = await self._token_db_store.get_notebook_token_key(
322-
token_data, self._minimum_expiration(token_data, minimum_lifetime)
323-
)
341+
async with self._session.begin():
342+
key = await self._token_db_store.get_notebook_token_key(
343+
token_data,
344+
self._minimum_expiration(token_data, minimum_lifetime),
345+
)
324346
if key:
325347
data = await self._token_redis_store.get_data_by_key(key)
326348
if data:
@@ -358,10 +380,17 @@ async def _create_notebook_token(
358380
event_time=created,
359381
)
360382

383+
parent = token_data.token.key
361384
await self._token_redis_store.store_data(data)
362385
try:
363-
await self._token_db_store.add(data, parent=token_data.token.key)
364-
await self._token_change_store.add(history_entry)
386+
async with self._session.begin():
387+
await self._token_db_store.add(data, parent=parent)
388+
await self._token_change_store.add(history_entry)
389+
except IntegrityError as e:
390+
msg = "Integrity error creating notebook token"
391+
self._logger.exception(msg, error=str(e))
392+
await self._token_redis_store.delete(data.token.key)
393+
raise InvalidTokenError(msg) from e
365394
except Exception:
366395
await self._token_redis_store.delete(data.token.key)
367396
raise
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
"""Tests for errors in the ``/ingress/auth`` route."""
2+
3+
import pytest
4+
from httpx import AsyncClient
5+
6+
from gafaelfawr.factory import Factory
7+
from gafaelfawr.storage.token import TokenDatabaseStore
8+
9+
from ..support.tokens import create_session_token
10+
11+
12+
@pytest.mark.asyncio
13+
async def test_database_desync(client: AsyncClient, factory: Factory) -> None:
14+
"""Test error handling when a token is missing from the database.
15+
16+
If a token exists only in Redis but not in the database, internal tokens
17+
cannot be generated for it. Test error handling in that case.
18+
"""
19+
token_data = await create_session_token(factory, scopes={"read:all"})
20+
token_store = TokenDatabaseStore(factory.session)
21+
async with factory.session.begin():
22+
assert await token_store.delete(token_data.token.key)
23+
24+
# Authentication with an internal token requested should return a 401.
25+
r = await client.get(
26+
"/ingress/auth",
27+
params={
28+
"scope": "read:all",
29+
"service": "test",
30+
"delegate_to": "test",
31+
},
32+
headers={"Authorization": f"Bearer {token_data.token}"},
33+
)
34+
assert r.status_code == 401
35+
36+
# Likewise for a notebook token.
37+
r = await client.get(
38+
"/ingress/auth",
39+
params={
40+
"scope": "read:all",
41+
"service": "test",
42+
"notebook": "true",
43+
},
44+
headers={"Authorization": f"Bearer {token_data.token}"},
45+
)
46+
assert r.status_code == 401

tests/handlers/oidc_test.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
OIDCTokenReply,
2626
)
2727
from gafaelfawr.models.token import Token
28+
from gafaelfawr.storage.token import TokenDatabaseStore
2829
from gafaelfawr.util import number_to_base64
2930

3031
from ..support.config import build_oidc_client
@@ -1100,3 +1101,52 @@ async def test_userinfo_internal(
11001101
"preferred_username": token_data.username,
11011102
"sub": token_data.username,
11021103
}
1104+
1105+
1106+
@pytest.mark.parametrize("config", ["github-oidc-server"], indirect=True)
1107+
@pytest.mark.asyncio
1108+
async def test_database_desync(
1109+
config: Config, client: AsyncClient, factory: Factory
1110+
) -> None:
1111+
"""Test error handling when a token is missing from the database.
1112+
1113+
If a token exists only in Redis but not in the database, internal tokens
1114+
cannot be generated for it. Test error handling in that case.
1115+
"""
1116+
redirect_uri = f"https://{TEST_HOSTNAME}/foo"
1117+
assert config.oidc_server
1118+
config.oidc_server.clients = [
1119+
build_oidc_client("some-id", "some-secret", redirect_uri)
1120+
]
1121+
token_data = await create_session_token(factory, scopes={"read:all"})
1122+
await set_session_cookie(client, token_data.token)
1123+
token_store = TokenDatabaseStore(factory.session)
1124+
async with factory.session.begin():
1125+
assert await token_store.delete(token_data.token.key)
1126+
1127+
r = await client.get(
1128+
"/auth/openid/login",
1129+
params={
1130+
"response_type": "code",
1131+
"scope": "openid",
1132+
"client_id": "some-id",
1133+
"state": "random-state",
1134+
"redirect_uri": redirect_uri,
1135+
},
1136+
)
1137+
assert r.status_code == 307
1138+
url = urlparse(r.headers["Location"])
1139+
query = parse_qs(url.query)
1140+
code = query["code"][0]
1141+
1142+
r = await client.post(
1143+
"/auth/openid/token",
1144+
data={
1145+
"grant_type": "authorization_code",
1146+
"client_id": "some-id",
1147+
"client_secret": "some-secret",
1148+
"code": code,
1149+
"redirect_uri": redirect_uri,
1150+
},
1151+
)
1152+
assert r.status_code == 400

0 commit comments

Comments
 (0)