Skip to content

Commit 3b5a30e

Browse files
authored
Feature: authenticated file upload (#463)
Problem: file upload is performed in two steps: one to push the file and one to push the associated STORE message. Solution: improve `/api/v0/storage/add_file` to allow the user to send a STORE message along with the file, making the upload an atomic operation.
1 parent 30ed0a8 commit 3b5a30e

File tree

10 files changed

+716
-180
lines changed

10 files changed

+716
-180
lines changed

src/aleph/api_entrypoint.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from configmanager import Config
77

88
import aleph.config
9+
from aleph.chains.chain_service import ChainService
910
from aleph.db.connection import make_engine, make_session_factory
1011
from aleph.services.cache.node_cache import NodeCache
1112
from aleph.services.ipfs import IpfsService
@@ -21,7 +22,10 @@
2122
APP_STATE_NODE_CACHE,
2223
APP_STATE_P2P_CLIENT,
2324
APP_STATE_SESSION_FACTORY,
24-
APP_STATE_STORAGE_SERVICE, APP_STATE_MQ_CHANNEL, APP_STATE_MQ_WS_CHANNEL,
25+
APP_STATE_STORAGE_SERVICE,
26+
APP_STATE_MQ_CHANNEL,
27+
APP_STATE_MQ_WS_CHANNEL,
28+
APP_STATE_CHAIN_SERVICE,
2529
)
2630

2731

@@ -49,6 +53,9 @@ async def configure_aiohttp_app(
4953
ipfs_service=ipfs_service,
5054
node_cache=node_cache,
5155
)
56+
chain_service = ChainService(
57+
storage_service=storage_service, session_factory=session_factory
58+
)
5259

5360
app = create_aiohttp_app()
5461

@@ -67,6 +74,7 @@ async def configure_aiohttp_app(
6774
app[APP_STATE_NODE_CACHE] = node_cache
6875
app[APP_STATE_STORAGE_SERVICE] = storage_service
6976
app[APP_STATE_SESSION_FACTORY] = session_factory
77+
app[APP_STATE_CHAIN_SERVICE] = chain_service
7078

7179
return app
7280

src/aleph/schemas/pending_messages.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,11 @@ class PendingStoreMessage(BasePendingMessage[Literal[MessageType.store], StoreCo
124124
pass
125125

126126

127+
class PendingInlineStoreMessage(PendingStoreMessage):
128+
item_content: str
129+
item_type: Literal[ItemType.inline] # type: ignore[valid-type]
130+
131+
127132
MESSAGE_TYPE_TO_CLASS = {
128133
MessageType.aggregate: PendingAggregateMessage,
129134
MessageType.forget: PendingForgetMessage,

src/aleph/storage.py

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import logging
66
from hashlib import sha256
77
from typing import Any, IO, Optional, cast, Final
8+
from aiohttp import web
89

910
from aleph_message.models import ItemType
1011

@@ -19,9 +20,13 @@
1920
from aleph.services.ipfs.common import get_cid_version
2021
from aleph.services.p2p.http import request_hash as p2p_http_request_hash
2122
from aleph.services.storage.engine import StorageEngine
23+
from aleph.toolkit.constants import MiB
2224
from aleph.types.db_session import DbSession
2325
from aleph.types.files import FileType
2426
from aleph.utils import get_sha256
27+
from aleph.schemas.pending_messages import (
28+
parse_message,
29+
)
2530

2631
LOGGER = logging.getLogger(__name__)
2732

@@ -239,7 +244,9 @@ async def get_json(
239244
async def pin_hash(self, chash: str, timeout: int = 30, tries: int = 1):
240245
await self.ipfs_service.pin_add(cid=chash, timeout=timeout, tries=tries)
241246

242-
async def add_json(self, session: DbSession, value: Any, engine: ItemType = ItemType.ipfs) -> str:
247+
async def add_json(
248+
self, session: DbSession, value: Any, engine: ItemType = ItemType.ipfs
249+
) -> str:
243250
content = aleph_json.dumps(value)
244251

245252
if engine == ItemType.ipfs:
@@ -259,6 +266,17 @@ async def add_json(self, session: DbSession, value: Any, engine: ItemType = Item
259266

260267
return chash
261268

269+
async def add_file_content_to_local_storage(
270+
self, session: DbSession, file_content: bytes, file_hash: str
271+
) -> None:
272+
await self.storage_engine.write(filename=file_hash, content=file_content)
273+
upsert_file(
274+
session=session,
275+
file_hash=file_hash,
276+
size=len(file_content),
277+
file_type=FileType.FILE,
278+
)
279+
262280
async def add_file(
263281
self, session: DbSession, fileobject: IO, engine: ItemType = ItemType.ipfs
264282
) -> str:
@@ -275,12 +293,8 @@ async def add_file(
275293
else:
276294
raise ValueError(f"Unsupported item type: {engine}")
277295

278-
await self.storage_engine.write(filename=file_hash, content=file_content)
279-
upsert_file(
280-
session=session,
281-
file_hash=file_hash,
282-
size=len(file_content),
283-
file_type=FileType.FILE,
296+
await self.add_file_content_to_local_storage(
297+
session=session, file_content=file_content, file_hash=file_hash
284298
)
285299

286300
return file_hash

src/aleph/web/controllers/app_state_getters.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from aleph_p2p_client import AlephP2PServiceClient
1212
from configmanager import Config
1313

14+
from aleph.chains.chain_service import ChainService
1415
from aleph.services.cache.node_cache import NodeCache
1516
from aleph.services.ipfs import IpfsService
1617
from aleph.storage import StorageService
@@ -27,7 +28,7 @@
2728
APP_STATE_P2P_CLIENT = "p2p_client"
2829
APP_STATE_SESSION_FACTORY = "session_factory"
2930
APP_STATE_STORAGE_SERVICE = "storage_service"
30-
31+
APP_STATE_CHAIN_SERVICE = "chain_service"
3132

3233
T = TypeVar("T")
3334

@@ -103,3 +104,7 @@ def get_session_factory_from_request(request: web.Request) -> DbSessionFactory:
103104

104105
def get_storage_service_from_request(request: web.Request) -> StorageService:
105106
return cast(StorageService, request.app[APP_STATE_STORAGE_SERVICE])
107+
108+
109+
def get_chain_service_from_request(request: web.Request) -> ChainService:
110+
return cast(ChainService, request.app[APP_STATE_CHAIN_SERVICE])

src/aleph/web/controllers/ipfs.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
get_ipfs_service_from_request,
99
get_session_factory_from_request,
1010
)
11-
from aleph.web.controllers.utils import multidict_proxy_to_io
11+
from aleph.web.controllers.utils import file_field_to_io
1212

1313

1414
async def ipfs_add_file(request: web.Request):
@@ -20,7 +20,12 @@ async def ipfs_add_file(request: web.Request):
2020

2121
# No need to pin it here anymore.
2222
post = await request.post()
23-
ipfs_add_response = await ipfs_service.add_file(multidict_proxy_to_io(post))
23+
try:
24+
file_field = post["file"]
25+
except KeyError:
26+
raise web.HTTPUnprocessableEntity(reason="Missing 'file' in multipart form.")
27+
28+
ipfs_add_response = await ipfs_service.add_file(file_field_to_io(file_field))
2429

2530
cid = ipfs_add_response["Hash"]
2631
name = ipfs_add_response["Name"]

src/aleph/web/controllers/p2p.py

Lines changed: 17 additions & 141 deletions
Original file line numberDiff line numberDiff line change
@@ -1,57 +1,32 @@
11
import asyncio
22
import json
33
import logging
4-
from typing import Dict, cast, Optional, Any, Mapping, List, Union
4+
from typing import Dict, cast, Optional, Any, List, Union
55

6-
import aio_pika.abc
76
from aiohttp import web
87
from aleph_p2p_client import AlephP2PServiceClient
98
from configmanager import Config
109
from pydantic import BaseModel, Field, ValidationError
1110

12-
import aleph.toolkit.json as aleph_json
13-
from aleph.schemas.pending_messages import parse_message, BasePendingMessage
1411
from aleph.services.ipfs import IpfsService
1512
from aleph.services.p2p.pubsub import publish as pub_p2p
1613
from aleph.toolkit.shield import shielded
17-
from aleph.types.message_status import (
18-
InvalidMessageException,
19-
MessageStatus,
20-
MessageProcessingStatus,
21-
)
2214
from aleph.types.protocol import Protocol
2315
from aleph.web.controllers.app_state_getters import (
2416
get_config_from_request,
2517
get_ipfs_service_from_request,
2618
get_p2p_client_from_request,
27-
get_mq_channel_from_request,
2819
)
29-
from aleph.web.controllers.utils import mq_make_aleph_message_topic_queue
20+
from aleph.web.controllers.utils import (
21+
validate_message_dict,
22+
broadcast_and_process_message,
23+
PublicationStatus,
24+
broadcast_status_to_http_status,
25+
)
3026

3127
LOGGER = logging.getLogger(__name__)
3228

3329

34-
class PublicationStatus(BaseModel):
35-
status: str
36-
failed: List[Protocol]
37-
38-
@classmethod
39-
def from_failures(cls, failed_publications: List[Protocol]):
40-
status = {
41-
0: "success",
42-
1: "warning",
43-
2: "error",
44-
}[len(failed_publications)]
45-
return cls(status=status, failed=failed_publications)
46-
47-
48-
def _validate_message_dict(message_dict: Mapping[str, Any]) -> BasePendingMessage:
49-
try:
50-
return parse_message(message_dict)
51-
except InvalidMessageException as e:
52-
raise web.HTTPUnprocessableEntity(body=str(e))
53-
54-
5530
def _validate_request_data(config: Config, request_data: Dict) -> None:
5631
"""
5732
Validates the content of a JSON pubsub message depending on the channel
@@ -83,7 +58,7 @@ def _validate_request_data(config: Config, request_data: Dict) -> None:
8358
reason="'data': must be deserializable as JSON."
8459
)
8560

86-
_validate_message_dict(message_dict)
61+
validate_message_dict(message_dict)
8762

8863

8964
async def _pub_on_p2p_topics(
@@ -142,48 +117,11 @@ async def pub_json(request: web.Request):
142117
)
143118

144119

145-
async def _mq_read_one_message(
146-
mq_queue: aio_pika.abc.AbstractQueue, timeout: float
147-
) -> Optional[aio_pika.abc.AbstractIncomingMessage]:
148-
"""
149-
Consume one element from a message queue and then return.
150-
"""
151-
152-
queue: asyncio.Queue = asyncio.Queue()
153-
154-
async def _process_message(message: aio_pika.abc.AbstractMessage):
155-
await queue.put(message)
156-
157-
consumer_tag = await mq_queue.consume(_process_message, no_ack=True)
158-
159-
try:
160-
return await asyncio.wait_for(queue.get(), timeout)
161-
except asyncio.TimeoutError:
162-
return None
163-
finally:
164-
await mq_queue.cancel(consumer_tag)
165-
166-
167-
def _processing_status_to_http_status(status: MessageProcessingStatus) -> int:
168-
mapping = {
169-
MessageProcessingStatus.PROCESSED_NEW_MESSAGE: 200,
170-
MessageProcessingStatus.PROCESSED_CONFIRMATION: 200,
171-
MessageProcessingStatus.FAILED_WILL_RETRY: 202,
172-
MessageProcessingStatus.FAILED_REJECTED: 422,
173-
}
174-
return mapping[status]
175-
176-
177120
class PubMessageRequest(BaseModel):
178121
sync: bool = False
179122
message_dict: Dict[str, Any] = Field(alias="message")
180123

181124

182-
class PubMessageResponse(BaseModel):
183-
publication_status: PublicationStatus
184-
message_status: Optional[MessageStatus]
185-
186-
187125
@shielded
188126
async def pub_message(request: web.Request):
189127
try:
@@ -194,76 +132,14 @@ async def pub_message(request: web.Request):
194132
# Body must be valid JSON
195133
raise web.HTTPUnprocessableEntity()
196134

197-
pending_message = _validate_message_dict(request_data.message_dict)
198-
199-
# In sync mode, wait for a message processing event. We need to create the queue
200-
# before publishing the message on P2P topics in order to guarantee that the event
201-
# will be picked up.
202-
config = get_config_from_request(request)
203-
204-
if request_data.sync:
205-
mq_channel = await get_mq_channel_from_request(request=request, logger=LOGGER)
206-
mq_queue = await mq_make_aleph_message_topic_queue(
207-
channel=mq_channel,
208-
config=config,
209-
routing_key=f"*.{pending_message.item_hash}",
210-
)
211-
else:
212-
mq_queue = None
213-
214-
# We publish the message on P2P topics early, for 3 reasons:
215-
# 1. Just because this node is unable to process the message does not
216-
# necessarily mean the message is incorrect (ex: bug in a new version).
217-
# 2. If the publication fails after the processing, we end up in a situation where
218-
# a message exists without being propagated to the other nodes, ultimately
219-
# causing sync issues on the network.
220-
# 3. The message is currently fed to this node using the P2P service client
221-
# loopback mechanism.
222-
ipfs_service = get_ipfs_service_from_request(request)
223-
p2p_client = get_p2p_client_from_request(request)
224-
225-
message_topic = config.aleph.queue_topic.value
226-
failed_publications = await _pub_on_p2p_topics(
227-
p2p_client=p2p_client,
228-
ipfs_service=ipfs_service,
229-
topic=message_topic,
230-
payload=aleph_json.dumps(request_data.message_dict),
135+
pending_message = validate_message_dict(request_data.message_dict)
136+
broadcast_status = await broadcast_and_process_message(
137+
pending_message=pending_message,
138+
message_dict=request_data.message_dict,
139+
sync=request_data.sync,
140+
request=request,
141+
logger=LOGGER,
231142
)
232-
pub_status = PublicationStatus.from_failures(failed_publications)
233-
if pub_status.status == "error":
234-
return web.json_response(
235-
text=PubMessageResponse(
236-
publication_status=pub_status, message_status=None
237-
).json(),
238-
status=500,
239-
)
240-
241-
status = PubMessageResponse(
242-
publication_status=pub_status, message_status=MessageStatus.PENDING
243-
)
244-
245-
# When publishing in async mode, just respond with 202 (Accepted).
246-
message_accepted_response = web.json_response(text=status.json(), status=202)
247-
if not request_data.sync:
248-
return message_accepted_response
249-
250-
# Ignore type checking here, we know that mq_queue is set at this point
251-
assert mq_queue is not None
252-
response = await _mq_read_one_message(mq_queue, timeout=30)
253-
254-
# Delete the queue immediately
255-
await mq_queue.delete(if_empty=False)
256-
257-
# If the message was not processed before the timeout, return a 202.
258-
if response is None:
259-
return message_accepted_response
260-
261-
routing_key = response.routing_key
262-
assert routing_key is not None # again, for type checking
263-
status_str, _item_hash = routing_key.split(".")
264-
processing_status = MessageProcessingStatus(status_str)
265-
status_code = _processing_status_to_http_status(processing_status)
266-
267-
status.message_status = processing_status.to_message_status()
268143

269-
return web.json_response(text=status.json(), status=status_code)
144+
status_code = broadcast_status_to_http_status(broadcast_status)
145+
return web.json_response(text=broadcast_status.json(), status=status_code)

0 commit comments

Comments
 (0)