13
13
from apify_client import ApifyClientAsync
14
14
from apify_shared .consts import ActorEnvVars , ActorExitCodes , ApifyEnvVars
15
15
from apify_shared .utils import ignore_docs , maybe_extract_enum_member_value
16
- from crawlee import service_container
16
+ from crawlee import service_locator
17
17
from crawlee .events ._types import Event , EventMigratingData , EventPersistStateData
18
18
19
19
from apify ._configuration import Configuration
34
34
from typing_extensions import Self
35
35
36
36
from crawlee .proxy_configuration import _NewUrlFunction
37
+ from crawlee .storage_clients import BaseStorageClient
37
38
38
39
from apify ._models import Webhook
39
40
@@ -71,17 +72,22 @@ def __init__(
71
72
self ._configure_logging = configure_logging
72
73
self ._apify_client = self .new_client ()
73
74
74
- self ._event_manager : EventManager
75
- if self ._configuration .is_at_home :
76
- self ._event_manager = PlatformEventManager (
75
+ # Create an instance of the cloud storage client, the local storage client is obtained
76
+ # from the service locator.
77
+ self ._cloud_storage_client = ApifyStorageClient .from_config (config = self ._configuration )
78
+
79
+ # Set the event manager based on whether the Actor is running on the platform or locally.
80
+ self ._event_manager = (
81
+ PlatformEventManager (
77
82
config = self ._configuration ,
78
83
persist_state_interval = self ._configuration .persist_state_interval ,
79
84
)
80
- else :
81
- self . _event_manager = LocalEventManager (
85
+ if self . is_at_home ()
86
+ else LocalEventManager (
82
87
system_info_interval = self ._configuration .system_info_interval ,
83
88
persist_state_interval = self ._configuration .persist_state_interval ,
84
89
)
90
+ )
85
91
86
92
self ._is_initialized = False
87
93
@@ -94,9 +100,6 @@ async def __aenter__(self) -> Self:
94
100
When you exit the `async with` block, the `Actor.exit()` method is called, and if any exception happens while
95
101
executing the block code, the `Actor.fail` method is called.
96
102
"""
97
- if self ._configure_logging :
98
- _configure_logging (self ._configuration )
99
-
100
103
await self .init ()
101
104
return self
102
105
@@ -156,6 +159,11 @@ def log(self) -> logging.Logger:
156
159
"""The logging.Logger instance the Actor uses."""
157
160
return logger
158
161
162
+ @property
163
+ def _local_storage_client (self ) -> BaseStorageClient :
164
+ """The local storage client the Actor instance uses."""
165
+ return service_locator .get_storage_client ()
166
+
159
167
def _raise_if_not_initialized (self ) -> None :
160
168
if not self ._is_initialized :
161
169
raise RuntimeError ('The Actor was not initialized!' )
@@ -184,18 +192,19 @@ async def init(self) -> None:
184
192
if self ._is_initialized :
185
193
raise RuntimeError ('The Actor was already initialized!' )
186
194
187
- if self ._configuration . token :
188
- service_container . set_cloud_storage_client ( ApifyStorageClient ( configuration = self ._configuration ))
195
+ self ._is_exiting = False
196
+ self ._was_final_persist_state_emitted = False
189
197
190
- if self ._configuration .is_at_home :
191
- service_container .set_default_storage_client_type ('cloud' )
192
- else :
193
- service_container .set_default_storage_client_type ('local' )
198
+ # If the Actor is running on the Apify platform, we set the cloud storage client.
199
+ if self .is_at_home ():
200
+ service_locator .set_storage_client (self ._cloud_storage_client )
194
201
195
- service_container .set_event_manager (self ._event_manager )
202
+ service_locator .set_event_manager (self .event_manager )
203
+ service_locator .set_configuration (self .configuration )
196
204
197
- self ._is_exiting = False
198
- self ._was_final_persist_state_emitted = False
205
+ # The logging configuration has to be called after all service_locator set methods.
206
+ if self ._configure_logging :
207
+ _configure_logging ()
199
208
200
209
self .log .info ('Initializing Actor...' )
201
210
self .log .info ('System info' , extra = get_system_info ())
@@ -245,7 +254,6 @@ async def finalize() -> None:
245
254
await self ._event_manager .wait_for_all_listeners_to_complete (timeout = event_listeners_timeout )
246
255
247
256
await self ._event_manager .__aexit__ (None , None , None )
248
- cast (dict , service_container ._services ).clear () # noqa: SLF001
249
257
250
258
await asyncio .wait_for (finalize (), cleanup_timeout .total_seconds ())
251
259
self ._is_initialized = False
@@ -349,11 +357,13 @@ async def open_dataset(
349
357
self ._raise_if_not_initialized ()
350
358
self ._raise_if_cloud_requested_but_not_configured (force_cloud = force_cloud )
351
359
360
+ storage_client = self ._cloud_storage_client if force_cloud else self ._local_storage_client
361
+
352
362
return await Dataset .open (
353
363
id = id ,
354
364
name = name ,
355
365
configuration = self ._configuration ,
356
- storage_client = service_container . get_storage_client ( client_type = 'cloud' if force_cloud else None ) ,
366
+ storage_client = storage_client ,
357
367
)
358
368
359
369
async def open_key_value_store (
@@ -381,12 +391,13 @@ async def open_key_value_store(
381
391
"""
382
392
self ._raise_if_not_initialized ()
383
393
self ._raise_if_cloud_requested_but_not_configured (force_cloud = force_cloud )
394
+ storage_client = self ._cloud_storage_client if force_cloud else self ._local_storage_client
384
395
385
396
return await KeyValueStore .open (
386
397
id = id ,
387
398
name = name ,
388
399
configuration = self ._configuration ,
389
- storage_client = service_container . get_storage_client ( client_type = 'cloud' if force_cloud else None ) ,
400
+ storage_client = storage_client ,
390
401
)
391
402
392
403
async def open_request_queue (
@@ -417,11 +428,13 @@ async def open_request_queue(
417
428
self ._raise_if_not_initialized ()
418
429
self ._raise_if_cloud_requested_but_not_configured (force_cloud = force_cloud )
419
430
431
+ storage_client = self ._cloud_storage_client if force_cloud else self ._local_storage_client
432
+
420
433
return await RequestQueue .open (
421
434
id = id ,
422
435
name = name ,
423
436
configuration = self ._configuration ,
424
- storage_client = service_container . get_storage_client ( client_type = 'cloud' if force_cloud else None ) ,
437
+ storage_client = storage_client ,
425
438
)
426
439
427
440
async def push_data (self , data : dict | list [dict ]) -> None :
@@ -963,7 +976,7 @@ async def create_proxy_configuration(
963
976
password : str | None = None ,
964
977
groups : list [str ] | None = None ,
965
978
country_code : str | None = None ,
966
- proxy_urls : list [str ] | None = None ,
979
+ proxy_urls : list [str | None ] | None = None ,
967
980
new_url_function : _NewUrlFunction | None = None ,
968
981
) -> ProxyConfiguration | None :
969
982
"""Create a ProxyConfiguration object with the passed proxy configuration.
0 commit comments