Skip to content

Commit e0774cc

Browse files
authored
regular initializers for credentials (#1142)
* removes all dlt dependencies from logger
* uses dataclass_transform to generate init methods for configspec, warns on inconsistent settings
* changes all configspecs to conform with new init methods, drops special init for credentials
* fixes setting native value None
1 parent cf3ac9f commit e0774cc

File tree

97 files changed

+570
-764
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

97 files changed

+570
-764
lines changed

dlt/common/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1+
from dlt.common import logger
12
from dlt.common.arithmetics import Decimal
23
from dlt.common.wei import Wei
34
from dlt.common.pendulum import pendulum
45
from dlt.common.json import json
56
from dlt.common.runtime.signals import sleep
6-
from dlt.common.runtime import logger
77

88
__all__ = ["Decimal", "Wei", "pendulum", "json", "sleep", "logger"]

dlt/common/configuration/resolve.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ def initialize_credentials(hint: Any, initial_value: Any) -> CredentialsConfigur
7676
first_credentials: CredentialsConfiguration = None
7777
for idx, spec in enumerate(specs_in_union):
7878
try:
79-
credentials = spec(initial_value)
79+
credentials = spec.from_init_value(initial_value)
8080
if credentials.is_resolved():
8181
return credentials
8282
# keep the first credentials in the union to return in case all of them match but do not resolve
@@ -88,7 +88,7 @@ def initialize_credentials(hint: Any, initial_value: Any) -> CredentialsConfigur
8888
return first_credentials
8989
else:
9090
assert issubclass(hint, CredentialsConfiguration)
91-
return hint(initial_value) # type: ignore
91+
return hint.from_init_value(initial_value) # type: ignore
9292

9393

9494
def inject_section(

dlt/common/configuration/specs/api_credentials.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66

77
@configspec
88
class OAuth2Credentials(CredentialsConfiguration):
9-
client_id: str
10-
client_secret: TSecretValue
11-
refresh_token: Optional[TSecretValue]
9+
client_id: str = None
10+
client_secret: TSecretValue = None
11+
refresh_token: Optional[TSecretValue] = None
1212
scopes: Optional[List[str]] = None
1313

1414
token: Optional[TSecretValue] = None

dlt/common/configuration/specs/aws_credentials.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,3 +121,9 @@ def parse_native_representation(self, native_value: Any) -> None:
121121
self.__is_resolved__ = True
122122
except Exception:
123123
raise InvalidBoto3Session(self.__class__, native_value)
124+
125+
@classmethod
126+
def from_session(cls, botocore_session: Any) -> "AwsCredentials":
127+
self = cls()
128+
self.parse_native_representation(botocore_session)
129+
return self

dlt/common/configuration/specs/base_configuration.py

Lines changed: 52 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import inspect
33
import contextlib
44
import dataclasses
5+
import warnings
56

67
from collections.abc import Mapping as C_Mapping
78
from typing import (
@@ -19,7 +20,7 @@
1920
ClassVar,
2021
TypeVar,
2122
)
22-
from typing_extensions import get_args, get_origin
23+
from typing_extensions import get_args, get_origin, dataclass_transform
2324
from functools import wraps
2425

2526
if TYPE_CHECKING:
@@ -44,6 +45,7 @@
4445
_F_BaseConfiguration: Any = type(object)
4546
_F_ContainerInjectableContext: Any = type(object)
4647
_T = TypeVar("_T", bound="BaseConfiguration")
48+
_C = TypeVar("_C", bound="CredentialsConfiguration")
4749

4850

4951
def is_base_configuration_inner_hint(inner_hint: Type[Any]) -> bool:
@@ -106,18 +108,26 @@ def is_secret_hint(hint: Type[Any]) -> bool:
106108

107109

108110
@overload
109-
def configspec(cls: Type[TAnyClass]) -> Type[TAnyClass]: ...
111+
def configspec(cls: Type[TAnyClass], init: bool = True) -> Type[TAnyClass]: ...
110112

111113

112114
@overload
113-
def configspec(cls: None = ...) -> Callable[[Type[TAnyClass]], Type[TAnyClass]]: ...
115+
def configspec(
116+
cls: None = ..., init: bool = True
117+
) -> Callable[[Type[TAnyClass]], Type[TAnyClass]]: ...
114118

115119

120+
@dataclass_transform(eq_default=False, field_specifiers=(dataclasses.Field, dataclasses.field))
116121
def configspec(
117-
cls: Optional[Type[Any]] = None,
122+
cls: Optional[Type[Any]] = None, init: bool = True
118123
) -> Union[Type[TAnyClass], Callable[[Type[TAnyClass]], Type[TAnyClass]]]:
119124
"""Converts (via derivation) any decorated class to a Python dataclass that may be used as a spec to resolve configurations
120125
126+
The `__init__` method is synthesized by default. The `init` flag is ignored if the decorated class implements a custom `__init__`, as well as
127+
when any of the base classes has no synthesized `__init__`.
128+
129+
All fields must have default values. This decorator will assign a `None` default to any field that misses one.
130+
121131
In comparison to the Python dataclass, a spec implements a full dictionary interface for its attributes, allows instance creation from e.g. strings
122132
or other types (parsing, deserialization) and gives control over the configuration resolution process. See `BaseConfiguration` and `CredentialsConfiguration` for
123133
more information.
@@ -142,6 +152,10 @@ def wrap(cls: Type[TAnyClass]) -> Type[TAnyClass]:
142152
# get all annotations without corresponding attributes and set them to None
143153
for ann in cls.__annotations__:
144154
if not hasattr(cls, ann) and not ann.startswith(("__", "_abc_")):
155+
warnings.warn(
156+
f"Missing default value for field {ann} on {cls.__name__}. None assumed. All"
157+
" fields in configspec must have default."
158+
)
145159
setattr(cls, ann, None)
146160
# get all attributes without corresponding annotations
147161
for att_name, att_value in list(cls.__dict__.items()):
@@ -177,17 +191,18 @@ def default_factory(att_value=att_value): # type: ignore[no-untyped-def]
177191

178192
# We don't want to overwrite user's __init__ method
179193
# Create dataclass init only when not defined in the class
180-
# (never put init on BaseConfiguration itself)
181-
try:
182-
is_base = cls is BaseConfiguration
183-
except NameError:
184-
is_base = True
185-
init = False
186-
base_params = getattr(cls, "__dataclass_params__", None)
187-
if not is_base and (base_params and base_params.init or cls.__init__ is object.__init__):
188-
init = True
194+
# NOTE: any class without synthesized __init__ breaks the creation chain
195+
has_default_init = super(cls, cls).__init__ == cls.__init__ # type: ignore[misc]
196+
base_params = getattr(cls, "__dataclass_params__", None) # cls.__init__ is object.__init__
197+
synth_init = init and ((not base_params or base_params.init) and has_default_init)
198+
if synth_init != init and has_default_init:
199+
warnings.warn(
200+
f"__init__ method will not be generated on {cls.__name__} because bas class didn't"
201+
" synthesize __init__. Please correct `init` flag in confispec decorator. You are"
202+
" probably receiving incorrect __init__ signature for type checking"
203+
)
189204
# do not generate repr as it may contain secret values
190-
return dataclasses.dataclass(cls, init=init, eq=False, repr=False) # type: ignore
205+
return dataclasses.dataclass(cls, init=synth_init, eq=False, repr=False) # type: ignore
191206

192207
# called with parenthesis
193208
if cls is None:
@@ -198,12 +213,14 @@ def default_factory(att_value=att_value): # type: ignore[no-untyped-def]
198213

199214
@configspec
200215
class BaseConfiguration(MutableMapping[str, Any]):
201-
__is_resolved__: bool = dataclasses.field(default=False, init=False, repr=False)
216+
__is_resolved__: bool = dataclasses.field(default=False, init=False, repr=False, compare=False)
202217
"""True when all config fields were resolved and have a specified value type"""
203-
__section__: str = dataclasses.field(default=None, init=False, repr=False)
204-
"""Obligatory section used by config providers when searching for keys, always present in the search path"""
205-
__exception__: Exception = dataclasses.field(default=None, init=False, repr=False)
218+
__exception__: Exception = dataclasses.field(
219+
default=None, init=False, repr=False, compare=False
220+
)
206221
"""Holds the exception that prevented the full resolution"""
222+
__section__: ClassVar[str] = None
223+
"""Obligatory section used by config providers when searching for keys, always present in the search path"""
207224
__config_gen_annotations__: ClassVar[List[str]] = []
208225
"""Additional annotations for config generator, currently holds a list of fields of interest that have defaults"""
209226
__dataclass_fields__: ClassVar[Dict[str, TDtcField]]
@@ -342,9 +359,10 @@ def call_method_in_mro(config, method_name: str) -> None:
342359
class CredentialsConfiguration(BaseConfiguration):
343360
"""Base class for all credentials. Credentials are configurations that may be stored only by providers supporting secrets."""
344361

345-
__section__: str = "credentials"
362+
__section__: ClassVar[str] = "credentials"
346363

347-
def __init__(self, init_value: Any = None) -> None:
364+
@classmethod
365+
def from_init_value(cls: Type[_C], init_value: Any = None) -> _C:
348366
"""Initializes credentials from `init_value`
349367
350368
Init value may be a native representation of the credentials or a dict. In case of native representation (for example a connection string or JSON with service account credentials)
@@ -353,14 +371,10 @@ def __init__(self, init_value: Any = None) -> None:
353371
354372
Credentials will be marked as resolved if all required fields are set.
355373
"""
356-
if init_value is None:
357-
return
358-
elif isinstance(init_value, C_Mapping):
359-
self.update(init_value)
360-
else:
361-
self.parse_native_representation(init_value)
362-
if not self.is_partial():
363-
self.resolve()
374+
# create an instance
375+
self = cls()
376+
self._apply_init_value(init_value)
377+
return self
364378

365379
def to_native_credentials(self) -> Any:
366380
"""Returns native credentials object.
@@ -369,6 +383,16 @@ def to_native_credentials(self) -> Any:
369383
"""
370384
return self.to_native_representation()
371385

386+
def _apply_init_value(self, init_value: Any = None) -> None:
387+
if isinstance(init_value, C_Mapping):
388+
self.update(init_value)
389+
elif init_value is not None:
390+
self.parse_native_representation(init_value)
391+
else:
392+
return
393+
if not self.is_partial():
394+
self.resolve()
395+
372396
def __str__(self) -> str:
373397
"""Get string representation of credentials to be displayed, with all secret parts removed"""
374398
return super().__str__()

dlt/common/configuration/specs/config_providers_context.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import contextlib
2+
import dataclasses
23
import io
34
from typing import ClassVar, List
45

@@ -28,7 +29,7 @@ class ConfigProvidersConfiguration(BaseConfiguration):
2829
only_toml_fragments: bool = True
2930

3031
# always look in providers
31-
__section__ = known_sections.PROVIDERS
32+
__section__: ClassVar[str] = known_sections.PROVIDERS
3233

3334

3435
@configspec
@@ -37,8 +38,12 @@ class ConfigProvidersContext(ContainerInjectableContext):
3738

3839
global_affinity: ClassVar[bool] = True
3940

40-
providers: List[ConfigProvider]
41-
context_provider: ConfigProvider
41+
providers: List[ConfigProvider] = dataclasses.field(
42+
default=None, init=False, repr=False, compare=False
43+
)
44+
context_provider: ConfigProvider = dataclasses.field(
45+
default=None, init=False, repr=False, compare=False
46+
)
4247

4348
def __init__(self) -> None:
4449
super().__init__()

dlt/common/configuration/specs/config_section_context.py

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
class ConfigSectionContext(ContainerInjectableContext):
99
TMergeFunc = Callable[["ConfigSectionContext", "ConfigSectionContext"], None]
1010

11-
pipeline_name: Optional[str]
11+
pipeline_name: Optional[str] = None
1212
sections: Tuple[str, ...] = ()
1313
merge_style: TMergeFunc = None
1414
source_state_key: str = None
@@ -70,13 +70,3 @@ def __str__(self) -> str:
7070
super().__str__()
7171
+ f": {self.pipeline_name} {self.sections}@{self.merge_style} state['{self.source_state_key}']"
7272
)
73-
74-
if TYPE_CHECKING:
75-
# provide __init__ signature when type checking
76-
def __init__(
77-
self,
78-
pipeline_name: str = None,
79-
sections: Tuple[str, ...] = (),
80-
merge_style: TMergeFunc = None,
81-
source_state_key: str = None,
82-
) -> None: ...

dlt/common/configuration/specs/connection_string_credentials.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
1-
from typing import Any, ClassVar, Dict, List, Optional
1+
import dataclasses
2+
from typing import Any, ClassVar, Dict, List, Optional, Union
3+
24
from dlt.common.libs.sql_alchemy import URL, make_url
35
from dlt.common.configuration.specs.exceptions import InvalidConnectionString
4-
56
from dlt.common.typing import TSecretValue
67
from dlt.common.configuration.specs.base_configuration import CredentialsConfiguration, configspec
78

89

910
@configspec
1011
class ConnectionStringCredentials(CredentialsConfiguration):
11-
drivername: str = None
12+
drivername: str = dataclasses.field(default=None, init=False, repr=False, compare=False)
1213
database: str = None
1314
password: Optional[TSecretValue] = None
1415
username: str = None
@@ -18,6 +19,11 @@ class ConnectionStringCredentials(CredentialsConfiguration):
1819

1920
__config_gen_annotations__: ClassVar[List[str]] = ["port", "password", "host"]
2021

22+
def __init__(self, connection_string: Union[str, Dict[str, Any]] = None) -> None:
23+
"""Initializes the credentials from SQLAlchemy like connection string or from dict holding connection string elements"""
24+
super().__init__()
25+
self._apply_init_value(connection_string)
26+
2127
def parse_native_representation(self, native_value: Any) -> None:
2228
if not isinstance(native_value, str):
2329
raise InvalidConnectionString(self.__class__, native_value, self.drivername)

dlt/common/configuration/specs/gcp_credentials.py

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1+
import dataclasses
12
import sys
2-
from typing import Any, Final, List, Tuple, Union, Dict
3+
from typing import Any, ClassVar, Final, List, Tuple, Union, Dict
34

45
from dlt.common import json, pendulum
56
from dlt.common.configuration.specs.api_credentials import OAuth2Credentials
@@ -22,8 +23,12 @@
2223

2324
@configspec
2425
class GcpCredentials(CredentialsConfiguration):
25-
token_uri: Final[str] = "https://oauth2.googleapis.com/token"
26-
auth_uri: Final[str] = "https://accounts.google.com/o/oauth2/auth"
26+
token_uri: Final[str] = dataclasses.field(
27+
default="https://oauth2.googleapis.com/token", init=False, repr=False, compare=False
28+
)
29+
auth_uri: Final[str] = dataclasses.field(
30+
default="https://accounts.google.com/o/oauth2/auth", init=False, repr=False, compare=False
31+
)
2732

2833
project_id: str = None
2934

@@ -69,7 +74,9 @@ def to_gcs_credentials(self) -> Dict[str, Any]:
6974
class GcpServiceAccountCredentialsWithoutDefaults(GcpCredentials):
7075
private_key: TSecretValue = None
7176
client_email: str = None
72-
type: Final[str] = "service_account" # noqa: A003
77+
type: Final[str] = dataclasses.field( # noqa: A003
78+
default="service_account", init=False, repr=False, compare=False
79+
)
7380

7481
def parse_native_representation(self, native_value: Any) -> None:
7582
"""Accepts ServiceAccountCredentials as native value. In other case reverts to serialized services.json"""
@@ -121,8 +128,10 @@ def __str__(self) -> str:
121128
@configspec
122129
class GcpOAuthCredentialsWithoutDefaults(GcpCredentials, OAuth2Credentials):
123130
# only desktop app supported
124-
refresh_token: TSecretValue
125-
client_type: Final[str] = "installed"
131+
refresh_token: TSecretValue = None
132+
client_type: Final[str] = dataclasses.field(
133+
default="installed", init=False, repr=False, compare=False
134+
)
126135

127136
def parse_native_representation(self, native_value: Any) -> None:
128137
"""Accepts Google OAuth2 credentials as native value. In other case reverts to serialized oauth client secret json"""
@@ -237,7 +246,7 @@ def __str__(self) -> str:
237246

238247
@configspec
239248
class GcpDefaultCredentials(CredentialsWithDefault, GcpCredentials):
240-
_LAST_FAILED_DEFAULT: float = 0.0
249+
_LAST_FAILED_DEFAULT: ClassVar[float] = 0.0
241250

242251
def parse_native_representation(self, native_value: Any) -> None:
243252
"""Accepts google credentials as native value"""

dlt/common/configuration/specs/known_sections.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@
1313
EXTRACT = "extract"
1414
"""extract stage of the pipeline"""
1515

16+
SCHEMA = "schema"
17+
"""schema configuration, ie. normalizers"""
18+
1619
PROVIDERS = "providers"
1720
"""secrets and config providers"""
1821

0 commit comments

Comments
 (0)