Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit e7762f5

Browse files
author
Sergey Vasilyev
committed
Annotate missing fields
1 parent 9c6f165 commit e7762f5

File tree

8 files changed

+47
-20
lines changed

8 files changed

+47
-20
lines changed

data_diff/databases/_connect.py

+2
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ def match_path(self, dsn):
9595
class Connect:
9696
"""Provides methods for connecting to a supported database using a URL or connection dict."""
9797

98+
database_by_scheme: Dict[str, Database]
99+
match_uri_path: Dict[str, MatchUriPath]
98100
conn_cache: MutableMapping[Hashable, Database]
99101

100102
def __init__(self, database_by_scheme: Dict[str, Database] = DATABASE_BY_SCHEME):

data_diff/databases/base.py

+17-11
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import math
66
import sys
77
import logging
8-
from typing import Any, Callable, Dict, Generator, Tuple, Optional, Sequence, Type, List, Union, TypeVar
8+
from typing import Any, Callable, ClassVar, Dict, Generator, Tuple, Optional, Sequence, Type, List, Union, TypeVar
99
from functools import partial, wraps
1010
from concurrent.futures import ThreadPoolExecutor
1111
import threading
@@ -179,6 +179,9 @@ class ThreadLocalInterpreter:
179179
Useful for cursor-sensitive operations, such as creating a temporary table.
180180
"""
181181

182+
compiler: Compiler
183+
gen: Generator
184+
182185
def __init__(self, compiler: Compiler, gen: Generator):
183186
super().__init__()
184187
self.gen = gen
@@ -238,9 +241,9 @@ def optimizer_hints(self, hints: str) -> str:
238241

239242

240243
class BaseDialect(abc.ABC):
241-
SUPPORTS_PRIMARY_KEY = False
242-
SUPPORTS_INDEXES = False
243-
TYPE_CLASSES: Dict[str, type] = {}
244+
SUPPORTS_PRIMARY_KEY: ClassVar[bool] = False
245+
SUPPORTS_INDEXES: ClassVar[bool] = False
246+
TYPE_CLASSES: ClassVar[Dict[str, type]] = {}
244247
MIXINS = frozenset()
245248

246249
PLACEHOLDER_TABLE = None # Used for Oracle
@@ -847,14 +850,13 @@ class Database(abc.ABC, _RuntypeHackToFixCicularRefrencedDatabase):
847850
Instantiated using :meth:`~data_diff.connect`
848851
"""
849852

850-
default_schema: str = None
851-
SUPPORTS_ALPHANUMS = True
852-
SUPPORTS_UNIQUE_CONSTAINT = False
853-
854-
CONNECT_URI_KWPARAMS = []
853+
default_schema: ClassVar[str] = None
854+
SUPPORTS_ALPHANUMS: ClassVar[bool] = True
855+
SUPPORTS_UNIQUE_CONSTAINT: ClassVar[bool] = False
856+
CONNECT_URI_KWPARAMS: ClassVar[List[str]] = []
855857

856-
_interactive = False
857-
is_closed = False
858+
_interactive: bool = False
859+
is_closed: bool = False
858860

859861
@property
860862
def name(self):
@@ -1121,6 +1123,10 @@ class ThreadedDatabase(Database):
11211123
Used for database connectors that do not support sharing their connection between different threads.
11221124
"""
11231125

1126+
_init_error: Optional[Exception]
1127+
_queue: ThreadPoolExecutor
1128+
thread_local: threading.local
1129+
11241130
def __init__(self, thread_count=1):
11251131
super().__init__()
11261132
self._init_error = None

data_diff/databases/databricks.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import math
2-
from typing import Dict, Sequence
2+
from typing import Any, Dict, Sequence
33
import logging
44

55
from data_diff.abcs.database_types import (
@@ -104,13 +104,17 @@ class Databricks(ThreadedDatabase):
104104
CONNECT_URI_HELP = "databricks://:<access_token>@<server_hostname>/<http_path>"
105105
CONNECT_URI_PARAMS = ["catalog", "schema"]
106106

107+
default_schema: str
108+
catalog: str
109+
_args: Dict[str, Any]
110+
107111
def __init__(self, *, thread_count, **kw):
108112
super().__init__(thread_count=thread_count)
109113
logging.getLogger("databricks.sql").setLevel(logging.WARNING)
110114

111115
self._args = kw
112116
self.default_schema = kw.get("schema", "default")
113-
self.catalog = self._args.get("catalog", "hive_metastore")
117+
self.catalog = kw.get("catalog", "hive_metastore")
114118

115119
def create_connection(self):
116120
databricks = import_databricks()

data_diff/databases/duckdb.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import Union
1+
from typing import Any, Dict, Union
22

33
from data_diff.utils import match_regexps
44
from data_diff.abcs.database_types import (
@@ -139,6 +139,9 @@ class DuckDB(Database):
139139
CONNECT_URI_HELP = "duckdb://<dbname>@<filepath>"
140140
CONNECT_URI_PARAMS = ["database", "dbpath"]
141141

142+
_args: Dict[str, Any]
143+
_conn: Any
144+
142145
def __init__(self, **kw):
143146
super().__init__()
144147
self._args = kw

data_diff/databases/postgresql.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
from typing import List
1+
from typing import Any, ClassVar, Dict, List, Type
22
from data_diff.abcs.database_types import (
3+
ColType,
34
DbPath,
45
JSON,
56
Timestamp,
@@ -69,7 +70,7 @@ class PostgresqlDialect(
6970
SUPPORTS_INDEXES = True
7071
MIXINS = {Mixin_Schema, Mixin_MD5, Mixin_NormalizeValue, Mixin_RandomSample}
7172

72-
TYPE_CLASSES = {
73+
TYPE_CLASSES: ClassVar[Dict[str, Type[ColType]]] = {
7374
# Timestamps
7475
"timestamp with time zone": TimestampTZ,
7576
"timestamp without time zone": Timestamp,
@@ -125,9 +126,11 @@ class PostgreSQL(ThreadedDatabase):
125126
SUPPORTS_UNIQUE_CONSTAINT = True
126127
CONNECT_URI_HELP = "postgresql://<user>:<password>@<host>/<database>"
127128
CONNECT_URI_PARAMS = ["database?"]
128-
129129
default_schema = "public"
130130

131+
_args: Dict[str, Any]
132+
_conn: Any
133+
131134
def __init__(self, *, thread_count, **kw):
132135
super().__init__(thread_count=thread_count)
133136
self._args = kw

data_diff/databases/redshift.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
from typing import List, Dict
1+
from typing import ClassVar, List, Dict, Type
22
from data_diff.abcs.database_types import (
3+
ColType,
34
Float,
45
JSON,
56
TemporalType,
@@ -53,7 +54,7 @@ def normalize_json(self, value: str, _coltype: JSON) -> str:
5354

5455
class Dialect(PostgresqlDialect, Mixin_MD5, Mixin_NormalizeValue, AbstractMixin_MD5, AbstractMixin_NormalizeValue):
5556
name = "Redshift"
56-
TYPE_CLASSES = {
57+
TYPE_CLASSES: ClassVar[Dict[str, Type[ColType]]] = {
5758
**PostgresqlDialect.TYPE_CLASSES,
5859
"double": Float,
5960
"real": Float,

data_diff/thread_utils.py

+5
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,11 @@ class ThreadedYielder(Iterable):
4545
Priority for the iterator can be provided via the keyword argument 'priority'. (higher runs first)
4646
"""
4747

48+
_pool: ThreadPoolExecutor
49+
_futures: deque
50+
_yield: deque
51+
_exception: Optional[None]
52+
4853
def __init__(self, max_workers: Optional[int] = None):
4954
super().__init__()
5055
self._pool = PriorityThreadPoolExecutor(max_workers)

data_diff/utils.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import re
55
import string
66
from abc import abstractmethod
7-
from typing import Any, Dict, Iterable, Iterator, List, MutableMapping, Sequence, TypeVar, Union
7+
from typing import Any, Dict, Iterable, Iterator, List, MutableMapping, Optional, Sequence, TypeVar, Union
88
from urllib.parse import urlparse
99
import operator
1010
import threading
@@ -175,6 +175,9 @@ def alphanums_to_numbers(s1: str, s2: str):
175175

176176

177177
class ArithAlphanumeric(ArithString):
178+
_str: str
179+
_max_len: Optional[int]
180+
178181
def __init__(self, s: str, max_len=None):
179182
super().__init__()
180183

0 commit comments

Comments
 (0)