1 | | -import os |
2 | 1 | import asyncio |
3 | 2 | import inspect |
| 3 | +import os |
4 | 4 | import random |
5 | | -from time import sleep |
6 | | -from typing import Optional, Any |
7 | | -from unittest import mock |
8 | 5 | from datetime import datetime # noqa: I251 |
9 | 6 | from itertools import chain, count |
| 7 | +from time import sleep |
| 8 | +from typing import Any, Optional |
| 9 | +from unittest import mock |
10 | 10 |
11 | 11 | import duckdb |
| 12 | +import pyarrow as pa |
12 | 13 | import pytest |
13 | 14 |
14 | 15 | import dlt |
| 16 | +from dlt.common import Decimal |
| 17 | +from dlt.common.configuration import ConfigurationValueError |
15 | 18 | from dlt.common.configuration.container import Container |
16 | 19 | from dlt.common.configuration.exceptions import InvalidNativeValue |
17 | | -from dlt.common.configuration.specs.base_configuration import configspec, BaseConfiguration |
18 | | -from dlt.common.configuration import ConfigurationValueError |
| 20 | +from dlt.common.configuration.specs.base_configuration import ( |
| 21 | + BaseConfiguration, |
| 22 | + configspec, |
| 23 | +) |
| 24 | +from dlt.common.json import json |
19 | 25 | from dlt.common.pendulum import pendulum, timedelta |
20 | | -from dlt.common import Decimal |
21 | 26 | from dlt.common.pipeline import NormalizeInfo, StateInjectableContext, resource_state |
22 | 27 | from dlt.common.schema.schema import Schema |
23 | | -from dlt.common.utils import uniq_id, digest128, chunks |
24 | | -from dlt.common.json import json |
25 | | - |
| 28 | +from dlt.common.utils import chunks, digest128, uniq_id |
26 | 29 | from dlt.extract import DltSource |
27 | | -from dlt.extract.items import ValidateItem |
28 | | -from dlt.extract.resource import DltResource |
29 | | -from dlt.sources.helpers.transform import take_first |
30 | | -from dlt.extract.incremental import IncrementalResourceWrapper, Incremental |
| 30 | +from dlt.extract.incremental import Incremental, IncrementalResourceWrapper |
31 | 31 | from dlt.extract.incremental.exceptions import ( |
32 | 32 | IncrementalCursorInvalidCoercion, |
| 33 | + IncrementalCursorPathHasValueNone, |
33 | 34 | IncrementalCursorPathMissing, |
34 | 35 | IncrementalPrimaryKeyMissing, |
35 | | - IncrementalCursorPathHasValueNone, |
36 | 36 | ) |
| 37 | +from dlt.extract.items import ValidateItem |
| 38 | +from dlt.extract.resource import DltResource |
37 | 39 | from dlt.pipeline.exceptions import PipelineStepFailed |
38 | | - |
| 40 | +from dlt.sources.helpers.transform import take_first |
39 | 41 | from tests.extract.utils import AssertItems, data_item_to_list |
| 42 | +from tests.pipeline.utils import assert_query_data |
40 | 43 | from tests.utils import ( |
| 44 | + ALL_TEST_DATA_ITEM_FORMATS, |
| 45 | + TestDataItemFormat, |
41 | 46 | data_item_length, |
42 | 47 | data_to_item_format, |
43 | | - TestDataItemFormat, |
44 | | - ALL_TEST_DATA_ITEM_FORMATS, |
45 | 48 | ) |
46 | 49 |
47 | | -from tests.pipeline.utils import assert_query_data |
48 | | - |
49 | | -import pyarrow as pa |
50 | | - |
51 | 50 |
52 | 51 | @pytest.fixture(autouse=True) |
53 | 52 | def switch_to_fifo(): |
@@ -919,39 +918,39 @@ def some_data( |
919 | 918 | def test_cursor_path_not_nullable_arrow( |
920 | 919 | item_type: TestDataItemFormat, |
921 | 920 | ) -> None: |
922 | | - |
923 | 921 | @dlt.resource |
924 | 922 | def some_data( |
925 | 923 | invocation: int, |
926 | 924 | created_at=dlt.sources.incremental("created_at", on_cursor_value_missing="include"), |
927 | 925 | ): |
928 | 926 | if invocation == 1: |
929 | 927 | data = [ |
930 | | - {"id": 1, "created_at": 1}, |
931 | | - {"id": 2, "created_at": 1}, |
932 | | - {"id": 3, "created_at": 2}, |
933 | | - ] |
| 928 | + {"id": 1, "created_at": 1}, |
| 929 | + {"id": 2, "created_at": 1}, |
| 930 | + {"id": 3, "created_at": 2}, |
| 931 | + ] |
934 | 932 | elif invocation == 2: |
935 | 933 | data = [ |
936 | | - {"id": 4, "created_at": 1}, |
937 | | - {"id": 5, "created_at": 2}, |
938 | | - {"id": 6, "created_at": 3}, |
939 | | - ] |
| 934 | + {"id": 4, "created_at": 1}, |
| 935 | + {"id": 5, "created_at": 2}, |
| 936 | + {"id": 6, "created_at": 3}, |
| 937 | + ] |
940 | 938 |
941 | | - schema = pa.schema([ |
942 | | - pa.field('id', pa.int32(), nullable=False), |
943 | | - pa.field('created_at', pa.int32(), nullable=False) |
944 | | - ]) |
945 | | - id_array = pa.array([item['id'] for item in data], type=pa.int32()) |
946 | | - created_at_array = pa.array([item['created_at'] for item in data], type=pa.int32()) |
| 939 | + schema = pa.schema( |
| 940 | + [ |
| 941 | + pa.field("id", pa.int32(), nullable=False), |
| 942 | + pa.field("created_at", pa.int32(), nullable=False), |
| 943 | + ] |
| 944 | + ) |
| 945 | + id_array = pa.array([item["id"] for item in data], type=pa.int32()) |
| 946 | + created_at_array = pa.array([item["created_at"] for item in data], type=pa.int32()) |
947 | 947 | if item_type == "arrow-table": |
948 | 948 | source_items = [pa.Table.from_arrays([id_array, created_at_array], schema=schema)] |
949 | 949 | elif item_type == "arrow-batch": |
950 | 950 | source_items = [pa.RecordBatch.from_arrays([id_array, created_at_array], schema=schema)] |
951 | 951 |
952 | 952 | yield source_items |
953 | 953 |
954 | | - |
955 | 954 | p = dlt.pipeline(pipeline_name=uniq_id()) |
956 | 955 | p.run(some_data(1), destination="duckdb") |
957 | 956 | p.run(some_data(2), destination="duckdb") |
@@ -2523,7 +2522,7 @@ def test_type_3(): |
2523 | 2522 |
2524 | 2523 | @pytest.mark.parametrize("yield_pydantic", (True, False)) |
2525 | 2524 | def test_pydantic_columns_validator(yield_pydantic: bool) -> None: |
2526 | | - from pydantic import BaseModel, Field, ConfigDict |
| 2525 | + from pydantic import BaseModel, ConfigDict, Field |
2527 | 2526 |
2528 | 2527 | # forbid extra fields so "id" in json is not a valid field BUT |
2529 | 2528 | # add alias for id_ that will serde "id" correctly |