Skip to content

Commit ba86b2a

Browse files
authored
feat: add support for table snapshots (#740)
* feat: add support for table snapshots * Add system test for table snapshots * Make test taxonomy resource name unique * Store timezone aware snapshot time on snapshots * Make copy config tests more detailed * Use unique resource ID differently for display name * Add new classes to docs
1 parent 7d2d3e9 commit ba86b2a

File tree

8 files changed

+260
-2
lines changed

8 files changed

+260
-2
lines changed

docs/reference.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ Job-Related Types
5959
job.CreateDisposition
6060
job.DestinationFormat
6161
job.Encoding
62+
job.OperationType
6263
job.QueryPlanEntry
6364
job.QueryPlanEntryStep
6465
job.QueryPriority
@@ -90,6 +91,7 @@ Table
9091
table.RangePartitioning
9192
table.Row
9293
table.RowIterator
94+
table.SnapshotDefinition
9395
table.Table
9496
table.TableListItem
9597
table.TableReference

google/cloud/bigquery/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
from google.cloud.bigquery.job import ExtractJobConfig
6262
from google.cloud.bigquery.job import LoadJob
6363
from google.cloud.bigquery.job import LoadJobConfig
64+
from google.cloud.bigquery.job import OperationType
6465
from google.cloud.bigquery.job import QueryJob
6566
from google.cloud.bigquery.job import QueryJobConfig
6667
from google.cloud.bigquery.job import QueryPriority
@@ -87,6 +88,7 @@
8788
from google.cloud.bigquery.table import PartitionRange
8889
from google.cloud.bigquery.table import RangePartitioning
8990
from google.cloud.bigquery.table import Row
91+
from google.cloud.bigquery.table import SnapshotDefinition
9092
from google.cloud.bigquery.table import Table
9193
from google.cloud.bigquery.table import TableReference
9294
from google.cloud.bigquery.table import TimePartitioningType
@@ -115,6 +117,7 @@
115117
"PartitionRange",
116118
"RangePartitioning",
117119
"Row",
120+
"SnapshotDefinition",
118121
"TimePartitioning",
119122
"TimePartitioningType",
120123
# Jobs
@@ -155,6 +158,7 @@
155158
"ExternalSourceFormat",
156159
"Encoding",
157160
"KeyResultStatementKind",
161+
"OperationType",
158162
"QueryPriority",
159163
"SchemaUpdateOption",
160164
"SourceFormat",

google/cloud/bigquery/job/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from google.cloud.bigquery.job.base import UnknownJob
2626
from google.cloud.bigquery.job.copy_ import CopyJob
2727
from google.cloud.bigquery.job.copy_ import CopyJobConfig
28+
from google.cloud.bigquery.job.copy_ import OperationType
2829
from google.cloud.bigquery.job.extract import ExtractJob
2930
from google.cloud.bigquery.job.extract import ExtractJobConfig
3031
from google.cloud.bigquery.job.load import LoadJob
@@ -59,6 +60,7 @@
5960
"UnknownJob",
6061
"CopyJob",
6162
"CopyJobConfig",
63+
"OperationType",
6264
"ExtractJob",
6365
"ExtractJobConfig",
6466
"LoadJob",

google/cloud/bigquery/job/copy_.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414

1515
"""Classes for copy jobs."""
1616

17+
from typing import Optional
18+
1719
from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
1820
from google.cloud.bigquery import _helpers
1921
from google.cloud.bigquery.table import TableReference
@@ -23,6 +25,25 @@
2325
from google.cloud.bigquery.job.base import _JobReference
2426

2527

28+
class OperationType:
29+
"""Different operation types supported in table copy job.
30+
31+
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#operationtype
32+
"""
33+
34+
OPERATION_TYPE_UNSPECIFIED = "OPERATION_TYPE_UNSPECIFIED"
35+
"""Unspecified operation type."""
36+
37+
COPY = "COPY"
38+
"""The source and destination table have the same table type."""
39+
40+
SNAPSHOT = "SNAPSHOT"
41+
"""The source table type is TABLE and the destination table type is SNAPSHOT."""
42+
43+
RESTORE = "RESTORE"
44+
"""The source table type is SNAPSHOT and the destination table type is TABLE."""
45+
46+
2647
class CopyJobConfig(_JobConfig):
2748
"""Configuration options for copy jobs.
2849
@@ -85,6 +106,23 @@ def destination_encryption_configuration(self, value):
85106
api_repr = value.to_api_repr()
86107
self._set_sub_prop("destinationEncryptionConfiguration", api_repr)
87108

109+
@property
110+
def operation_type(self) -> str:
111+
"""The operation to perform with this copy job.
112+
113+
See
114+
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationTableCopy.FIELDS.operation_type
115+
"""
116+
return self._get_sub_prop(
117+
"operationType", OperationType.OPERATION_TYPE_UNSPECIFIED
118+
)
119+
120+
@operation_type.setter
121+
def operation_type(self, value: Optional[str]):
122+
if value is None:
123+
value = OperationType.OPERATION_TYPE_UNSPECIFIED
124+
self._set_sub_prop("operationType", value)
125+
88126

89127
class CopyJob(_AsyncJob):
90128
"""Asynchronous job: copy data into a table from other tables.

google/cloud/bigquery/table.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,7 @@ class Table(object):
321321
"range_partitioning": "rangePartitioning",
322322
"time_partitioning": "timePartitioning",
323323
"schema": "schema",
324+
"snapshot_definition": "snapshotDefinition",
324325
"streaming_buffer": "streamingBuffer",
325326
"self_link": "selfLink",
326327
"table_id": ["tableReference", "tableId"],
@@ -910,6 +911,19 @@ def external_data_configuration(self, value):
910911
self._PROPERTY_TO_API_FIELD["external_data_configuration"]
911912
] = api_repr
912913

914+
@property
915+
def snapshot_definition(self) -> Optional["SnapshotDefinition"]:
916+
"""Information about the snapshot. This value is set via snapshot creation.
917+
918+
See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table.FIELDS.snapshot_definition
919+
"""
920+
snapshot_info = self._properties.get(
921+
self._PROPERTY_TO_API_FIELD["snapshot_definition"]
922+
)
923+
if snapshot_info is not None:
924+
snapshot_info = SnapshotDefinition(snapshot_info)
925+
return snapshot_info
926+
913927
@classmethod
914928
def from_string(cls, full_table_id: str) -> "Table":
915929
"""Construct a table from fully-qualified table ID.
@@ -1274,6 +1288,29 @@ def __init__(self, resource):
12741288
)
12751289

12761290

1291+
class SnapshotDefinition:
1292+
"""Information about base table and snapshot time of the snapshot.
1293+
1294+
See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#snapshotdefinition
1295+
1296+
Args:
1297+
resource: Snapshot definition representation returned from the API.
1298+
"""
1299+
1300+
def __init__(self, resource: Dict[str, Any]):
1301+
self.base_table_reference = None
1302+
if "baseTableReference" in resource:
1303+
self.base_table_reference = TableReference.from_api_repr(
1304+
resource["baseTableReference"]
1305+
)
1306+
1307+
self.snapshot_time = None
1308+
if "snapshotTime" in resource:
1309+
self.snapshot_time = google.cloud._helpers._rfc3339_to_datetime(
1310+
resource["snapshotTime"]
1311+
)
1312+
1313+
12771314
class Row(object):
12781315
"""A BigQuery row.
12791316

tests/system/test_client.py

Lines changed: 70 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -394,7 +394,7 @@ def test_create_table_with_real_custom_policy(self):
394394
taxonomy_parent = f"projects/{Config.CLIENT.project}/locations/us"
395395

396396
new_taxonomy = datacatalog_types.Taxonomy(
397-
display_name="Custom test taxonomy",
397+
display_name="Custom test taxonomy" + unique_resource_id(),
398398
description="This taxonomy is only used for a test.",
399399
activated_policy_types=[
400400
datacatalog_types.Taxonomy.PolicyType.FINE_GRAINED_ACCESS_CONTROL
@@ -2370,6 +2370,75 @@ def test_parameterized_types_round_trip(self):
23702370

23712371
self.assertEqual(tuple(s._key()[:2] for s in table2.schema), fields)
23722372

2373+
def test_table_snapshots(self):
2374+
from google.cloud.bigquery import CopyJobConfig
2375+
from google.cloud.bigquery import OperationType
2376+
2377+
client = Config.CLIENT
2378+
2379+
source_table_path = f"{client.project}.{Config.DATASET}.test_table"
2380+
snapshot_table_path = f"{source_table_path}_snapshot"
2381+
2382+
# Create the table before loading so that the column order is predictable.
2383+
schema = [
2384+
bigquery.SchemaField("foo", "INTEGER"),
2385+
bigquery.SchemaField("bar", "STRING"),
2386+
]
2387+
source_table = helpers.retry_403(Config.CLIENT.create_table)(
2388+
Table(source_table_path, schema=schema)
2389+
)
2390+
self.to_delete.insert(0, source_table)
2391+
2392+
# Populate the table with initial data.
2393+
rows = [{"foo": 1, "bar": "one"}, {"foo": 2, "bar": "two"}]
2394+
load_job = Config.CLIENT.load_table_from_json(rows, source_table)
2395+
load_job.result()
2396+
2397+
# Now create a snapshot before modifying the original table data.
2398+
copy_config = CopyJobConfig()
2399+
copy_config.operation_type = OperationType.SNAPSHOT
2400+
2401+
copy_job = client.copy_table(
2402+
sources=source_table_path,
2403+
destination=snapshot_table_path,
2404+
job_config=copy_config,
2405+
)
2406+
copy_job.result()
2407+
2408+
snapshot_table = client.get_table(snapshot_table_path)
2409+
self.to_delete.insert(0, snapshot_table)
2410+
2411+
# Modify data in original table.
2412+
sql = f'INSERT INTO `{source_table_path}`(foo, bar) VALUES (3, "three")'
2413+
query_job = client.query(sql)
2414+
query_job.result()
2415+
2416+
# List rows from the source table and compare them to rows from the snapshot.
2417+
rows_iter = client.list_rows(source_table_path)
2418+
rows = sorted(row.values() for row in rows_iter)
2419+
assert rows == [(1, "one"), (2, "two"), (3, "three")]
2420+
2421+
rows_iter = client.list_rows(snapshot_table_path)
2422+
rows = sorted(row.values() for row in rows_iter)
2423+
assert rows == [(1, "one"), (2, "two")]
2424+
2425+
# Now restore the table from the snapshot and it should again contain the old
2426+
# set of rows.
2427+
copy_config = CopyJobConfig()
2428+
copy_config.operation_type = OperationType.RESTORE
2429+
copy_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE
2430+
2431+
copy_job = client.copy_table(
2432+
sources=snapshot_table_path,
2433+
destination=source_table_path,
2434+
job_config=copy_config,
2435+
)
2436+
copy_job.result()
2437+
2438+
rows_iter = client.list_rows(source_table_path)
2439+
rows = sorted(row.values() for row in rows_iter)
2440+
assert rows == [(1, "one"), (2, "two")]
2441+
23732442
def temp_dataset(self, dataset_id, location=None):
23742443
project = Config.CLIENT.project
23752444
dataset_ref = bigquery.DatasetReference(project, dataset_id)

tests/unit/job/test_copy.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,18 +28,34 @@ def _get_target_class():
2828

2929
return CopyJobConfig
3030

31+
def test_ctor_defaults(self):
32+
from google.cloud.bigquery.job import OperationType
33+
34+
config = self._make_one()
35+
36+
assert config.create_disposition is None
37+
assert config.write_disposition is None
38+
assert config.destination_encryption_configuration is None
39+
assert config.operation_type == OperationType.OPERATION_TYPE_UNSPECIFIED
40+
3141
def test_ctor_w_properties(self):
3242
from google.cloud.bigquery.job import CreateDisposition
43+
from google.cloud.bigquery.job import OperationType
3344
from google.cloud.bigquery.job import WriteDisposition
3445

3546
create_disposition = CreateDisposition.CREATE_NEVER
3647
write_disposition = WriteDisposition.WRITE_TRUNCATE
48+
snapshot_operation = OperationType.SNAPSHOT
49+
3750
config = self._get_target_class()(
38-
create_disposition=create_disposition, write_disposition=write_disposition
51+
create_disposition=create_disposition,
52+
write_disposition=write_disposition,
53+
operation_type=snapshot_operation,
3954
)
4055

4156
self.assertEqual(config.create_disposition, create_disposition)
4257
self.assertEqual(config.write_disposition, write_disposition)
58+
self.assertEqual(config.operation_type, snapshot_operation)
4359

4460
def test_to_api_repr_with_encryption(self):
4561
from google.cloud.bigquery.encryption_configuration import (
@@ -70,6 +86,22 @@ def test_to_api_repr_with_encryption_none(self):
7086
resource, {"copy": {"destinationEncryptionConfiguration": None}}
7187
)
7288

89+
def test_operation_type_setting_none(self):
90+
from google.cloud.bigquery.job import OperationType
91+
92+
config = self._make_one(operation_type=OperationType.SNAPSHOT)
93+
94+
# Setting it to None is the same as setting it to OPERATION_TYPE_UNSPECIFIED.
95+
config.operation_type = None
96+
assert config.operation_type == OperationType.OPERATION_TYPE_UNSPECIFIED
97+
98+
def test_operation_type_setting_non_none(self):
99+
from google.cloud.bigquery.job import OperationType
100+
101+
config = self._make_one(operation_type=None)
102+
config.operation_type = OperationType.RESTORE
103+
assert config.operation_type == OperationType.RESTORE
104+
73105

74106
class TestCopyJob(_Base):
75107
JOB_TYPE = "copy"

0 commit comments

Comments (0)