diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml
index 2567653c0..108063d4d 100644
--- a/.github/.OwlBot.lock.yaml
+++ b/.github/.OwlBot.lock.yaml
@@ -1,3 +1,3 @@
docker:
image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest
- digest: sha256:87eee22d276554e4e52863ec9b1cb6a7245815dfae20439712bf644348215a5a
+ digest: sha256:4ee57a76a176ede9087c14330c625a71553cf9c72828b2c0ca12f5338171ba60
diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml
index 6572e5982..01affbae5 100644
--- a/.github/sync-repo-settings.yaml
+++ b/.github/sync-repo-settings.yaml
@@ -1,9 +1,12 @@
# https://github.com/googleapis/repo-automation-bots/tree/main/packages/sync-repo-settings
+# Allow merge commits to sync main and v3 with fewer conflicts.
+mergeCommitAllowed: true
# Rules for main branch protection
branchProtectionRules:
# Identifies the protection rule pattern. Name of the branch to be protected.
# Defaults to `main`
- pattern: main
+ requiresLinearHistory: true
requiresCodeOwnerReviews: true
requiresStrictStatusChecks: true
requiredStatusCheckContexts:
@@ -15,6 +18,7 @@ branchProtectionRules:
- 'Samples - Python 3.7'
- 'Samples - Python 3.8'
- pattern: v3
+ requiresLinearHistory: false
requiresCodeOwnerReviews: true
requiresStrictStatusChecks: true
requiredStatusCheckContexts:
diff --git a/.kokoro/docs/common.cfg b/.kokoro/docs/common.cfg
index 0c99ae611..41b86fc29 100644
--- a/.kokoro/docs/common.cfg
+++ b/.kokoro/docs/common.cfg
@@ -30,6 +30,7 @@ env_vars: {
env_vars: {
key: "V2_STAGING_BUCKET"
+ # Push google cloud library docs to the Cloud RAD bucket `docs-staging-v2`
value: "docs-staging-v2"
}
diff --git a/.kokoro/samples/lint/common.cfg b/.kokoro/samples/lint/common.cfg
index 3e41df313..153746ccc 100644
--- a/.kokoro/samples/lint/common.cfg
+++ b/.kokoro/samples/lint/common.cfg
@@ -31,4 +31,4 @@ gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples"
gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline"
# Use the trampoline script to run in docker.
-build_file: "python-bigquery/.kokoro/trampoline.sh"
\ No newline at end of file
+build_file: "python-bigquery/.kokoro/trampoline_v2.sh"
\ No newline at end of file
diff --git a/.kokoro/samples/python3.10/common.cfg b/.kokoro/samples/python3.10/common.cfg
new file mode 100644
index 000000000..da4003d76
--- /dev/null
+++ b/.kokoro/samples/python3.10/common.cfg
@@ -0,0 +1,40 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+# Build logs will be here
+action {
+ define_artifacts {
+ regex: "**/*sponge_log.xml"
+ }
+}
+
+# Specify which tests to run
+env_vars: {
+ key: "RUN_TESTS_SESSION"
+ value: "py-3.10"
+}
+
+# Declare build specific Cloud project.
+env_vars: {
+ key: "BUILD_SPECIFIC_GCLOUD_PROJECT"
+ value: "python-docs-samples-tests-310"
+}
+
+env_vars: {
+ key: "TRAMPOLINE_BUILD_FILE"
+ value: "github/python-bigquery/.kokoro/test-samples.sh"
+}
+
+# Configure the docker image for kokoro-trampoline.
+env_vars: {
+ key: "TRAMPOLINE_IMAGE"
+ value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker"
+}
+
+# Download secrets for samples
+gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples"
+
+# Download trampoline resources.
+gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline"
+
+# Use the trampoline script to run in docker.
+build_file: "python-bigquery/.kokoro/trampoline_v2.sh"
\ No newline at end of file
diff --git a/.kokoro/samples/python3.10/continuous.cfg b/.kokoro/samples/python3.10/continuous.cfg
new file mode 100644
index 000000000..a1c8d9759
--- /dev/null
+++ b/.kokoro/samples/python3.10/continuous.cfg
@@ -0,0 +1,6 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+env_vars: {
+ key: "INSTALL_LIBRARY_FROM_SOURCE"
+ value: "True"
+}
\ No newline at end of file
diff --git a/.kokoro/samples/python3.10/periodic-head.cfg b/.kokoro/samples/python3.10/periodic-head.cfg
new file mode 100644
index 000000000..5aa01bab5
--- /dev/null
+++ b/.kokoro/samples/python3.10/periodic-head.cfg
@@ -0,0 +1,11 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+env_vars: {
+ key: "INSTALL_LIBRARY_FROM_SOURCE"
+ value: "True"
+}
+
+env_vars: {
+ key: "TRAMPOLINE_BUILD_FILE"
+ value: "github/python-bigquery/.kokoro/test-samples-against-head.sh"
+}
diff --git a/.kokoro/samples/python3.10/periodic.cfg b/.kokoro/samples/python3.10/periodic.cfg
new file mode 100644
index 000000000..71cd1e597
--- /dev/null
+++ b/.kokoro/samples/python3.10/periodic.cfg
@@ -0,0 +1,6 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+env_vars: {
+ key: "INSTALL_LIBRARY_FROM_SOURCE"
+ value: "False"
+}
diff --git a/.kokoro/samples/python3.10/presubmit.cfg b/.kokoro/samples/python3.10/presubmit.cfg
new file mode 100644
index 000000000..a1c8d9759
--- /dev/null
+++ b/.kokoro/samples/python3.10/presubmit.cfg
@@ -0,0 +1,6 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+env_vars: {
+ key: "INSTALL_LIBRARY_FROM_SOURCE"
+ value: "True"
+}
\ No newline at end of file
diff --git a/.kokoro/samples/python3.6/common.cfg b/.kokoro/samples/python3.6/common.cfg
index f3b930960..20f6b9691 100644
--- a/.kokoro/samples/python3.6/common.cfg
+++ b/.kokoro/samples/python3.6/common.cfg
@@ -37,4 +37,4 @@ gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples"
gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline"
# Use the trampoline script to run in docker.
-build_file: "python-bigquery/.kokoro/trampoline.sh"
\ No newline at end of file
+build_file: "python-bigquery/.kokoro/trampoline_v2.sh"
\ No newline at end of file
diff --git a/.kokoro/samples/python3.6/periodic.cfg b/.kokoro/samples/python3.6/periodic.cfg
index 50fec9649..71cd1e597 100644
--- a/.kokoro/samples/python3.6/periodic.cfg
+++ b/.kokoro/samples/python3.6/periodic.cfg
@@ -3,4 +3,4 @@
env_vars: {
key: "INSTALL_LIBRARY_FROM_SOURCE"
value: "False"
-}
\ No newline at end of file
+}
diff --git a/.kokoro/samples/python3.7/common.cfg b/.kokoro/samples/python3.7/common.cfg
index fc0654565..d30dc6018 100644
--- a/.kokoro/samples/python3.7/common.cfg
+++ b/.kokoro/samples/python3.7/common.cfg
@@ -37,4 +37,4 @@ gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples"
gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline"
# Use the trampoline script to run in docker.
-build_file: "python-bigquery/.kokoro/trampoline.sh"
\ No newline at end of file
+build_file: "python-bigquery/.kokoro/trampoline_v2.sh"
\ No newline at end of file
diff --git a/.kokoro/samples/python3.7/periodic.cfg b/.kokoro/samples/python3.7/periodic.cfg
index 50fec9649..71cd1e597 100644
--- a/.kokoro/samples/python3.7/periodic.cfg
+++ b/.kokoro/samples/python3.7/periodic.cfg
@@ -3,4 +3,4 @@
env_vars: {
key: "INSTALL_LIBRARY_FROM_SOURCE"
value: "False"
-}
\ No newline at end of file
+}
diff --git a/.kokoro/samples/python3.8/common.cfg b/.kokoro/samples/python3.8/common.cfg
index 2b0bf59b3..46759c6d6 100644
--- a/.kokoro/samples/python3.8/common.cfg
+++ b/.kokoro/samples/python3.8/common.cfg
@@ -37,4 +37,4 @@ gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples"
gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline"
# Use the trampoline script to run in docker.
-build_file: "python-bigquery/.kokoro/trampoline.sh"
\ No newline at end of file
+build_file: "python-bigquery/.kokoro/trampoline_v2.sh"
\ No newline at end of file
diff --git a/.kokoro/samples/python3.8/periodic.cfg b/.kokoro/samples/python3.8/periodic.cfg
index 50fec9649..71cd1e597 100644
--- a/.kokoro/samples/python3.8/periodic.cfg
+++ b/.kokoro/samples/python3.8/periodic.cfg
@@ -3,4 +3,4 @@
env_vars: {
key: "INSTALL_LIBRARY_FROM_SOURCE"
value: "False"
-}
\ No newline at end of file
+}
diff --git a/.kokoro/samples/python3.9/common.cfg b/.kokoro/samples/python3.9/common.cfg
index f179577a5..58d56ce74 100644
--- a/.kokoro/samples/python3.9/common.cfg
+++ b/.kokoro/samples/python3.9/common.cfg
@@ -37,4 +37,4 @@ gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples"
gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline"
# Use the trampoline script to run in docker.
-build_file: "python-bigquery/.kokoro/trampoline.sh"
\ No newline at end of file
+build_file: "python-bigquery/.kokoro/trampoline_v2.sh"
\ No newline at end of file
diff --git a/.kokoro/samples/python3.9/periodic.cfg b/.kokoro/samples/python3.9/periodic.cfg
index 50fec9649..71cd1e597 100644
--- a/.kokoro/samples/python3.9/periodic.cfg
+++ b/.kokoro/samples/python3.9/periodic.cfg
@@ -3,4 +3,4 @@
env_vars: {
key: "INSTALL_LIBRARY_FROM_SOURCE"
value: "False"
-}
\ No newline at end of file
+}
diff --git a/.kokoro/test-samples-against-head.sh b/.kokoro/test-samples-against-head.sh
index 689948a23..ba3a707b0 100755
--- a/.kokoro/test-samples-against-head.sh
+++ b/.kokoro/test-samples-against-head.sh
@@ -23,6 +23,4 @@ set -eo pipefail
# Enables `**` to include files nested inside sub-folders
shopt -s globstar
-cd github/python-bigquery
-
exec .kokoro/test-samples-impl.sh
diff --git a/.kokoro/test-samples.sh b/.kokoro/test-samples.sh
index 62ef534cd..11c042d34 100755
--- a/.kokoro/test-samples.sh
+++ b/.kokoro/test-samples.sh
@@ -24,8 +24,6 @@ set -eo pipefail
# Enables `**` to include files nested inside sub-folders
shopt -s globstar
-cd github/python-bigquery
-
# Run periodic samples tests at latest release
if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"periodic"* ]]; then
# preserving the test runner implementation.
diff --git a/.repo-metadata.json b/.repo-metadata.json
index f132056d5..124b40eb9 100644
--- a/.repo-metadata.json
+++ b/.repo-metadata.json
@@ -1,14 +1,16 @@
{
- "name": "bigquery",
- "name_pretty": "Google Cloud BigQuery",
- "product_documentation": "https://cloud.google.com/bigquery",
- "client_documentation": "https://googleapis.dev/python/bigquery/latest",
- "issue_tracker": "https://issuetracker.google.com/savedsearches/559654",
- "release_level": "ga",
- "language": "python",
- "library_type": "GAPIC_COMBO",
- "repo": "googleapis/python-bigquery",
- "distribution_name": "google-cloud-bigquery",
- "api_id": "bigquery.googleapis.com",
- "requires_billing": false
-}
\ No newline at end of file
+ "name": "bigquery",
+ "name_pretty": "Google Cloud BigQuery",
+ "product_documentation": "https://cloud.google.com/bigquery",
+ "client_documentation": "https://googleapis.dev/python/bigquery/latest",
+ "issue_tracker": "https://issuetracker.google.com/savedsearches/559654",
+ "release_level": "ga",
+ "language": "python",
+ "library_type": "GAPIC_COMBO",
+ "repo": "googleapis/python-bigquery",
+ "distribution_name": "google-cloud-bigquery",
+ "api_id": "bigquery.googleapis.com",
+ "requires_billing": false,
+ "default_version": "v2",
+ "codeowner_team": "@googleapis/api-bigquery"
+}
diff --git a/.trampolinerc b/.trampolinerc
index 383b6ec89..0eee72ab6 100644
--- a/.trampolinerc
+++ b/.trampolinerc
@@ -16,15 +16,26 @@
# Add required env vars here.
required_envvars+=(
- "STAGING_BUCKET"
- "V2_STAGING_BUCKET"
)
# Add env vars which are passed down into the container here.
pass_down_envvars+=(
+ "NOX_SESSION"
+ ###############
+ # Docs builds
+ ###############
"STAGING_BUCKET"
"V2_STAGING_BUCKET"
- "NOX_SESSION"
+ ##################
+ # Samples builds
+ ##################
+ "INSTALL_LIBRARY_FROM_SOURCE"
+ "RUN_TESTS_SESSION"
+ "BUILD_SPECIFIC_GCLOUD_PROJECT"
+ # Target directories.
+ "RUN_TESTS_DIRS"
+ # The nox session to run.
+ "RUN_TESTS_SESSION"
)
# Prevent unintentional override on the default image.
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d531ec477..d15f22851 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,25 @@
[1]: https://pypi.org/project/google-cloud-bigquery/#history
+### [2.28.1](https://www.github.com/googleapis/python-bigquery/compare/v2.28.0...v2.28.1) (2021-10-07)
+
+
+### Bug Fixes
+
+* support ARRAY data type when loading from DataFrame with Parquet ([#980](https://www.github.com/googleapis/python-bigquery/issues/980)) ([1e59083](https://www.github.com/googleapis/python-bigquery/commit/1e5908302d36e15442013af6f46b1c20af28255e))
+
+## [2.28.0](https://www.github.com/googleapis/python-bigquery/compare/v2.27.1...v2.28.0) (2021-09-30)
+
+
+### Features
+
+* add `AvroOptions` to configure AVRO external data ([#994](https://www.github.com/googleapis/python-bigquery/issues/994)) ([1a9431d](https://www.github.com/googleapis/python-bigquery/commit/1a9431d9e02eeb99e4712b61c623f9cca80134a6))
+
+
+### Documentation
+
+* link to stable pandas docs ([#990](https://www.github.com/googleapis/python-bigquery/issues/990)) ([ea50e80](https://www.github.com/googleapis/python-bigquery/commit/ea50e8031fc035b3772a338bc00982de263cefad))
+
### [2.27.1](https://www.github.com/googleapis/python-bigquery/compare/v2.27.0...v2.27.1) (2021-09-27)
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index 8aecf9dd2..f183b63b4 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -22,7 +22,7 @@ In order to add a feature:
documentation.
- The feature must work fully on the following CPython versions:
- 3.6, 3.7, 3.8 and 3.9 on both UNIX and Windows.
+ 3.6, 3.7, 3.8, 3.9 and 3.10 on both UNIX and Windows.
- The feature must not add unnecessary dependencies (where
"unnecessary" is of course subjective, but new dependencies should
@@ -72,7 +72,7 @@ We use `nox `__ to instrument our tests.
- To run a single unit test::
- $ nox -s unit-3.9 -- -k
+ $ nox -s unit-3.10 -- -k
.. note::
@@ -225,11 +225,13 @@ We support:
- `Python 3.7`_
- `Python 3.8`_
- `Python 3.9`_
+- `Python 3.10`_
.. _Python 3.6: https://docs.python.org/3.6/
.. _Python 3.7: https://docs.python.org/3.7/
.. _Python 3.8: https://docs.python.org/3.8/
.. _Python 3.9: https://docs.python.org/3.9/
+.. _Python 3.10: https://docs.python.org/3.10/
Supported versions can be found in our ``noxfile.py`` `config`_.
diff --git a/docs/conf.py b/docs/conf.py
index fa5217731..3d07b6bf5 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -364,10 +364,10 @@
"google-auth": ("https://googleapis.dev/python/google-auth/latest/", None),
"google.api_core": ("https://googleapis.dev/python/google-api-core/latest/", None,),
"grpc": ("https://grpc.github.io/grpc/python/", None),
- "pandas": ("http://pandas.pydata.org/pandas-docs/stable/", None),
- "geopandas": ("https://geopandas.org/", None),
"proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None),
"protobuf": ("https://googleapis.dev/python/protobuf/latest/", None),
+ "pandas": ("http://pandas.pydata.org/pandas-docs/stable/", None),
+ "geopandas": ("https://geopandas.org/", None),
}
diff --git a/docs/format_options.rst b/docs/format_options.rst
new file mode 100644
index 000000000..b3948209e
--- /dev/null
+++ b/docs/format_options.rst
@@ -0,0 +1,6 @@
+BigQuery Format Options
+=======================
+
+.. automodule:: google.cloud.bigquery.format_options
+ :members:
+ :undoc-members:
diff --git a/docs/job_base.rst b/docs/job_base.rst
new file mode 100644
index 000000000..f5ef06b88
--- /dev/null
+++ b/docs/job_base.rst
@@ -0,0 +1,5 @@
+Common Job Resource Classes
+===========================
+
+.. automodule:: google.cloud.bigquery.job.base
+ :members:
diff --git a/docs/query.rst b/docs/query.rst
new file mode 100644
index 000000000..d3cb8fe83
--- /dev/null
+++ b/docs/query.rst
@@ -0,0 +1,5 @@
+Query Resource Classes
+======================
+
+.. automodule:: google.cloud.bigquery.query
+ :members:
diff --git a/docs/reference.rst b/docs/reference.rst
index 713b9239d..4f655b09e 100644
--- a/docs/reference.rst
+++ b/docs/reference.rst
@@ -47,7 +47,6 @@ Job Classes
job.CopyJob
job.LoadJob
job.ExtractJob
- job.UnknownJob
Job-Related Types
-----------------
@@ -68,7 +67,11 @@ Job-Related Types
job.SourceFormat
job.WriteDisposition
job.SchemaUpdateOption
- job.TransactionInfo
+
+.. toctree::
+ :maxdepth: 2
+
+ job_base
Dataset
@@ -134,14 +137,10 @@ Schema
Query
=====
-.. autosummary::
- :toctree: generated
+.. toctree::
+ :maxdepth: 2
- query.ArrayQueryParameter
- query.ScalarQueryParameter
- query.ScalarQueryParameterType
- query.StructQueryParameter
- query.UDFResource
+ query
Retries
@@ -167,6 +166,11 @@ External Configuration
external_config.CSVOptions
external_config.GoogleSheetsOptions
+.. toctree::
+ :maxdepth: 2
+
+ format_options
+
Magics
======
diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py
index ec8f68af0..a30d748bb 100644
--- a/google/cloud/bigquery/__init__.py
+++ b/google/cloud/bigquery/__init__.py
@@ -49,6 +49,7 @@
from google.cloud.bigquery.external_config import CSVOptions
from google.cloud.bigquery.external_config import GoogleSheetsOptions
from google.cloud.bigquery.external_config import ExternalSourceFormat
+from google.cloud.bigquery.format_options import AvroOptions
from google.cloud.bigquery.format_options import ParquetOptions
from google.cloud.bigquery.job import Compression
from google.cloud.bigquery.job import CopyJob
@@ -149,6 +150,7 @@
"PolicyTagList",
"UDFResource",
"ExternalConfig",
+ "AvroOptions",
"BigtableOptions",
"BigtableColumnFamily",
"BigtableColumn",
diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py
index f400f9b70..f2a8f34f0 100644
--- a/google/cloud/bigquery/_helpers.py
+++ b/google/cloud/bigquery/_helpers.py
@@ -96,6 +96,10 @@ def installed_version(self) -> packaging.version.Version:
return self._installed_version
+ @property
+ def use_compliant_nested_type(self) -> bool:
+ return self.installed_version.major >= 4
+
BQ_STORAGE_VERSIONS = BQStorageVersions()
PYARROW_VERSIONS = PyarrowVersions()
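
For context, the new ``use_compliant_nested_type`` property gates a pyarrow-only keyword argument on the installed major version. A minimal standalone sketch of the same check, independent of the library's version helpers::

    import packaging.version
    import pyarrow

    def use_compliant_nested_type() -> bool:
        # pyarrow.parquet.write_table() only accepts the
        # use_compliant_nested_type keyword starting with pyarrow 4.0.0.
        return packaging.version.parse(pyarrow.__version__).major >= 4
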
diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py
index b034c0fd1..da2fdc811 100644
--- a/google/cloud/bigquery/_pandas_helpers.py
+++ b/google/cloud/bigquery/_pandas_helpers.py
@@ -89,8 +89,8 @@ def _to_wkb(v):
_PANDAS_DTYPE_TO_BQ = {
"bool": "BOOLEAN",
"datetime64[ns, UTC]": "TIMESTAMP",
- # BigQuery does not support uploading DATETIME values from Parquet files.
- # See: https://github.com/googleapis/google-cloud-python/issues/9996
+ # TODO: Update to DATETIME in V3
+ # https://github.com/googleapis/python-bigquery/issues/985
"datetime64[ns]": "TIMESTAMP",
"float32": "FLOAT",
"float64": "FLOAT",
@@ -424,7 +424,7 @@ def dataframe_to_bq_schema(dataframe, bq_schema):
# column, but it was not found.
if bq_schema_unused:
raise ValueError(
- u"bq_schema contains fields not present in dataframe: {}".format(
+ "bq_schema contains fields not present in dataframe: {}".format(
bq_schema_unused
)
)
@@ -465,7 +465,14 @@ def augment_schema(dataframe, current_bq_schema):
continue
arrow_table = pyarrow.array(dataframe[field.name])
- detected_type = ARROW_SCALAR_IDS_TO_BQ.get(arrow_table.type.id)
+
+ if pyarrow.types.is_list(arrow_table.type):
+ # `pyarrow.ListType`
+ detected_mode = "REPEATED"
+ detected_type = ARROW_SCALAR_IDS_TO_BQ.get(arrow_table.values.type.id)
+ else:
+ detected_mode = field.mode
+ detected_type = ARROW_SCALAR_IDS_TO_BQ.get(arrow_table.type.id)
if detected_type is None:
unknown_type_fields.append(field)
@@ -474,7 +481,7 @@ def augment_schema(dataframe, current_bq_schema):
new_field = schema.SchemaField(
name=field.name,
field_type=detected_type,
- mode=field.mode,
+ mode=detected_mode,
description=field.description,
fields=field.fields,
)
@@ -482,7 +489,7 @@ def augment_schema(dataframe, current_bq_schema):
if unknown_type_fields:
warnings.warn(
- u"Pyarrow could not determine the type of columns: {}.".format(
+ "Pyarrow could not determine the type of columns: {}.".format(
", ".join(field.name for field in unknown_type_fields)
)
)
@@ -521,7 +528,7 @@ def dataframe_to_arrow(dataframe, bq_schema):
extra_fields = bq_field_names - column_and_index_names
if extra_fields:
raise ValueError(
- u"bq_schema contains fields not present in dataframe: {}".format(
+ "bq_schema contains fields not present in dataframe: {}".format(
extra_fields
)
)
@@ -531,7 +538,7 @@ def dataframe_to_arrow(dataframe, bq_schema):
missing_fields = column_names - bq_field_names
if missing_fields:
raise ValueError(
- u"bq_schema is missing fields from dataframe: {}".format(missing_fields)
+ "bq_schema is missing fields from dataframe: {}".format(missing_fields)
)
arrow_arrays = []
@@ -551,7 +558,13 @@ def dataframe_to_arrow(dataframe, bq_schema):
return pyarrow.Table.from_arrays(arrow_arrays, names=arrow_names)
-def dataframe_to_parquet(dataframe, bq_schema, filepath, parquet_compression="SNAPPY"):
+def dataframe_to_parquet(
+ dataframe,
+ bq_schema,
+ filepath,
+ parquet_compression="SNAPPY",
+ parquet_use_compliant_nested_type=True,
+):
"""Write dataframe as a Parquet file, according to the desired BQ schema.
This function requires the :mod:`pyarrow` package. Arrow is used as an
@@ -572,10 +585,27 @@ def dataframe_to_parquet(dataframe, bq_schema, filepath, parquet_compression="SN
The compression codec to use by the ``pyarrow.parquet.write_table``
serializing method. Defaults to "SNAPPY".
https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html#pyarrow-parquet-write-table
+ parquet_use_compliant_nested_type (bool):
+ Whether the ``pyarrow.parquet.write_table`` serializing method should write
+ compliant Parquet nested type (lists). Defaults to ``True``.
+ https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#nested-types
+ https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html#pyarrow-parquet-write-table
+
+ This argument is ignored for ``pyarrow`` versions earlier than ``4.0.0``.
"""
+ import pyarrow.parquet
+
+ kwargs = (
+ {"use_compliant_nested_type": parquet_use_compliant_nested_type}
+ if _helpers.PYARROW_VERSIONS.use_compliant_nested_type
+ else {}
+ )
+
bq_schema = schema._to_schema_fields(bq_schema)
arrow_table = dataframe_to_arrow(dataframe, bq_schema)
- pyarrow.parquet.write_table(arrow_table, filepath, compression=parquet_compression)
+ pyarrow.parquet.write_table(
+ arrow_table, filepath, compression=parquet_compression, **kwargs,
+ )
def _row_iterator_page_to_arrow(page, column_names, arrow_types):
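
The REPEATED-mode detection added to ``augment_schema`` relies on pyarrow reporting list columns. A small illustration of the attributes it inspects, using made-up column data::

    import pyarrow

    array = pyarrow.array([[1, 2], [3]])
    assert pyarrow.types.is_list(array.type)  # the column is a pyarrow list type
    print(array.type)         # list<item: int64>
    print(array.values.type)  # int64 -- element type used to look up the BigQuery field type
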
diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py
index a738dd0f3..9f3a4f972 100644
--- a/google/cloud/bigquery/client.py
+++ b/google/cloud/bigquery/client.py
@@ -89,6 +89,8 @@
from google.cloud.bigquery.table import TableListItem
from google.cloud.bigquery.table import TableReference
from google.cloud.bigquery.table import RowIterator
+from google.cloud.bigquery.format_options import ParquetOptions
+from google.cloud.bigquery import _helpers
_DEFAULT_CHUNKSIZE = 100 * 1024 * 1024 # 100 MB
@@ -518,7 +520,7 @@ def _dataset_from_arg(self, dataset):
def create_dataset(
self,
- dataset: Union[str, Dataset, DatasetReference],
+ dataset: Union[str, Dataset, DatasetReference, DatasetListItem],
exists_ok: bool = False,
retry: retries.Retry = DEFAULT_RETRY,
timeout: float = DEFAULT_TIMEOUT,
@@ -648,7 +650,7 @@ def create_routine(
def create_table(
self,
- table: Union[str, Table, TableReference],
+ table: Union[str, Table, TableReference, TableListItem],
exists_ok: bool = False,
retry: retries.Retry = DEFAULT_RETRY,
timeout: float = DEFAULT_TIMEOUT,
@@ -662,6 +664,7 @@ def create_table(
table (Union[ \
google.cloud.bigquery.table.Table, \
google.cloud.bigquery.table.TableReference, \
+ google.cloud.bigquery.table.TableListItem, \
str, \
]):
A :class:`~google.cloud.bigquery.table.Table` to create.
@@ -1264,7 +1267,7 @@ def update_table(
def list_models(
self,
- dataset: Union[Dataset, DatasetReference, str],
+ dataset: Union[Dataset, DatasetReference, DatasetListItem, str],
max_results: int = None,
page_token: str = None,
retry: retries.Retry = DEFAULT_RETRY,
@@ -1341,7 +1344,7 @@ def api_request(*args, **kwargs):
def list_routines(
self,
- dataset: Union[Dataset, DatasetReference, str],
+ dataset: Union[Dataset, DatasetReference, DatasetListItem, str],
max_results: int = None,
page_token: str = None,
retry: retries.Retry = DEFAULT_RETRY,
@@ -1418,7 +1421,7 @@ def api_request(*args, **kwargs):
def list_tables(
self,
- dataset: Union[Dataset, DatasetReference, str],
+ dataset: Union[Dataset, DatasetReference, DatasetListItem, str],
max_results: int = None,
page_token: str = None,
retry: retries.Retry = DEFAULT_RETRY,
@@ -1494,7 +1497,7 @@ def api_request(*args, **kwargs):
def delete_dataset(
self,
- dataset: Union[Dataset, DatasetReference, str],
+ dataset: Union[Dataset, DatasetReference, DatasetListItem, str],
delete_contents: bool = False,
retry: retries.Retry = DEFAULT_RETRY,
timeout: float = DEFAULT_TIMEOUT,
@@ -2430,10 +2433,10 @@ def load_table_from_dataframe(
They are supported when using the PARQUET source format, but
due to the way they are encoded in the ``parquet`` file,
a mismatch with the existing table schema can occur, so
- 100% compatibility cannot be guaranteed for REPEATED fields when
+                REPEATED fields are not properly supported with ``pyarrow<4.0.0`` when
using the parquet format.
- https://github.com/googleapis/python-bigquery/issues/17
+ https://github.com/googleapis/python-bigquery/issues/19
Args:
dataframe (pandas.DataFrame):
@@ -2480,18 +2483,18 @@ def load_table_from_dataframe(
:attr:`~google.cloud.bigquery.job.SourceFormat.PARQUET` are
supported.
parquet_compression (Optional[str]):
- [Beta] The compression method to use if intermittently
- serializing ``dataframe`` to a parquet file.
-
- The argument is directly passed as the ``compression``
- argument to the underlying ``pyarrow.parquet.write_table()``
- method (the default value "snappy" gets converted to uppercase).
- https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html#pyarrow-parquet-write-table
-
- If the job config schema is missing, the argument is directly
- passed as the ``compression`` argument to the underlying
- ``DataFrame.to_parquet()`` method.
- https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_parquet.html#pandas.DataFrame.to_parquet
+ [Beta] The compression method to use if intermittently
+ serializing ``dataframe`` to a parquet file.
+
+ The argument is directly passed as the ``compression``
+ argument to the underlying ``pyarrow.parquet.write_table()``
+ method (the default value "snappy" gets converted to uppercase).
+ https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html#pyarrow-parquet-write-table
+
+ If the job config schema is missing, the argument is directly
+ passed as the ``compression`` argument to the underlying
+ ``DataFrame.to_parquet()`` method.
+ https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_parquet.html#pandas.DataFrame.to_parquet
timeout (Optional[float]):
The number of seconds to wait for the underlying HTTP transport
before using ``retry``.
@@ -2520,6 +2523,16 @@ def load_table_from_dataframe(
if job_config.source_format is None:
# default value
job_config.source_format = job.SourceFormat.PARQUET
+
+ if (
+ job_config.source_format == job.SourceFormat.PARQUET
+ and job_config.parquet_options is None
+ ):
+ parquet_options = ParquetOptions()
+ # default value
+ parquet_options.enable_list_inference = True
+ job_config.parquet_options = parquet_options
+
if job_config.source_format not in supported_formats:
raise ValueError(
"Got unexpected source_format: '{}'. Currently, only PARQUET and CSV are supported".format(
@@ -2591,9 +2604,19 @@ def load_table_from_dataframe(
job_config.schema,
tmppath,
parquet_compression=parquet_compression,
+ parquet_use_compliant_nested_type=True,
)
else:
- dataframe.to_parquet(tmppath, compression=parquet_compression)
+ dataframe.to_parquet(
+ tmppath,
+ engine="pyarrow",
+ compression=parquet_compression,
+ **(
+ {"use_compliant_nested_type": True}
+ if _helpers.PYARROW_VERSIONS.use_compliant_nested_type
+ else {}
+ ),
+ )
else:
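
Taken together, the client changes above mean a DataFrame with array-valued columns can be loaded with Parquet list inference enabled by default. A hedged usage sketch; the dataset and table IDs are hypothetical::

    import pandas
    from google.cloud import bigquery

    client = bigquery.Client()
    dataframe = pandas.DataFrame({"tags": [["a", "b"], ["c"], ["d", "e", "f"]]})
    # With pyarrow>=4.0.0 the column is written as a compliant Parquet list
    # and loaded as a REPEATED STRING field.
    job = client.load_table_from_dataframe(dataframe, "my_dataset.my_table")
    job.result()
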
diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py
index f1692ba50..5f284c639 100644
--- a/google/cloud/bigquery/external_config.py
+++ b/google/cloud/bigquery/external_config.py
@@ -22,13 +22,13 @@
import base64
import copy
-from typing import FrozenSet, Iterable, Optional
+from typing import FrozenSet, Iterable, Optional, Union
from google.cloud.bigquery._helpers import _to_bytes
from google.cloud.bigquery._helpers import _bytes_to_json
from google.cloud.bigquery._helpers import _int_or_none
from google.cloud.bigquery._helpers import _str_or_none
-from google.cloud.bigquery.format_options import ParquetOptions
+from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions
from google.cloud.bigquery.schema import SchemaField
@@ -548,7 +548,13 @@ def from_api_repr(cls, resource: dict) -> "GoogleSheetsOptions":
return config
-_OPTION_CLASSES = (BigtableOptions, CSVOptions, GoogleSheetsOptions, ParquetOptions)
+_OPTION_CLASSES = (
+ AvroOptions,
+ BigtableOptions,
+ CSVOptions,
+ GoogleSheetsOptions,
+ ParquetOptions,
+)
class HivePartitioningOptions(object):
@@ -646,11 +652,6 @@ class ExternalConfig(object):
def __init__(self, source_format):
self._properties = {"sourceFormat": source_format}
- self._options = None
- for optcls in _OPTION_CLASSES:
- if source_format == optcls._SOURCE_FORMAT:
- self._options = optcls()
- break
@property
def source_format(self):
@@ -663,9 +664,17 @@ def source_format(self):
return self._properties["sourceFormat"]
@property
- def options(self):
- """Optional[Dict[str, Any]]: Source-specific options."""
- return self._options
+ def options(self) -> Optional[Union[_OPTION_CLASSES]]:
+ """Source-specific options."""
+ for optcls in _OPTION_CLASSES:
+ if self.source_format == optcls._SOURCE_FORMAT:
+ options = optcls()
+ self._properties.setdefault(optcls._RESOURCE_NAME, {})
+ options._properties = self._properties[optcls._RESOURCE_NAME]
+ return options
+
+ # No matching source format found.
+ return None
@property
def autodetect(self):
@@ -815,23 +824,120 @@ def schema(self, value):
self._properties["schema"] = prop
@property
- def parquet_options(self):
- """Optional[google.cloud.bigquery.format_options.ParquetOptions]: Additional
- properties to set if ``sourceFormat`` is set to PARQUET.
+ def avro_options(self) -> Optional[AvroOptions]:
+ """Additional properties to set if ``sourceFormat`` is set to AVRO.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.avro_options
+ """
+ if self.source_format == ExternalSourceFormat.AVRO:
+ self._properties.setdefault(AvroOptions._RESOURCE_NAME, {})
+ resource = self._properties.get(AvroOptions._RESOURCE_NAME)
+ if resource is None:
+ return None
+ options = AvroOptions()
+ options._properties = resource
+ return options
+
+ @avro_options.setter
+ def avro_options(self, value):
+ if self.source_format != ExternalSourceFormat.AVRO:
+ msg = f"Cannot set Avro options, source format is {self.source_format}"
+ raise TypeError(msg)
+ self._properties[AvroOptions._RESOURCE_NAME] = value._properties
+
+ @property
+ def bigtable_options(self) -> Optional[BigtableOptions]:
+ """Additional properties to set if ``sourceFormat`` is set to BIGTABLE.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.bigtable_options
+ """
+ if self.source_format == ExternalSourceFormat.BIGTABLE:
+ self._properties.setdefault(BigtableOptions._RESOURCE_NAME, {})
+ resource = self._properties.get(BigtableOptions._RESOURCE_NAME)
+ if resource is None:
+ return None
+ options = BigtableOptions()
+ options._properties = resource
+ return options
+
+ @bigtable_options.setter
+ def bigtable_options(self, value):
+ if self.source_format != ExternalSourceFormat.BIGTABLE:
+ msg = f"Cannot set Bigtable options, source format is {self.source_format}"
+ raise TypeError(msg)
+ self._properties[BigtableOptions._RESOURCE_NAME] = value._properties
+
+ @property
+ def csv_options(self) -> Optional[CSVOptions]:
+ """Additional properties to set if ``sourceFormat`` is set to CSV.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.csv_options
+ """
+ if self.source_format == ExternalSourceFormat.CSV:
+ self._properties.setdefault(CSVOptions._RESOURCE_NAME, {})
+ resource = self._properties.get(CSVOptions._RESOURCE_NAME)
+ if resource is None:
+ return None
+ options = CSVOptions()
+ options._properties = resource
+ return options
+
+ @csv_options.setter
+ def csv_options(self, value):
+ if self.source_format != ExternalSourceFormat.CSV:
+ msg = f"Cannot set CSV options, source format is {self.source_format}"
+ raise TypeError(msg)
+ self._properties[CSVOptions._RESOURCE_NAME] = value._properties
+
+ @property
+ def google_sheets_options(self) -> Optional[GoogleSheetsOptions]:
+ """Additional properties to set if ``sourceFormat`` is set to
+ GOOGLE_SHEETS.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.google_sheets_options
+ """
+ if self.source_format == ExternalSourceFormat.GOOGLE_SHEETS:
+ self._properties.setdefault(GoogleSheetsOptions._RESOURCE_NAME, {})
+ resource = self._properties.get(GoogleSheetsOptions._RESOURCE_NAME)
+ if resource is None:
+ return None
+ options = GoogleSheetsOptions()
+ options._properties = resource
+ return options
+
+ @google_sheets_options.setter
+ def google_sheets_options(self, value):
+ if self.source_format != ExternalSourceFormat.GOOGLE_SHEETS:
+ msg = f"Cannot set Google Sheets options, source format is {self.source_format}"
+ raise TypeError(msg)
+ self._properties[GoogleSheetsOptions._RESOURCE_NAME] = value._properties
+
+ @property
+ def parquet_options(self) -> Optional[ParquetOptions]:
+ """Additional properties to set if ``sourceFormat`` is set to PARQUET.
See:
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.parquet_options
"""
- if self.source_format != ExternalSourceFormat.PARQUET:
+ if self.source_format == ExternalSourceFormat.PARQUET:
+ self._properties.setdefault(ParquetOptions._RESOURCE_NAME, {})
+ resource = self._properties.get(ParquetOptions._RESOURCE_NAME)
+ if resource is None:
return None
- return self._options
+ options = ParquetOptions()
+ options._properties = resource
+ return options
@parquet_options.setter
def parquet_options(self, value):
if self.source_format != ExternalSourceFormat.PARQUET:
msg = f"Cannot set Parquet options, source format is {self.source_format}"
raise TypeError(msg)
- self._options = value
+ self._properties[ParquetOptions._RESOURCE_NAME] = value._properties
def to_api_repr(self) -> dict:
"""Build an API representation of this object.
@@ -841,10 +947,6 @@ def to_api_repr(self) -> dict:
A dictionary in the format used by the BigQuery API.
"""
config = copy.deepcopy(self._properties)
- if self.options is not None:
- r = self.options.to_api_repr()
- if r != {}:
- config[self.options._RESOURCE_NAME] = r
return config
@classmethod
@@ -862,10 +964,5 @@ def from_api_repr(cls, resource: dict) -> "ExternalConfig":
ExternalConfig: Configuration parsed from ``resource``.
"""
config = cls(resource["sourceFormat"])
- for optcls in _OPTION_CLASSES:
- opts = resource.get(optcls._RESOURCE_NAME)
- if opts is not None:
- config._options = optcls.from_api_repr(opts)
- break
config._properties = copy.deepcopy(resource)
return config
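
The per-format properties write through to the shared ``_properties`` resource, so mutating the returned options object updates the configuration in place. A hedged usage sketch; the Cloud Storage URI is hypothetical::

    from google.cloud import bigquery

    config = bigquery.ExternalConfig(bigquery.ExternalSourceFormat.AVRO)
    config.source_uris = ["gs://my-bucket/data/*.avro"]
    # avro_options is materialized lazily from the underlying resource dict.
    config.avro_options.use_avro_logical_types = True
    assert config.to_api_repr()["avroOptions"] == {"useAvroLogicalTypes": True}
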
diff --git a/google/cloud/bigquery/format_options.py b/google/cloud/bigquery/format_options.py
index 2c9a2ce20..1208565a9 100644
--- a/google/cloud/bigquery/format_options.py
+++ b/google/cloud/bigquery/format_options.py
@@ -13,7 +13,59 @@
# limitations under the License.
import copy
-from typing import Dict
+from typing import Dict, Optional
+
+
+class AvroOptions:
+ """Options if source format is set to AVRO."""
+
+ _SOURCE_FORMAT = "AVRO"
+ _RESOURCE_NAME = "avroOptions"
+
+ def __init__(self):
+ self._properties = {}
+
+ @property
+ def use_avro_logical_types(self) -> Optional[bool]:
+ """[Optional] If sourceFormat is set to 'AVRO', indicates whether to
+ interpret logical types as the corresponding BigQuery data type (for
+ example, TIMESTAMP), instead of using the raw type (for example,
+ INTEGER).
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#AvroOptions.FIELDS.use_avro_logical_types
+ """
+ return self._properties.get("useAvroLogicalTypes")
+
+ @use_avro_logical_types.setter
+ def use_avro_logical_types(self, value):
+ self._properties["useAvroLogicalTypes"] = value
+
+ @classmethod
+ def from_api_repr(cls, resource: Dict[str, bool]) -> "AvroOptions":
+ """Factory: construct an instance from a resource dict.
+
+ Args:
+ resource (Dict[str, bool]):
+ Definition of a :class:`~.format_options.AvroOptions` instance in
+ the same representation as is returned from the API.
+
+ Returns:
+ :class:`~.format_options.AvroOptions`:
+ Configuration parsed from ``resource``.
+ """
+ config = cls()
+ config._properties = copy.deepcopy(resource)
+ return config
+
+ def to_api_repr(self) -> dict:
+ """Build an API representation of this object.
+
+ Returns:
+ Dict[str, bool]:
+ A dictionary in the format used by the BigQuery API.
+ """
+ return copy.deepcopy(self._properties)
class ParquetOptions:
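
A short sketch of the new class's round trip through its API representation, matching the methods defined above::

    from google.cloud.bigquery.format_options import AvroOptions

    options = AvroOptions.from_api_repr({"useAvroLogicalTypes": True})
    assert options.use_avro_logical_types is True
    assert options.to_api_repr() == {"useAvroLogicalTypes": True}
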
diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py
index 698181092..9e381ded6 100644
--- a/google/cloud/bigquery/job/base.py
+++ b/google/cloud/bigquery/job/base.py
@@ -19,7 +19,7 @@
import http
import threading
import typing
-from typing import Dict, Optional
+from typing import Dict, Optional, Sequence
from google.api_core import exceptions
import google.api_core.future.polling
@@ -193,7 +193,8 @@ def parent_job_id(self):
return _helpers._get_sub_prop(self._properties, ["statistics", "parentJobId"])
@property
- def script_statistics(self):
+ def script_statistics(self) -> Optional["ScriptStatistics"]:
+ """Statistics for a child job of a script."""
resource = _helpers._get_sub_prop(
self._properties, ["statistics", "scriptStatistics"]
)
@@ -968,9 +969,8 @@ def __init__(self, resource):
self._properties = resource
@property
- def stack_frames(self):
- """List[ScriptStackFrame]: Stack trace where the current evaluation
- happened.
+ def stack_frames(self) -> Sequence[ScriptStackFrame]:
+ """Stack trace where the current evaluation happened.
Shows line/column/procedure name of each frame on the stack at the
point where the current evaluation happened.
@@ -982,7 +982,7 @@ def stack_frames(self):
]
@property
- def evaluation_kind(self):
+ def evaluation_kind(self) -> Optional[str]:
"""str: Indicates the type of child job.
Possible values include ``STATEMENT`` and ``EXPRESSION``.
@@ -1005,7 +1005,9 @@ def from_api_repr(cls, resource: dict, client) -> "UnknownJob":
Returns:
UnknownJob: Job corresponding to the resource.
"""
- job_ref_properties = resource.get("jobReference", {"projectId": client.project})
+ job_ref_properties = resource.get(
+ "jobReference", {"projectId": client.project, "jobId": None}
+ )
job_ref = _JobReference._from_api_repr(job_ref_properties)
job = cls(job_ref, client)
# Populate the job reference with the project, even if it has been
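
The typed ``script_statistics`` accessor pairs naturally with listing the child jobs of a script. A hedged sketch of how it might be consumed; the script text is illustrative::

    from google.cloud import bigquery

    client = bigquery.Client()
    parent = client.query("BEGIN SELECT 1; SELECT 2; END;")
    parent.result()
    for child in client.list_jobs(parent_job=parent):
        stats = child.script_statistics
        if stats is not None:
            print(child.job_id, stats.evaluation_kind, len(stats.stack_frames))
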
diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py
index c07daec99..6a973bb65 100644
--- a/google/cloud/bigquery/job/query.py
+++ b/google/cloud/bigquery/job/query.py
@@ -18,7 +18,7 @@
import copy
import re
import typing
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, List, Optional, Union
from google.api_core import exceptions
from google.api_core.future import polling as polling_future
@@ -38,6 +38,7 @@
from google.cloud.bigquery.query import UDFResource
from google.cloud.bigquery.retry import DEFAULT_RETRY, DEFAULT_JOB_RETRY
from google.cloud.bigquery.routine import RoutineReference
+from google.cloud.bigquery.schema import SchemaField
from google.cloud.bigquery.table import _EmptyRowIterator
from google.cloud.bigquery.table import RangePartitioning
from google.cloud.bigquery.table import _table_arg_to_table_ref
@@ -57,6 +58,7 @@
import pyarrow
from google.api_core import retry as retries
from google.cloud import bigquery_storage
+ from google.cloud.bigquery.client import Client
from google.cloud.bigquery.table import RowIterator
@@ -853,7 +855,7 @@ def to_api_repr(self):
}
@classmethod
- def from_api_repr(cls, resource: dict, client) -> "QueryJob":
+ def from_api_repr(cls, resource: dict, client: "Client") -> "QueryJob":
"""Factory: construct a job given its API representation
Args:
@@ -866,8 +868,10 @@ def from_api_repr(cls, resource: dict, client) -> "QueryJob":
Returns:
google.cloud.bigquery.job.QueryJob: Job parsed from ``resource``.
"""
- cls._check_resource_config(resource)
- job_ref = _JobReference._from_api_repr(resource["jobReference"])
+ job_ref_properties = resource.setdefault(
+ "jobReference", {"projectId": client.project, "jobId": None}
+ )
+ job_ref = _JobReference._from_api_repr(job_ref_properties)
job = cls(job_ref, None, client=client)
job._set_properties(resource)
return job
@@ -887,6 +891,18 @@ def query_plan(self):
plan_entries = self._job_statistics().get("queryPlan", ())
return [QueryPlanEntry.from_api_repr(entry) for entry in plan_entries]
+ @property
+ def schema(self) -> Optional[List[SchemaField]]:
+ """The schema of the results.
+
+ Present only for successful dry run of non-legacy SQL queries.
+ """
+ resource = self._job_statistics().get("schema")
+ if resource is None:
+ return None
+ fields = resource.get("fields", [])
+ return [SchemaField.from_api_repr(field) for field in fields]
+
@property
def timeline(self):
"""List(TimelineEntry): Return the query execution timeline
@@ -1318,6 +1334,8 @@ def result(
If Non-``None`` and non-default ``job_retry`` is
provided and the job is not retryable.
"""
+ if self.dry_run:
+ return _EmptyRowIterator()
try:
retry_do_query = getattr(self, "_retry_do_query", None)
if retry_do_query is not None:
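
With the dry-run short circuit above, ``result()`` no longer needs a server round trip, and the new ``schema`` property exposes the predicted output schema. A hedged sketch; the query text is illustrative::

    from google.cloud import bigquery

    client = bigquery.Client()
    job = client.query(
        "SELECT 1 AS x, 'a' AS y",
        job_config=bigquery.QueryJobConfig(dry_run=True),
    )
    assert list(job.result()) == []  # dry-run jobs return an empty iterator immediately
    for field in job.schema or []:
        print(field.name, field.field_type)
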
diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py
index 75901afb4..376323801 100644
--- a/google/cloud/bigquery/table.py
+++ b/google/cloud/bigquery/table.py
@@ -180,10 +180,8 @@ class TableReference(_TableBase):
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#tablereference
Args:
- dataset_ref:
- A pointer to the dataset
- table_id:
- The ID of the table
+ dataset_ref: A pointer to the dataset
+ table_id: The ID of the table
"""
_PROPERTY_TO_API_FIELD = {
diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py
index 3e5c77ede..967959b05 100644
--- a/google/cloud/bigquery/version.py
+++ b/google/cloud/bigquery/version.py
@@ -12,4 +12,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-__version__ = "2.27.1"
+__version__ = "2.28.1"
diff --git a/google/cloud/bigquery_v2/types/encryption_config.py b/google/cloud/bigquery_v2/types/encryption_config.py
index 4b9139733..a95954a30 100644
--- a/google/cloud/bigquery_v2/types/encryption_config.py
+++ b/google/cloud/bigquery_v2/types/encryption_config.py
@@ -25,6 +25,7 @@
class EncryptionConfiguration(proto.Message):
r"""
+
Attributes:
kms_key_name (google.protobuf.wrappers_pb2.StringValue):
Optional. Describes the Cloud KMS encryption
diff --git a/google/cloud/bigquery_v2/types/model.py b/google/cloud/bigquery_v2/types/model.py
index 706418401..6e3ca0095 100644
--- a/google/cloud/bigquery_v2/types/model.py
+++ b/google/cloud/bigquery_v2/types/model.py
@@ -38,6 +38,7 @@
class Model(proto.Message):
r"""
+
Attributes:
etag (str):
Output only. A hash of this resource.
@@ -251,7 +252,8 @@ class FeedbackType(proto.Enum):
EXPLICIT = 2
class SeasonalPeriod(proto.Message):
- r""" """
+ r"""
+ """
class SeasonalPeriodType(proto.Enum):
r""""""
@@ -264,7 +266,8 @@ class SeasonalPeriodType(proto.Enum):
YEARLY = 6
class KmeansEnums(proto.Message):
- r""" """
+ r"""
+ """
class KmeansInitializationMethod(proto.Enum):
r"""Indicates the method used to initialize the centroids for
@@ -386,6 +389,7 @@ class BinaryClassificationMetrics(proto.Message):
class BinaryConfusionMatrix(proto.Message):
r"""Confusion matrix for binary classification models.
+
Attributes:
positive_class_threshold (google.protobuf.wrappers_pb2.DoubleValue):
Threshold value used when computing each of
@@ -464,6 +468,7 @@ class MultiClassClassificationMetrics(proto.Message):
class ConfusionMatrix(proto.Message):
r"""Confusion matrix for multi-class classification models.
+
Attributes:
confidence_threshold (google.protobuf.wrappers_pb2.DoubleValue):
Confidence threshold used when computing the
@@ -474,6 +479,7 @@ class ConfusionMatrix(proto.Message):
class Entry(proto.Message):
r"""A single entry in the confusion matrix.
+
Attributes:
predicted_label (str):
The predicted label. For confidence_threshold > 0, we will
@@ -491,6 +497,7 @@ class Entry(proto.Message):
class Row(proto.Message):
r"""A single row in the confusion matrix.
+
Attributes:
actual_label (str):
The original label of this row.
@@ -525,6 +532,7 @@ class Row(proto.Message):
class ClusteringMetrics(proto.Message):
r"""Evaluation metrics for clustering models.
+
Attributes:
davies_bouldin_index (google.protobuf.wrappers_pb2.DoubleValue):
Davies-Bouldin index.
@@ -537,6 +545,7 @@ class ClusteringMetrics(proto.Message):
class Cluster(proto.Message):
r"""Message containing the information about one cluster.
+
Attributes:
centroid_id (int):
Centroid id.
@@ -550,6 +559,7 @@ class Cluster(proto.Message):
class FeatureValue(proto.Message):
r"""Representative value of a single feature within the cluster.
+
Attributes:
feature_column (str):
The feature column name.
@@ -562,6 +572,7 @@ class FeatureValue(proto.Message):
class CategoricalValue(proto.Message):
r"""Representative value of a categorical feature.
+
Attributes:
category_counts (Sequence[google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount]):
Counts of all categories for the categorical feature. If
@@ -573,6 +584,7 @@ class CategoricalValue(proto.Message):
class CategoryCount(proto.Message):
r"""Represents the count of a single category within the cluster.
+
Attributes:
category (str):
The name of category.
@@ -668,6 +680,7 @@ class RankingMetrics(proto.Message):
class ArimaForecastingMetrics(proto.Message):
r"""Model evaluation metrics for ARIMA forecasting models.
+
Attributes:
non_seasonal_order (Sequence[google.cloud.bigquery_v2.types.Model.ArimaOrder]):
Non-seasonal order.
@@ -857,6 +870,7 @@ class ArimaOrder(proto.Message):
class ArimaFittingMetrics(proto.Message):
r"""ARIMA model fitting metrics.
+
Attributes:
log_likelihood (float):
Log-likelihood.
@@ -888,6 +902,7 @@ class GlobalExplanation(proto.Message):
class Explanation(proto.Message):
r"""Explanation for a single feature.
+
Attributes:
feature_name (str):
Full name of the feature. For non-numerical features, will
@@ -910,6 +925,7 @@ class Explanation(proto.Message):
class TrainingRun(proto.Message):
r"""Information about a single training query run for the model.
+
Attributes:
training_options (google.cloud.bigquery_v2.types.Model.TrainingRun.TrainingOptions):
Options that were used for this training run,
@@ -935,6 +951,7 @@ class TrainingRun(proto.Message):
class TrainingOptions(proto.Message):
r"""Options used in model training.
+
Attributes:
max_iterations (int):
The maximum number of iterations in training.
@@ -1182,6 +1199,7 @@ class TrainingOptions(proto.Message):
class IterationResult(proto.Message):
r"""Information about a single iteration of the training run.
+
Attributes:
index (google.protobuf.wrappers_pb2.Int32Value):
Index of the iteration, 0 based.
@@ -1205,6 +1223,7 @@ class IterationResult(proto.Message):
class ClusterInfo(proto.Message):
r"""Information about a single cluster for clustering model.
+
Attributes:
centroid_id (int):
Centroid id.
@@ -1241,6 +1260,7 @@ class ArimaResult(proto.Message):
class ArimaCoefficients(proto.Message):
r"""Arima coefficients.
+
Attributes:
auto_regressive_coefficients (Sequence[float]):
Auto-regressive coefficients, an array of
@@ -1263,6 +1283,7 @@ class ArimaCoefficients(proto.Message):
class ArimaModelInfo(proto.Message):
r"""Arima model information.
+
Attributes:
non_seasonal_order (google.cloud.bigquery_v2.types.Model.ArimaOrder):
Non-seasonal order.
@@ -1409,6 +1430,7 @@ class ArimaModelInfo(proto.Message):
class GetModelRequest(proto.Message):
r"""
+
Attributes:
project_id (str):
Required. Project ID of the requested model.
@@ -1425,6 +1447,7 @@ class GetModelRequest(proto.Message):
class PatchModelRequest(proto.Message):
r"""
+
Attributes:
project_id (str):
Required. Project ID of the model to patch.
@@ -1447,6 +1470,7 @@ class PatchModelRequest(proto.Message):
class DeleteModelRequest(proto.Message):
r"""
+
Attributes:
project_id (str):
Required. Project ID of the model to delete.
@@ -1463,6 +1487,7 @@ class DeleteModelRequest(proto.Message):
class ListModelsRequest(proto.Message):
r"""
+
Attributes:
project_id (str):
Required. Project ID of the models to list.
@@ -1487,6 +1512,7 @@ class ListModelsRequest(proto.Message):
class ListModelsResponse(proto.Message):
r"""
+
Attributes:
models (Sequence[google.cloud.bigquery_v2.types.Model]):
Models in the requested dataset. Only the following fields
diff --git a/google/cloud/bigquery_v2/types/model_reference.py b/google/cloud/bigquery_v2/types/model_reference.py
index a9ebad613..544377f61 100644
--- a/google/cloud/bigquery_v2/types/model_reference.py
+++ b/google/cloud/bigquery_v2/types/model_reference.py
@@ -23,6 +23,7 @@
class ModelReference(proto.Message):
r"""Id path of a model.
+
Attributes:
project_id (str):
Required. The ID of the project containing
diff --git a/google/cloud/bigquery_v2/types/standard_sql.py b/google/cloud/bigquery_v2/types/standard_sql.py
index 7a845fc48..69a221c3c 100644
--- a/google/cloud/bigquery_v2/types/standard_sql.py
+++ b/google/cloud/bigquery_v2/types/standard_sql.py
@@ -78,6 +78,7 @@ class TypeKind(proto.Enum):
class StandardSqlField(proto.Message):
r"""A field or a column.
+
Attributes:
name (str):
Optional. The name of this field. Can be
@@ -96,6 +97,7 @@ class StandardSqlField(proto.Message):
class StandardSqlStructType(proto.Message):
r"""
+
Attributes:
fields (Sequence[google.cloud.bigquery_v2.types.StandardSqlField]):
@@ -106,6 +108,7 @@ class StandardSqlStructType(proto.Message):
class StandardSqlTableType(proto.Message):
r"""A table type
+
Attributes:
columns (Sequence[google.cloud.bigquery_v2.types.StandardSqlField]):
The columns in this table type
diff --git a/google/cloud/bigquery_v2/types/table_reference.py b/google/cloud/bigquery_v2/types/table_reference.py
index d56e5b09f..da206b4d7 100644
--- a/google/cloud/bigquery_v2/types/table_reference.py
+++ b/google/cloud/bigquery_v2/types/table_reference.py
@@ -23,6 +23,7 @@
class TableReference(proto.Message):
r"""
+
Attributes:
project_id (str):
Required. The ID of the project containing
diff --git a/owlbot.py b/owlbot.py
index 86374858e..5fd5c436a 100644
--- a/owlbot.py
+++ b/owlbot.py
@@ -32,8 +32,6 @@
intersphinx_dependencies={
"pandas": "http://pandas.pydata.org/pandas-docs/stable/",
"geopandas": "https://geopandas.org/",
- "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None),
- "protobuf": ("https://googleapis.dev/python/protobuf/latest/", None),
},
)
@@ -43,6 +41,7 @@
excludes=[
"noxfile.py",
"docs/multiprocessing.rst",
+ "docs/index.rst",
".coveragerc",
".github/CODEOWNERS",
# Include custom SNIPPETS_TESTS job for performance.
@@ -55,10 +54,6 @@
],
)
-# Remove unneeded intersphinx links, the library does not use any proto-generated code.
-s.replace("docs/conf.py", r'\s+"(proto-plus|protobuf)":.*$', "")
-
-
# ----------------------------------------------------------------------------
# Samples templates
# ----------------------------------------------------------------------------
diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py
index b008613f0..93a9122cc 100644
--- a/samples/geography/noxfile.py
+++ b/samples/geography/noxfile.py
@@ -87,7 +87,7 @@ def get_pytest_env_vars() -> Dict[str, str]:
# DO NOT EDIT - automatically generated.
# All versions used to test samples.
-ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9"]
+ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9", "3.10"]
# Any default versions that should be ignored.
IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"]
@@ -98,6 +98,10 @@ def get_pytest_env_vars() -> Dict[str, str]:
"True",
"true",
)
+
+# Error if a python version is missing
+nox.options.error_on_missing_interpreters = True
+
#
# Style Checks
#
diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt
index 46162762c..ecd428ab9 100644
--- a/samples/geography/requirements.txt
+++ b/samples/geography/requirements.txt
@@ -1,5 +1,5 @@
attrs==21.2.0
-cachetools==4.2.2
+cachetools==4.2.4
certifi==2021.5.30
cffi==1.14.6
charset-normalizer==2.0.6
@@ -11,7 +11,7 @@ Fiona==1.8.20
geojson==2.5.0
geopandas==0.9.0
google-api-core==2.0.1
-google-auth==2.2.0
+google-auth==2.2.1
google-cloud-bigquery==2.27.1
google-cloud-bigquery-storage==2.9.0
google-cloud-core==2.0.0
@@ -29,6 +29,8 @@ numpy==1.21.2; python_version > "3.6"
packaging==21.0
pandas==1.1.5; python_version < '3.7'
pandas==1.3.2; python_version >= '3.7'
+proto-plus==1.19.2
+protobuf==3.18.0
pyarrow==5.0.0
pyasn1==0.4.8
pyasn1-modules==0.2.8
@@ -46,4 +48,4 @@ six==1.16.0
typing-extensions==3.10.0.2
typing-inspect==0.7.1
urllib3==1.26.7
-zipp==3.5.0
+zipp==3.6.0
diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py
index b008613f0..93a9122cc 100644
--- a/samples/snippets/noxfile.py
+++ b/samples/snippets/noxfile.py
@@ -87,7 +87,7 @@ def get_pytest_env_vars() -> Dict[str, str]:
# DO NOT EDIT - automatically generated.
# All versions used to test samples.
-ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9"]
+ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9", "3.10"]
# Any default versions that should be ignored.
IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"]
@@ -98,6 +98,10 @@ def get_pytest_env_vars() -> Dict[str, str]:
"True",
"true",
)
+
+# Error if a python version is missing
+nox.options.error_on_missing_interpreters = True
+
#
# Style Checks
#
diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py
index cbf4dff27..39ea3e878 100644
--- a/tests/system/test_pandas.py
+++ b/tests/system/test_pandas.py
@@ -24,6 +24,7 @@
import google.api_core.retry
import pkg_resources
import pytest
+import numpy
from google.cloud import bigquery
from google.cloud import bigquery_storage
@@ -83,6 +84,81 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i
("uint8_col", pandas.Series([0, 1, 2], dtype="uint8")),
("uint16_col", pandas.Series([3, 4, 5], dtype="uint16")),
("uint32_col", pandas.Series([6, 7, 8], dtype="uint32")),
+ ("array_bool_col", pandas.Series([[True], [False], [True]])),
+ (
+ "array_ts_col",
+ pandas.Series(
+ [
+ [
+ datetime.datetime(
+ 2010, 1, 2, 3, 44, 50, tzinfo=datetime.timezone.utc
+ ),
+ ],
+ [
+ datetime.datetime(
+ 2011, 2, 3, 14, 50, 59, tzinfo=datetime.timezone.utc
+ ),
+ ],
+ [
+ datetime.datetime(
+ 2012, 3, 14, 15, 16, tzinfo=datetime.timezone.utc
+ ),
+ ],
+ ],
+ ),
+ ),
+ (
+ "array_dt_col",
+ pandas.Series(
+ [
+ [datetime.datetime(2010, 1, 2, 3, 44, 50)],
+ [datetime.datetime(2011, 2, 3, 14, 50, 59)],
+ [datetime.datetime(2012, 3, 14, 15, 16)],
+ ],
+ ),
+ ),
+ (
+ "array_float32_col",
+ pandas.Series(
+ [numpy.array([_], dtype="float32") for _ in [1.0, 2.0, 3.0]]
+ ),
+ ),
+ (
+ "array_float64_col",
+ pandas.Series(
+ [numpy.array([_], dtype="float64") for _ in [4.0, 5.0, 6.0]]
+ ),
+ ),
+ (
+ "array_int8_col",
+ pandas.Series(
+ [numpy.array([_], dtype="int8") for _ in [-12, -11, -10]]
+ ),
+ ),
+ (
+ "array_int16_col",
+ pandas.Series([numpy.array([_], dtype="int16") for _ in [-9, -8, -7]]),
+ ),
+ (
+ "array_int32_col",
+ pandas.Series([numpy.array([_], dtype="int32") for _ in [-6, -5, -4]]),
+ ),
+ (
+ "array_int64_col",
+ pandas.Series([numpy.array([_], dtype="int64") for _ in [-3, -2, -1]]),
+ ),
+ (
+ "array_uint8_col",
+ pandas.Series([numpy.array([_], dtype="uint8") for _ in [0, 1, 2]]),
+ ),
+ (
+ "array_uint16_col",
+ pandas.Series([numpy.array([_], dtype="uint16") for _ in [3, 4, 5]]),
+ ),
+ (
+ "array_uint32_col",
+ pandas.Series([numpy.array([_], dtype="uint32") for _ in [6, 7, 8]]),
+ ),
]
)
dataframe = pandas.DataFrame(df_data, columns=df_data.keys())
@@ -98,9 +174,8 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i
assert tuple(table.schema) == (
bigquery.SchemaField("bool_col", "BOOLEAN"),
bigquery.SchemaField("ts_col", "TIMESTAMP"),
- # BigQuery does not support uploading DATETIME values from
- # Parquet files. See:
- # https://github.com/googleapis/google-cloud-python/issues/9996
+ # TODO: Update to DATETIME in V3
+ # https://github.com/googleapis/python-bigquery/issues/985
bigquery.SchemaField("dt_col", "TIMESTAMP"),
bigquery.SchemaField("float32_col", "FLOAT"),
bigquery.SchemaField("float64_col", "FLOAT"),
@@ -111,6 +186,20 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i
bigquery.SchemaField("uint8_col", "INTEGER"),
bigquery.SchemaField("uint16_col", "INTEGER"),
bigquery.SchemaField("uint32_col", "INTEGER"),
+ bigquery.SchemaField("array_bool_col", "BOOLEAN", mode="REPEATED"),
+ bigquery.SchemaField("array_ts_col", "TIMESTAMP", mode="REPEATED"),
+ # TODO: Update to DATETIME in V3
+ # https://github.com/googleapis/python-bigquery/issues/985
+ bigquery.SchemaField("array_dt_col", "TIMESTAMP", mode="REPEATED"),
+ bigquery.SchemaField("array_float32_col", "FLOAT", mode="REPEATED"),
+ bigquery.SchemaField("array_float64_col", "FLOAT", mode="REPEATED"),
+ bigquery.SchemaField("array_int8_col", "INTEGER", mode="REPEATED"),
+ bigquery.SchemaField("array_int16_col", "INTEGER", mode="REPEATED"),
+ bigquery.SchemaField("array_int32_col", "INTEGER", mode="REPEATED"),
+ bigquery.SchemaField("array_int64_col", "INTEGER", mode="REPEATED"),
+ bigquery.SchemaField("array_uint8_col", "INTEGER", mode="REPEATED"),
+ bigquery.SchemaField("array_uint16_col", "INTEGER", mode="REPEATED"),
+ bigquery.SchemaField("array_uint32_col", "INTEGER", mode="REPEATED"),
)
assert table.num_rows == 3
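
The expanded system test covers DataFrame columns whose cells are lists or one-dimensional numpy arrays; with schema auto-detection these now load as REPEATED fields (DATETIME arrays still fall back to TIMESTAMP, per the TODO above). A minimal sketch of the user-facing behavior, assuming an existing project and a hypothetical writable table ID:

import pandas
from google.cloud import bigquery

client = bigquery.Client()

# Each cell of "scores" holds a list, so the auto-detected schema should
# contain an INTEGER field with mode="REPEATED".
dataframe = pandas.DataFrame(
    {
        "name": ["a", "b", "c"],
        "scores": [[1, 2], [3], [4, 5, 6]],
    }
)

load_job = client.load_table_from_dataframe(
    dataframe, "your-project.your_dataset.array_demo"  # hypothetical table ID
)
load_job.result()  # Wait for the load job to finish.
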
diff --git a/tests/system/test_query.py b/tests/system/test_query.py
new file mode 100644
index 000000000..24758595b
--- /dev/null
+++ b/tests/system/test_query.py
@@ -0,0 +1,29 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from google.cloud import bigquery
+
+
+def test_dry_run(bigquery_client: bigquery.Client, scalars_table: str):
+ query_config = bigquery.QueryJobConfig()
+ query_config.dry_run = True
+
+ query_string = f"SELECT * FROM {scalars_table}"
+ query_job = bigquery_client.query(query_string, job_config=query_config,)
+
+ # Note: `query_job.result()` is not necessary on a dry run query. All
+ # necessary information is returned in the initial response.
+ assert query_job.dry_run is True
+ assert query_job.total_bytes_processed > 0
+ assert len(query_job.schema) > 0
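
The new system test also documents the dry-run workflow: the job-insert response already carries the byte estimate and the result schema, so there is no need to call `result()` or poll the job. A short usage sketch against a public dataset (`QueryJob.schema` is the property added later in this change):

from google.cloud import bigquery

client = bigquery.Client()

job_config = bigquery.QueryJobConfig(dry_run=True)
query_job = client.query(
    "SELECT name, number FROM `bigquery-public-data.usa_names.usa_1910_2013`",
    job_config=job_config,
)

# No result() call needed; the dry-run response is complete on creation.
print("Estimated bytes processed:", query_job.total_bytes_processed)
print("Result columns:", [field.name for field in query_job.schema])
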
diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py
index 4c598d797..17baacf5b 100644
--- a/tests/unit/job/test_query.py
+++ b/tests/unit/job/test_query.py
@@ -26,6 +26,7 @@
from google.cloud.bigquery.client import _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS
import google.cloud.bigquery.query
+from google.cloud.bigquery.table import _EmptyRowIterator
from ..helpers import make_connection
@@ -268,25 +269,6 @@ def test_ctor_w_query_parameters(self):
job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config)
self.assertEqual(job.query_parameters, query_parameters)
- def test_from_api_repr_missing_identity(self):
- self._setUpConstants()
- client = _make_client(project=self.PROJECT)
- RESOURCE = {}
- klass = self._get_target_class()
- with self.assertRaises(KeyError):
- klass.from_api_repr(RESOURCE, client=client)
-
- def test_from_api_repr_missing_config(self):
- self._setUpConstants()
- client = _make_client(project=self.PROJECT)
- RESOURCE = {
- "id": "%s:%s" % (self.PROJECT, self.DS_ID),
- "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID},
- }
- klass = self._get_target_class()
- with self.assertRaises(KeyError):
- klass.from_api_repr(RESOURCE, client=client)
-
def test_from_api_repr_bare(self):
self._setUpConstants()
client = _make_client(project=self.PROJECT)
@@ -989,6 +971,19 @@ def test_result(self):
[query_results_call, query_results_call, reload_call, query_page_call]
)
+ def test_result_dry_run(self):
+ job_resource = self._make_resource(started=True, location="EU")
+ job_resource["configuration"]["dryRun"] = True
+ conn = make_connection()
+ client = _make_client(self.PROJECT, connection=conn)
+ job = self._get_target_class().from_api_repr(job_resource, client)
+
+ result = job.result()
+
+ calls = conn.api_request.mock_calls
+ self.assertIsInstance(result, _EmptyRowIterator)
+ self.assertEqual(calls, [])
+
def test_result_with_done_job_calls_get_query_results(self):
query_resource_done = {
"jobComplete": True,
@@ -1391,6 +1386,43 @@ def test_result_transport_timeout_error(self):
with call_api_patch, self.assertRaises(concurrent.futures.TimeoutError):
job.result(timeout=1)
+ def test_no_schema(self):
+ client = _make_client(project=self.PROJECT)
+ resource = {}
+ klass = self._get_target_class()
+ job = klass.from_api_repr(resource, client=client)
+ assert job.schema is None
+
+ def test_schema(self):
+ client = _make_client(project=self.PROJECT)
+ resource = {
+ "statistics": {
+ "query": {
+ "schema": {
+ "fields": [
+ {"mode": "NULLABLE", "name": "bool_col", "type": "BOOLEAN"},
+ {
+ "mode": "NULLABLE",
+ "name": "string_col",
+ "type": "STRING",
+ },
+ {
+ "mode": "NULLABLE",
+ "name": "timestamp_col",
+ "type": "TIMESTAMP",
+ },
+ ]
+ },
+ },
+ },
+ }
+ klass = self._get_target_class()
+ job = klass.from_api_repr(resource, client=client)
+ assert len(job.schema) == 3
+ assert job.schema[0].field_type == "BOOLEAN"
+ assert job.schema[1].field_type == "STRING"
+ assert job.schema[2].field_type == "TIMESTAMP"
+
def test__begin_error(self):
from google.cloud import exceptions
diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py
index 2ddf98077..2504b2838 100644
--- a/tests/unit/test_client.py
+++ b/tests/unit/test_client.py
@@ -53,6 +53,7 @@
from google.cloud import bigquery_storage
from google.cloud.bigquery.dataset import DatasetReference
from google.cloud.bigquery.retry import DEFAULT_TIMEOUT
+from google.cloud.bigquery import ParquetOptions
from tests.unit.helpers import make_connection
@@ -6850,6 +6851,176 @@ def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self):
# the original config object should not have been modified
assert job_config.to_api_repr() == original_config_copy.to_api_repr()
+ @unittest.skipIf(pandas is None, "Requires `pandas`")
+ def test_load_table_from_dataframe_w_parquet_options_none(self):
+ from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
+ from google.cloud.bigquery import job
+ from google.cloud.bigquery.schema import SchemaField
+
+ client = self._make_client()
+ records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}]
+ dataframe = pandas.DataFrame(records)
+
+ job_config = job.LoadJobConfig(
+ write_disposition=job.WriteDisposition.WRITE_TRUNCATE,
+ source_format=job.SourceFormat.PARQUET,
+ )
+
+ get_table_patch = mock.patch(
+ "google.cloud.bigquery.client.Client.get_table",
+ autospec=True,
+ return_value=mock.Mock(
+ schema=[SchemaField("id", "INTEGER"), SchemaField("age", "INTEGER")]
+ ),
+ )
+ load_patch = mock.patch(
+ "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
+ )
+ with load_patch as load_table_from_file, get_table_patch as get_table:
+ client.load_table_from_dataframe(
+ dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION
+ )
+
+ # no need to fetch and inspect table schema for WRITE_TRUNCATE jobs
+ assert not get_table.called
+
+ load_table_from_file.assert_called_once_with(
+ client,
+ mock.ANY,
+ self.TABLE_REF,
+ num_retries=_DEFAULT_NUM_RETRIES,
+ rewind=True,
+ size=mock.ANY,
+ job_id=mock.ANY,
+ job_id_prefix=None,
+ location=self.LOCATION,
+ project=None,
+ job_config=mock.ANY,
+ timeout=DEFAULT_TIMEOUT,
+ )
+
+ sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
+ assert sent_config.parquet_options.enable_list_inference is True
+
+ @unittest.skipIf(pandas is None, "Requires `pandas`")
+ def test_load_table_from_dataframe_w_list_inference_none(self):
+ from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
+ from google.cloud.bigquery import job
+ from google.cloud.bigquery.schema import SchemaField
+
+ client = self._make_client()
+ records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}]
+ dataframe = pandas.DataFrame(records)
+
+ parquet_options = ParquetOptions()
+
+ job_config = job.LoadJobConfig(
+ write_disposition=job.WriteDisposition.WRITE_TRUNCATE,
+ source_format=job.SourceFormat.PARQUET,
+ )
+ job_config.parquet_options = parquet_options
+
+ original_config_copy = copy.deepcopy(job_config)
+
+ get_table_patch = mock.patch(
+ "google.cloud.bigquery.client.Client.get_table",
+ autospec=True,
+ return_value=mock.Mock(
+ schema=[SchemaField("id", "INTEGER"), SchemaField("age", "INTEGER")]
+ ),
+ )
+ load_patch = mock.patch(
+ "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
+ )
+ with load_patch as load_table_from_file, get_table_patch as get_table:
+ client.load_table_from_dataframe(
+ dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION
+ )
+
+ # no need to fetch and inspect table schema for WRITE_TRUNCATE jobs
+ assert not get_table.called
+
+ load_table_from_file.assert_called_once_with(
+ client,
+ mock.ANY,
+ self.TABLE_REF,
+ num_retries=_DEFAULT_NUM_RETRIES,
+ rewind=True,
+ size=mock.ANY,
+ job_id=mock.ANY,
+ job_id_prefix=None,
+ location=self.LOCATION,
+ project=None,
+ job_config=mock.ANY,
+ timeout=DEFAULT_TIMEOUT,
+ )
+
+ sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
+ assert sent_config.parquet_options.enable_list_inference is None
+
+ # the original config object should not have been modified
+ assert job_config.to_api_repr() == original_config_copy.to_api_repr()
+
+ @unittest.skipIf(pandas is None, "Requires `pandas`")
+ def test_load_table_from_dataframe_w_list_inference_false(self):
+ from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
+ from google.cloud.bigquery import job
+ from google.cloud.bigquery.schema import SchemaField
+
+ client = self._make_client()
+ records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}]
+ dataframe = pandas.DataFrame(records)
+
+ parquet_options = ParquetOptions()
+ parquet_options.enable_list_inference = False
+
+ job_config = job.LoadJobConfig(
+ write_disposition=job.WriteDisposition.WRITE_TRUNCATE,
+ source_format=job.SourceFormat.PARQUET,
+ )
+ job_config.parquet_options = parquet_options
+
+ original_config_copy = copy.deepcopy(job_config)
+
+ get_table_patch = mock.patch(
+ "google.cloud.bigquery.client.Client.get_table",
+ autospec=True,
+ return_value=mock.Mock(
+ schema=[SchemaField("id", "INTEGER"), SchemaField("age", "INTEGER")]
+ ),
+ )
+ load_patch = mock.patch(
+ "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
+ )
+ with load_patch as load_table_from_file, get_table_patch as get_table:
+ client.load_table_from_dataframe(
+ dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION
+ )
+
+ # no need to fetch and inspect table schema for WRITE_TRUNCATE jobs
+ assert not get_table.called
+
+ load_table_from_file.assert_called_once_with(
+ client,
+ mock.ANY,
+ self.TABLE_REF,
+ num_retries=_DEFAULT_NUM_RETRIES,
+ rewind=True,
+ size=mock.ANY,
+ job_id=mock.ANY,
+ job_id_prefix=None,
+ location=self.LOCATION,
+ project=None,
+ job_config=mock.ANY,
+ timeout=DEFAULT_TIMEOUT,
+ )
+
+ sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
+ assert sent_config.parquet_options.enable_list_inference is False
+
+ # the original config object should not have been modified
+ assert job_config.to_api_repr() == original_config_copy.to_api_repr()
+
@unittest.skipIf(pandas is None, "Requires `pandas`")
def test_load_table_from_dataframe_w_custom_job_config_w_wrong_source_format(self):
from google.cloud.bigquery import job
@@ -7253,6 +7424,122 @@ def test_load_table_from_dataframe_struct_fields(self):
assert sent_config.source_format == job.SourceFormat.PARQUET
assert sent_config.schema == schema
+ @unittest.skipIf(pandas is None, "Requires `pandas`")
+ def test_load_table_from_dataframe_array_fields(self):
+ """Test that a DataFrame with array columns can be uploaded correctly.
+
+ See: https://github.com/googleapis/python-bigquery/issues/19
+ """
+ from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
+ from google.cloud.bigquery import job
+ from google.cloud.bigquery.schema import SchemaField
+
+ client = self._make_client()
+
+ records = [(3.14, [1, 2])]
+ dataframe = pandas.DataFrame(
+ data=records, columns=["float_column", "array_column"]
+ )
+
+ schema = [
+ SchemaField("float_column", "FLOAT"),
+ SchemaField("array_column", "INTEGER", mode="REPEATED",),
+ ]
+ job_config = job.LoadJobConfig(schema=schema)
+
+ load_patch = mock.patch(
+ "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
+ )
+
+ get_table_patch = mock.patch(
+ "google.cloud.bigquery.client.Client.get_table",
+ autospec=True,
+ side_effect=google.api_core.exceptions.NotFound("Table not found"),
+ )
+
+ with load_patch as load_table_from_file, get_table_patch:
+ client.load_table_from_dataframe(
+ dataframe,
+ self.TABLE_REF,
+ job_config=job_config,
+ location=self.LOCATION,
+ )
+
+ load_table_from_file.assert_called_once_with(
+ client,
+ mock.ANY,
+ self.TABLE_REF,
+ num_retries=_DEFAULT_NUM_RETRIES,
+ rewind=True,
+ size=mock.ANY,
+ job_id=mock.ANY,
+ job_id_prefix=None,
+ location=self.LOCATION,
+ project=None,
+ job_config=mock.ANY,
+ timeout=DEFAULT_TIMEOUT,
+ )
+
+ sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
+ assert sent_config.source_format == job.SourceFormat.PARQUET
+ assert sent_config.schema == schema
+
+ @unittest.skipIf(pandas is None, "Requires `pandas`")
+ def test_load_table_from_dataframe_array_fields_w_auto_schema(self):
+ """Test that a DataFrame with array columns can be uploaded correctly.
+
+ See: https://github.com/googleapis/python-bigquery/issues/19
+ """
+ from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
+ from google.cloud.bigquery import job
+ from google.cloud.bigquery.schema import SchemaField
+
+ client = self._make_client()
+
+ records = [(3.14, [1, 2])]
+ dataframe = pandas.DataFrame(
+ data=records, columns=["float_column", "array_column"]
+ )
+
+ expected_schema = [
+ SchemaField("float_column", "FLOAT"),
+ SchemaField("array_column", "INT64", mode="REPEATED",),
+ ]
+
+ load_patch = mock.patch(
+ "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
+ )
+
+ get_table_patch = mock.patch(
+ "google.cloud.bigquery.client.Client.get_table",
+ autospec=True,
+ side_effect=google.api_core.exceptions.NotFound("Table not found"),
+ )
+
+ with load_patch as load_table_from_file, get_table_patch:
+ client.load_table_from_dataframe(
+ dataframe, self.TABLE_REF, location=self.LOCATION,
+ )
+
+ load_table_from_file.assert_called_once_with(
+ client,
+ mock.ANY,
+ self.TABLE_REF,
+ num_retries=_DEFAULT_NUM_RETRIES,
+ rewind=True,
+ size=mock.ANY,
+ job_id=mock.ANY,
+ job_id_prefix=None,
+ location=self.LOCATION,
+ project=None,
+ job_config=mock.ANY,
+ timeout=DEFAULT_TIMEOUT,
+ )
+
+ sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
+ assert sent_config.source_format == job.SourceFormat.PARQUET
+ assert sent_config.schema == expected_schema
+
@unittest.skipIf(pandas is None, "Requires `pandas`")
def test_load_table_from_dataframe_w_partial_schema(self):
from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
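
These client tests pin down how `load_table_from_dataframe` handles `ParquetOptions`: when the caller supplies none, the client enables list inference so list-valued columns round-trip as REPEATED fields; when the caller supplies a `ParquetOptions` object, its `enable_list_inference` value (including `None` or `False`) is passed through unchanged and the caller's config is not mutated. A sketch of opting out explicitly, with a hypothetical destination table ID:

import pandas
from google.cloud import bigquery

client = bigquery.Client()
dataframe = pandas.DataFrame({"values": [[1, 2], [3]]})

parquet_options = bigquery.ParquetOptions()
parquet_options.enable_list_inference = False  # opt out of list inference

job_config = bigquery.LoadJobConfig(source_format=bigquery.SourceFormat.PARQUET)
job_config.parquet_options = parquet_options

load_job = client.load_table_from_dataframe(
    dataframe,
    "your-project.your_dataset.no_list_inference",  # hypothetical table ID
    job_config=job_config,
)
load_job.result()
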
diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py
index 3dc9dd179..3ef61d738 100644
--- a/tests/unit/test_external_config.py
+++ b/tests/unit/test_external_config.py
@@ -163,7 +163,7 @@ def test_to_api_repr_sheets(self):
options = external_config.GoogleSheetsOptions()
options.skip_leading_rows = 123
options.range = "Sheet1!A5:B10"
- ec._options = options
+ ec.google_sheets_options = options
exp_resource = {
"sourceFormat": "GOOGLE_SHEETS",
@@ -277,7 +277,7 @@ def test_to_api_repr_csv(self):
options.quote_character = "quote"
options.skip_leading_rows = 123
options.allow_jagged_rows = False
- ec._options = options
+ ec.csv_options = options
exp_resource = {
"sourceFormat": "CSV",
@@ -368,7 +368,7 @@ def test_to_api_repr_bigtable(self):
options = external_config.BigtableOptions()
options.ignore_unspecified_column_families = True
options.read_rowkey_as_string = False
- ec._options = options
+ ec.bigtable_options = options
fam1 = external_config.BigtableColumnFamily()
fam1.family_id = "familyId"
@@ -425,10 +425,166 @@ def test_to_api_repr_bigtable(self):
self.assertEqual(got_resource, exp_resource)
- def test_parquet_options_getter(self):
+ def test_avro_options_getter_and_setter(self):
+ from google.cloud.bigquery.external_config import AvroOptions
+
+ options = AvroOptions.from_api_repr({"useAvroLogicalTypes": True})
+ ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.AVRO)
+
+ self.assertIsNone(ec.avro_options.use_avro_logical_types)
+
+ ec.avro_options = options
+
+ self.assertTrue(ec.avro_options.use_avro_logical_types)
+ self.assertIs(
+ ec.options._properties, ec._properties[AvroOptions._RESOURCE_NAME]
+ )
+ self.assertIs(
+ ec.avro_options._properties, ec._properties[AvroOptions._RESOURCE_NAME]
+ )
+
+ def test_avro_options_getter_empty(self):
+ ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.AVRO)
+ self.assertIsNotNone(ec.avro_options)
+
+ def test_avro_options_getter_wrong_format(self):
+ ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV)
+ self.assertIsNone(ec.avro_options)
+
+ def test_avro_options_setter_wrong_format(self):
+ from google.cloud.bigquery.format_options import AvroOptions
+
+ options = AvroOptions()
+ ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV)
+
+ with self.assertRaisesRegex(TypeError, "Cannot set.*source format is CSV"):
+ ec.avro_options = options
+
+ def test_bigtable_options_getter_and_setter(self):
+ from google.cloud.bigquery.external_config import BigtableOptions
+
+ options = BigtableOptions.from_api_repr(
+ {"ignoreUnspecifiedColumnFamilies": True, "readRowkeyAsString": False}
+ )
+ ec = external_config.ExternalConfig(
+ external_config.ExternalSourceFormat.BIGTABLE
+ )
+
+ self.assertIsNone(ec.bigtable_options.ignore_unspecified_column_families)
+ self.assertIsNone(ec.bigtable_options.read_rowkey_as_string)
+
+ ec.bigtable_options = options
+
+ self.assertTrue(ec.bigtable_options.ignore_unspecified_column_families)
+ self.assertFalse(ec.bigtable_options.read_rowkey_as_string)
+ self.assertIs(
+ ec.options._properties, ec._properties[BigtableOptions._RESOURCE_NAME]
+ )
+ self.assertIs(
+ ec.bigtable_options._properties,
+ ec._properties[BigtableOptions._RESOURCE_NAME],
+ )
+
+ def test_bigtable_options_getter_empty(self):
+ ec = external_config.ExternalConfig(
+ external_config.ExternalSourceFormat.BIGTABLE
+ )
+ self.assertIsNotNone(ec.bigtable_options)
+
+ def test_bigtable_options_getter_wrong_format(self):
+ ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV)
+ self.assertIsNone(ec.bigtable_options)
+
+ def test_bigtable_options_setter_wrong_format(self):
+ from google.cloud.bigquery.external_config import BigtableOptions
+
+ options = BigtableOptions()
+ ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV)
+
+ with self.assertRaisesRegex(TypeError, "Cannot set.*source format is CSV"):
+ ec.bigtable_options = options
+
+ def test_csv_options_getter_and_setter(self):
+ from google.cloud.bigquery.external_config import CSVOptions
+
+ options = CSVOptions.from_api_repr(
+ {"allowJaggedRows": True, "allowQuotedNewlines": False}
+ )
+ ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV)
+
+ self.assertIsNone(ec.csv_options.allow_jagged_rows)
+ self.assertIsNone(ec.csv_options.allow_quoted_newlines)
+
+ ec.csv_options = options
+
+ self.assertTrue(ec.csv_options.allow_jagged_rows)
+ self.assertFalse(ec.csv_options.allow_quoted_newlines)
+ self.assertIs(ec.options._properties, ec._properties[CSVOptions._RESOURCE_NAME])
+ self.assertIs(
+ ec.csv_options._properties, ec._properties[CSVOptions._RESOURCE_NAME]
+ )
+
+ def test_csv_options_getter_empty(self):
+ ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV)
+ self.assertIsNotNone(ec.csv_options)
+
+ def test_csv_options_getter_wrong_format(self):
+ ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.AVRO)
+ self.assertIsNone(ec.csv_options)
+
+ def test_csv_options_setter_wrong_format(self):
+ from google.cloud.bigquery.external_config import CSVOptions
+
+ options = CSVOptions()
+ ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.AVRO)
+
+ with self.assertRaisesRegex(TypeError, "Cannot set.*source format is AVRO"):
+ ec.csv_options = options
+
+ def test_google_sheets_options_getter_and_setter(self):
+ from google.cloud.bigquery.external_config import GoogleSheetsOptions
+
+ options = GoogleSheetsOptions.from_api_repr({"skipLeadingRows": "123"})
+ ec = external_config.ExternalConfig(
+ external_config.ExternalSourceFormat.GOOGLE_SHEETS
+ )
+
+ self.assertIsNone(ec.google_sheets_options.skip_leading_rows)
+
+ ec.google_sheets_options = options
+
+ self.assertEqual(ec.google_sheets_options.skip_leading_rows, 123)
+ self.assertIs(
+ ec.options._properties, ec._properties[GoogleSheetsOptions._RESOURCE_NAME]
+ )
+ self.assertIs(
+ ec.google_sheets_options._properties,
+ ec._properties[GoogleSheetsOptions._RESOURCE_NAME],
+ )
+
+ def test_google_sheets_options_getter_empty(self):
+ ec = external_config.ExternalConfig(
+ external_config.ExternalSourceFormat.GOOGLE_SHEETS
+ )
+ self.assertIsNotNone(ec.google_sheets_options)
+
+ def test_google_sheets_options_getter_wrong_format(self):
+ ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV)
+ self.assertIsNone(ec.google_sheets_options)
+
+ def test_google_sheets_options_setter_wrong_format(self):
+ from google.cloud.bigquery.external_config import GoogleSheetsOptions
+
+ options = GoogleSheetsOptions()
+ ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV)
+
+ with self.assertRaisesRegex(TypeError, "Cannot set.*source format is CSV"):
+ ec.google_sheets_options = options
+
+ def test_parquet_options_getter_and_setter(self):
from google.cloud.bigquery.format_options import ParquetOptions
- parquet_options = ParquetOptions.from_api_repr(
+ options = ParquetOptions.from_api_repr(
{"enumAsString": True, "enableListInference": False}
)
ec = external_config.ExternalConfig(
@@ -438,32 +594,50 @@ def test_parquet_options_getter(self):
self.assertIsNone(ec.parquet_options.enum_as_string)
self.assertIsNone(ec.parquet_options.enable_list_inference)
- ec._options = parquet_options
+ ec.parquet_options = options
self.assertTrue(ec.parquet_options.enum_as_string)
self.assertFalse(ec.parquet_options.enable_list_inference)
+ self.assertIs(
+ ec.options._properties, ec._properties[ParquetOptions._RESOURCE_NAME]
+ )
+ self.assertIs(
+ ec.parquet_options._properties,
+ ec._properties[ParquetOptions._RESOURCE_NAME],
+ )
- self.assertIs(ec.parquet_options, ec.options)
-
- def test_parquet_options_getter_non_parquet_format(self):
- ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV)
- self.assertIsNone(ec.parquet_options)
+ def test_parquet_options_set_properties(self):
+ """Check that setting sub-properties works without having to create a
+ new ParquetOptions instance.
- def test_parquet_options_setter(self):
- from google.cloud.bigquery.format_options import ParquetOptions
+ This is required for compatibility with previous
+ ExternalConfig._options implementation.
+ """
- parquet_options = ParquetOptions.from_api_repr(
- {"enumAsString": False, "enableListInference": True}
- )
ec = external_config.ExternalConfig(
external_config.ExternalSourceFormat.PARQUET
)
- ec.parquet_options = parquet_options
+ self.assertIsNone(ec.parquet_options.enum_as_string)
+ self.assertIsNone(ec.parquet_options.enable_list_inference)
+
+ ec.parquet_options.enum_as_string = True
+ ec.parquet_options.enable_list_inference = False
+
+ self.assertTrue(ec.options.enum_as_string)
+ self.assertFalse(ec.options.enable_list_inference)
+ self.assertTrue(ec.parquet_options.enum_as_string)
+ self.assertFalse(ec.parquet_options.enable_list_inference)
+
+ def test_parquet_options_getter_empty(self):
+ ec = external_config.ExternalConfig(
+ external_config.ExternalSourceFormat.PARQUET
+ )
+ self.assertIsNotNone(ec.parquet_options)
- # Setting Parquet options should be reflected in the generic options attribute.
- self.assertFalse(ec.options.enum_as_string)
- self.assertTrue(ec.options.enable_list_inference)
+ def test_parquet_options_getter_non_parquet_format(self):
+ ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV)
+ self.assertIsNone(ec.parquet_options)
def test_parquet_options_setter_non_parquet_format(self):
from google.cloud.bigquery.format_options import ParquetOptions
@@ -514,7 +688,7 @@ def test_to_api_repr_parquet(self):
options = ParquetOptions.from_api_repr(
dict(enumAsString=False, enableListInference=True)
)
- ec._options = options
+ ec.parquet_options = options
exp_resource = {
"sourceFormat": external_config.ExternalSourceFormat.PARQUET,
@@ -584,6 +758,117 @@ def test_to_api_repr_decimal_target_types_unset(self):
ec.decimal_target_types = None # No error if unsetting when already unset.
+class TestBigtableOptions(unittest.TestCase):
+ def test_to_api_repr(self):
+ options = external_config.BigtableOptions()
+ family1 = external_config.BigtableColumnFamily()
+ column1 = external_config.BigtableColumn()
+ column1.qualifier_string = "col1"
+ column1.field_name = "bqcol1"
+ column1.type_ = "FLOAT"
+ column1.encoding = "TEXT"
+ column1.only_read_latest = True
+ column2 = external_config.BigtableColumn()
+ column2.qualifier_encoded = b"col2"
+ column2.field_name = "bqcol2"
+ column2.type_ = "STRING"
+ column2.only_read_latest = False
+ family1.family_id = "family1"
+ family1.type_ = "INTEGER"
+ family1.encoding = "BINARY"
+ family1.columns = [column1, column2]
+ family1.only_read_latest = False
+ family2 = external_config.BigtableColumnFamily()
+ column3 = external_config.BigtableColumn()
+ column3.qualifier_string = "col3"
+ family2.family_id = "family2"
+ family2.type_ = "BYTES"
+ family2.encoding = "TEXT"
+ family2.columns = [column3]
+ family2.only_read_latest = True
+ options.column_families = [family1, family2]
+ options.ignore_unspecified_column_families = False
+ options.read_rowkey_as_string = True
+
+ resource = options.to_api_repr()
+
+ expected_column_families = [
+ {
+ "familyId": "family1",
+ "type": "INTEGER",
+ "encoding": "BINARY",
+ "columns": [
+ {
+ "qualifierString": "col1",
+ "fieldName": "bqcol1",
+ "type": "FLOAT",
+ "encoding": "TEXT",
+ "onlyReadLatest": True,
+ },
+ {
+ "qualifierEncoded": "Y29sMg==",
+ "fieldName": "bqcol2",
+ "type": "STRING",
+ "onlyReadLatest": False,
+ },
+ ],
+ "onlyReadLatest": False,
+ },
+ {
+ "familyId": "family2",
+ "type": "BYTES",
+ "encoding": "TEXT",
+ "columns": [{"qualifierString": "col3"}],
+ "onlyReadLatest": True,
+ },
+ ]
+ self.maxDiff = None
+ self.assertEqual(
+ resource,
+ {
+ "columnFamilies": expected_column_families,
+ "ignoreUnspecifiedColumnFamilies": False,
+ "readRowkeyAsString": True,
+ },
+ )
+
+
+class TestCSVOptions(unittest.TestCase):
+ def test_to_api_repr(self):
+ options = external_config.CSVOptions()
+ options.field_delimiter = "\t"
+ options.skip_leading_rows = 42
+ options.quote_character = '"'
+ options.allow_quoted_newlines = True
+ options.allow_jagged_rows = False
+ options.encoding = "UTF-8"
+
+ resource = options.to_api_repr()
+
+ self.assertEqual(
+ resource,
+ {
+ "fieldDelimiter": "\t",
+ "skipLeadingRows": "42",
+ "quote": '"',
+ "allowQuotedNewlines": True,
+ "allowJaggedRows": False,
+ "encoding": "UTF-8",
+ },
+ )
+
+
+class TestGoogleSheetsOptions(unittest.TestCase):
+ def test_to_api_repr(self):
+ options = external_config.GoogleSheetsOptions()
+ options.range = "sheet1!A1:B20"
+ options.skip_leading_rows = 107
+
+ resource = options.to_api_repr()
+
+ self.assertEqual(resource, {"range": "sheet1!A1:B20", "skipLeadingRows": "107"})
+
+
def _copy_and_update(d, u):
d = copy.deepcopy(d)
d.update(u)
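
Most of the new assertions cover the format-specific accessors on `ExternalConfig` (`avro_options`, `bigtable_options`, `csv_options`, `google_sheets_options`, `parquet_options`), which replace writes to the private `_options` attribute: each getter returns `None` (or the setter raises `TypeError`) when the source format does not match, and the returned options object shares `_properties` with the config, so sub-properties can be set in place. A short usage sketch with a hypothetical Cloud Storage URI:

from google.cloud import bigquery

external_config = bigquery.ExternalConfig(bigquery.ExternalSourceFormat.CSV)
external_config.source_uris = ["gs://your-bucket/data/*.csv"]  # hypothetical bucket

# Sub-properties can be set in place; no separate CSVOptions instance is needed.
external_config.csv_options.skip_leading_rows = 1
external_config.csv_options.allow_jagged_rows = True

job_config = bigquery.QueryJobConfig(
    table_definitions={"csv_external": external_config}
)
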
diff --git a/tests/unit/test_format_options.py b/tests/unit/test_format_options.py
index ab5f9e05c..c8fecbfa6 100644
--- a/tests/unit/test_format_options.py
+++ b/tests/unit/test_format_options.py
@@ -13,6 +13,29 @@
# limitations under the License.
+class TestAvroOptions:
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.format_options import AvroOptions
+
+ return AvroOptions
+
+ def test_ctor(self):
+ config = self._get_target_class()()
+ assert config.use_avro_logical_types is None
+
+ def test_from_api_repr(self):
+ config = self._get_target_class().from_api_repr({"useAvroLogicalTypes": True})
+ assert config.use_avro_logical_types
+
+ def test_to_api_repr(self):
+ config = self._get_target_class()()
+ config.use_avro_logical_types = False
+
+ result = config.to_api_repr()
+ assert result == {"useAvroLogicalTypes": False}
+
+
class TestParquetOptions:
@staticmethod
def _get_target_class():
diff --git a/tests/unit/test_legacy_types.py b/tests/unit/test_legacy_types.py
index 49ccb8e5a..4638d3762 100644
--- a/tests/unit/test_legacy_types.py
+++ b/tests/unit/test_legacy_types.py
@@ -15,7 +15,7 @@
import warnings
-def test_imprting_legacy_types_emits_warning():
+def test_importing_legacy_types_emits_warning():
with warnings.catch_warnings(record=True) as warned:
from google.cloud.bigquery_v2 import types # noqa: F401