diff --git a/bigquery/cloud-client/export_data_to_gcs.py b/bigquery/cloud-client/export_data_to_gcs.py
new file mode 100644
index 00000000000..c9771ea1b0d
--- /dev/null
+++ b/bigquery/cloud-client/export_data_to_gcs.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Exports data from BigQuery to an object in Google Cloud Storage.
+
+For more information, see the README.md under /bigquery.
+
+Example invocation:
+    $ python export_data_to_gcs.py example_dataset example_table \
+        gs://example-bucket/example-data.csv
+
+The dataset and table should already exist.
+"""
+
+import argparse
+import time
+import uuid
+
+from gcloud import bigquery
+
+
+def export_data_to_gcs(dataset_name, table_name, destination):
+    bigquery_client = bigquery.Client()
+    dataset = bigquery_client.dataset(dataset_name)
+    table = dataset.table(table_name)
+    job_name = str(uuid.uuid4())
+
+    job = bigquery_client.extract_table_to_storage(
+        job_name, table, destination)
+
+    job.begin()
+
+    wait_for_job(job)
+
+    print('Exported {}:{} to {}'.format(
+        dataset_name, table_name, destination))
+
+
+def wait_for_job(job):
+    while True:
+        job.reload()
+        if job.state == 'DONE':
+            if job.error_result:
+                raise RuntimeError(job.error_result)
+            return
+        time.sleep(1)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser.add_argument('dataset_name')
+    parser.add_argument('table_name')
+    parser.add_argument(
+        'destination', help='The destination Google Cloud Storage object. '
+        'Must be in the format gs://bucket_name/object_name')
+
+    args = parser.parse_args()
+
+    export_data_to_gcs(
+        args.dataset_name,
+        args.table_name,
+        args.destination)
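The sample above always produces uncompressed CSV. A minimal sketch of requesting gzipped newline-delimited JSON instead, assuming the extract job exposes the REST API's configuration.extract fields as destination_format and compression properties (worth verifying against the installed gcloud-python version):

    # Sketch only: destination_format and compression are assumed to mirror
    # the REST API's configuration.extract fields on the job object.
    import uuid

    from gcloud import bigquery


    def export_as_gzipped_json(dataset_name, table_name, destination):
        bigquery_client = bigquery.Client()
        table = bigquery_client.dataset(dataset_name).table(table_name)
        job = bigquery_client.extract_table_to_storage(
            str(uuid.uuid4()), table, destination)
        job.destination_format = 'NEWLINE_DELIMITED_JSON'  # assumed property
        job.compression = 'GZIP'  # assumed property
        job.begin()
        return job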
diff --git a/bigquery/cloud-client/export_data_to_gcs_test.py b/bigquery/cloud-client/export_data_to_gcs_test.py
new file mode 100644
index 00000000000..e260e47b4f7
--- /dev/null
+++ b/bigquery/cloud-client/export_data_to_gcs_test.py
@@ -0,0 +1,30 @@
+# Copyright 2015, Google, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import export_data_to_gcs
+
+
+DATASET_ID = 'test_dataset'
+TABLE_ID = 'test_import_table'
+
+
+def test_export_data_to_gcs(cloud_config, capsys):
+    export_data_to_gcs.export_data_to_gcs(
+        DATASET_ID,
+        TABLE_ID,
+        'gs://{}/test-export-data-to-gcs.csv'.format(
+            cloud_config.storage_bucket))
+
+    out, _ = capsys.readouterr()
+
+    assert 'Exported' in out
diff --git a/bigquery/cloud-client/load_data_from_file.py b/bigquery/cloud-client/load_data_from_file.py
new file mode 100644
index 00000000000..cbb01534735
--- /dev/null
+++ b/bigquery/cloud-client/load_data_from_file.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python
+
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Loads data into BigQuery from a local file.
+
+For more information, see the README.md under /bigquery.
+
+Example invocation:
+    $ python load_data_from_file.py example_dataset example_table \
+        example-data.csv
+
+The dataset and table should already exist.
+"""
+
+import argparse
+import time
+from gcloud import bigquery
+
+
+def load_data_from_file(dataset_name, table_name, source_file_name):
+    bigquery_client = bigquery.Client()
+    dataset = bigquery_client.dataset(dataset_name)
+    table = dataset.table(table_name)
+
+    # Reload the table to get the schema.
+    table.reload()
+
+    with open(source_file_name, 'rb') as source_file:
+        # This example uses CSV, but you can use other formats.
+        # See https://cloud.google.com/bigquery/loading-data
+        job = table.upload_from_file(
+            source_file, source_format='text/csv')
+
+    job.begin()
+
+    wait_for_job(job)
+
+    print('Loaded {} rows into {}:{}.'.format(
+        job.output_rows, dataset_name, table_name))
+
+
+def wait_for_job(job):
+    while True:
+        job.reload()
+        if job.state == 'DONE':
+            if job.error_result:
+                raise RuntimeError(job.error_result)
+            return
+        time.sleep(1)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser.add_argument('dataset_name')
+    parser.add_argument('table_name')
+    parser.add_argument(
+        'source_file_name', help='Path to a .csv file to upload.')
+
+    args = parser.parse_args()
+
+    load_data_from_file(
+        args.dataset_name,
+        args.table_name,
+        args.source_file_name)
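The wait_for_job helper polls forever if a job never reaches the DONE state. A defensive variant with a deadline, built only on the job.reload()/job.state/job.error_result attributes the samples already rely on:

    import time


    def wait_for_job_with_timeout(job, timeout_seconds=300, poll_interval=1):
        # Poll until the job finishes or the deadline passes.
        deadline = time.time() + timeout_seconds
        while time.time() < deadline:
            job.reload()  # Refresh the job state from the API.
            if job.state == 'DONE':
                if job.error_result:
                    raise RuntimeError(job.error_result)
                return
            time.sleep(poll_interval)
        raise RuntimeError(
            'Job did not complete within {} seconds.'.format(timeout_seconds))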
diff --git a/bigquery/cloud-client/load_data_from_file_test.py b/bigquery/cloud-client/load_data_from_file_test.py
new file mode 100644
index 00000000000..eccefe03843
--- /dev/null
+++ b/bigquery/cloud-client/load_data_from_file_test.py
@@ -0,0 +1,35 @@
+# Copyright 2015, Google, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+
+import load_data_from_file
+
+DATASET_ID = 'test_dataset'
+TABLE_ID = 'test_import_table'
+
+
+@pytest.mark.xfail(
+    strict=True,
+    reason='https://github.com/GoogleCloudPlatform/gcloud-python/issues/2133')
+def test_load_table(resource, capsys):
+    data_path = resource('data.csv')
+
+    load_data_from_file.load_data_from_file(
+        DATASET_ID,
+        TABLE_ID,
+        data_path)
+
+    out, _ = capsys.readouterr()
+
+    assert 'Loaded 1 rows' in out
diff --git a/bigquery/cloud-client/load_data_from_gcs.py b/bigquery/cloud-client/load_data_from_gcs.py
new file mode 100644
index 00000000000..1a577be649c
--- /dev/null
+++ b/bigquery/cloud-client/load_data_from_gcs.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Loads data into BigQuery from an object in Google Cloud Storage.
+
+For more information, see the README.md under /bigquery.
+
+Example invocation:
+    $ python load_data_from_gcs.py example_dataset example_table \
+        gs://example-bucket/example-data.csv
+
+The dataset and table should already exist.
+"""
+
+import argparse
+import time
+import uuid
+
+from gcloud import bigquery
+
+
+def load_data_from_gcs(dataset_name, table_name, source):
+    bigquery_client = bigquery.Client()
+    dataset = bigquery_client.dataset(dataset_name)
+    table = dataset.table(table_name)
+    job_name = str(uuid.uuid4())
+
+    job = bigquery_client.load_table_from_storage(
+        job_name, table, source)
+
+    job.begin()
+
+    wait_for_job(job)
+
+    print('Loaded {} rows into {}:{}.'.format(
+        job.output_rows, dataset_name, table_name))
+
+
+def wait_for_job(job):
+    while True:
+        job.reload()
+        if job.state == 'DONE':
+            if job.error_result:
+                raise RuntimeError(job.error_result)
+            return
+        time.sleep(1)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser.add_argument('dataset_name')
+    parser.add_argument('table_name')
+    parser.add_argument(
+        'source', help='The Google Cloud Storage object to load. Must be in '
+        'the format gs://bucket_name/object_name')
+
+    args = parser.parse_args()
+
+    load_data_from_gcs(
+        args.dataset_name,
+        args.table_name,
+        args.source)
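A load job is not limited to a single object: the underlying API's sourceUris field accepts several URIs, including a trailing wildcard. Assuming load_table_from_storage forwards extra positional arguments as additional source URIs (worth verifying against the installed version), a sketch:

    import uuid

    from gcloud import bigquery


    def load_shards(dataset_name, table_name):
        bigquery_client = bigquery.Client()
        table = bigquery_client.dataset(dataset_name).table(table_name)
        # Multiple objects, or a wildcard such as
        # 'gs://example-bucket/shards/part-*', can feed one load job.
        job = bigquery_client.load_table_from_storage(
            str(uuid.uuid4()), table,
            'gs://example-bucket/part-1.csv',
            'gs://example-bucket/part-2.csv')
        job.begin()
        return job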
diff --git a/bigquery/cloud-client/load_data_from_gcs_test.py b/bigquery/cloud-client/load_data_from_gcs_test.py
new file mode 100644
index 00000000000..2d1c66162c0
--- /dev/null
+++ b/bigquery/cloud-client/load_data_from_gcs_test.py
@@ -0,0 +1,31 @@
+# Copyright 2015, Google, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import load_data_from_gcs
+
+DATASET_ID = 'test_dataset'
+TABLE_ID = 'test_import_table'
+
+
+def test_load_table(cloud_config, capsys):
+    cloud_storage_input_uri = 'gs://{}/data.csv'.format(
+        cloud_config.storage_bucket)
+
+    load_data_from_gcs.load_data_from_gcs(
+        DATASET_ID,
+        TABLE_ID,
+        cloud_storage_input_uri)
+
+    out, _ = capsys.readouterr()
+
+    assert 'Loaded 1 rows' in out
diff --git a/bigquery/cloud-client/resources/data.csv b/bigquery/cloud-client/resources/data.csv
new file mode 100644
index 00000000000..230a96b559d
--- /dev/null
+++ b/bigquery/cloud-client/resources/data.csv
@@ -0,0 +1 @@
+Gandalf, 2000, 140.0, 1
diff --git a/bigquery/cloud-client/resources/data.json b/bigquery/cloud-client/resources/data.json
new file mode 100644
index 00000000000..b8eef90c591
--- /dev/null
+++ b/bigquery/cloud-client/resources/data.json
@@ -0,0 +1 @@
+{"Name": "Gandalf", "Age": 2000, "Weight": 140.0, "IsMagic": true}
diff --git a/bigquery/cloud-client/resources/schema.json b/bigquery/cloud-client/resources/schema.json
new file mode 100644
index 00000000000..a48971ef857
--- /dev/null
+++ b/bigquery/cloud-client/resources/schema.json
@@ -0,0 +1 @@
+[{"type": "STRING", "name": "Name"}, {"type": "INTEGER", "name": "Age"}, {"type": "FLOAT", "name": "Weight"}, {"type": "BOOLEAN", "name": "IsMagic"}]
\ No newline at end of file
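schema.json mirrors the two data files, and the tests assume test_dataset.test_import_table already exists with this schema. A minimal sketch of creating that table, using only calls that appear elsewhere in this change (SchemaField, table.exists, table.create) and assuming the dataset itself exists:

    from gcloud import bigquery


    def create_test_table():
        dataset = bigquery.Client().dataset('test_dataset')
        table = dataset.table('test_import_table')
        if not table.exists():
            # Matches resources/schema.json.
            table.schema = [
                bigquery.SchemaField('Name', 'STRING'),
                bigquery.SchemaField('Age', 'INTEGER'),
                bigquery.SchemaField('Weight', 'FLOAT'),
                bigquery.SchemaField('IsMagic', 'BOOLEAN'),
            ]
            table.create()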
diff --git a/bigquery/cloud-client/snippets.py b/bigquery/cloud-client/snippets.py
new file mode 100644
index 00000000000..bcf534c8d34
--- /dev/null
+++ b/bigquery/cloud-client/snippets.py
@@ -0,0 +1,174 @@
+#!/usr/bin/env python
+
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Samples that demonstrate basic operations in the BigQuery API.
+
+For more information, see the README.md under /bigquery.
+
+Example invocation:
+    $ python snippets.py list-datasets
+
+The dataset and table should already exist.
+"""
+
+import argparse
+
+from gcloud import bigquery
+
+
+def list_projects():
+    raise NotImplementedError(
+        'https://github.com/GoogleCloudPlatform/gcloud-python/issues/2143')
+
+
+def list_datasets(project=None):
+    """Lists all datasets in a given project.
+
+    If no project is specified, then the currently active project is used.
+    """
+    bigquery_client = bigquery.Client(project=project)
+
+    datasets = []
+    page_token = None
+
+    while True:
+        results, page_token = bigquery_client.list_datasets(
+            page_token=page_token)
+        datasets.extend(results)
+
+        if not page_token:
+            break
+
+    for dataset in datasets:
+        print(dataset.name)
+
+
+def list_tables(dataset_name, project=None):
+    """Lists all of the tables in a given dataset.
+
+    If no project is specified, then the currently active project is used.
+    """
+    bigquery_client = bigquery.Client(project=project)
+    dataset = bigquery_client.dataset(dataset_name)
+
+    if not dataset.exists():
+        print('Dataset {} does not exist.'.format(dataset_name))
+        return
+
+    tables = []
+    page_token = None
+
+    while True:
+        results, page_token = dataset.list_tables(page_token=page_token)
+        tables.extend(results)
+
+        if not page_token:
+            break
+
+    for table in tables:
+        print(table.name)
+
+
+def list_rows(dataset_name, table_name, project=None):
+    """Prints rows in the given table.
+
+    Prints at most 25 rows for brevity, as tables can contain large numbers
+    of rows.
+
+    If no project is specified, then the currently active project is used.
+    """
+    bigquery_client = bigquery.Client(project=project)
+    dataset = bigquery_client.dataset(dataset_name)
+    table = dataset.table(table_name)
+
+    if not table.exists():
+        print('Table {}:{} does not exist.'.format(dataset_name, table_name))
+        return
+
+    # Reload the table so that the schema is available.
+    table.reload()
+
+    rows = []
+    page_token = None
+
+    while len(rows) < 25:
+        results, total_rows, page_token = table.fetch_data(
+            max_results=25, page_token=page_token)
+        rows.extend(results)
+
+        if not page_token:
+            break
+
+    # Use format to create a simple table.
+    format_string = '{:<16} ' * len(table.schema)
+
+    # Print schema field names.
+    field_names = [field.name for field in table.schema]
+    print(format_string.format(*field_names))
+
+    for row in rows:
+        print(format_string.format(*row))
+
+
+def delete_table(dataset_name, table_name, project=None):
+    """Deletes a table in a given dataset.
+
+    If no project is specified, then the currently active project is used.
+    """
+    bigquery_client = bigquery.Client(project=project)
+    dataset = bigquery_client.dataset(dataset_name)
+    table = dataset.table(table_name)
+
+    table.delete()
+
+    print('Table {}:{} deleted.'.format(dataset_name, table_name))
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser.add_argument('--project', default=None)
+
+    subparsers = parser.add_subparsers(dest='command')
+
+    list_datasets_parser = subparsers.add_parser(
+        'list-datasets', help=list_datasets.__doc__)
+
+    list_tables_parser = subparsers.add_parser(
+        'list-tables', help=list_tables.__doc__)
+    list_tables_parser.add_argument('dataset_name')
+
+    list_rows_parser = subparsers.add_parser(
+        'list-rows', help=list_rows.__doc__)
+    list_rows_parser.add_argument('dataset_name')
+    list_rows_parser.add_argument('table_name')
+
+    delete_table_parser = subparsers.add_parser(
+        'delete-table', help=delete_table.__doc__)
+    delete_table_parser.add_argument('dataset_name')
+    delete_table_parser.add_argument('table_name')
+
+    args = parser.parse_args()
+
+    if args.command == 'list-datasets':
+        list_datasets(args.project)
+    elif args.command == 'list-tables':
+        list_tables(args.dataset_name, args.project)
+    elif args.command == 'list-rows':
+        list_rows(args.dataset_name, args.table_name, args.project)
+    elif args.command == 'delete-table':
+        delete_table(args.dataset_name, args.table_name, args.project)
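list_datasets and list_tables repeat the same page_token loop; a sketch of a shared helper for any listing call that returns a (results, next_page_token) pair (fetch_data returns a three-tuple and would need its own variant):

    def collect_all(list_fn):
        # list_fn is a callable such as bigquery_client.list_datasets or
        # dataset.list_tables that returns (results, next_page_token).
        items = []
        page_token = None
        while True:
            results, page_token = list_fn(page_token=page_token)
            items.extend(results)
            if not page_token:
                return items

With this helper, the body of list_datasets reduces to printing the names in collect_all(bigquery_client.list_datasets).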
+ """ + bigquery_client = bigquery.Client(project=project) + dataset = bigquery_client.dataset(dataset_name) + table = dataset.table(table_name) + + table.delete() + + print('Table {}:{} deleted.'.format(dataset_name, table_name)) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument('--project', default=None) + + subparsers = parser.add_subparsers(dest='command') + + list_datasets_parser = subparsers.add_parser( + 'list-datasets', help=list_datasets.__doc__) + + list_tables_parser = subparsers.add_parser( + 'list-tables', help=list_tables.__doc__) + list_tables_parser.add_argument('dataset_name') + + list_rows_parser = subparsers.add_parser( + 'list-rows', help=list_rows.__doc__) + list_rows_parser.add_argument('dataset_name') + list_rows_parser.add_argument('table_name') + + delete_table_parser = subparsers.add_parser( + 'delete-table', help=delete_table.__doc__) + delete_table_parser.add_argument('dataset_name') + delete_table_parser.add_argument('table_name') + + args = parser.parse_args() + + if args.command == 'list-datasets': + list_datasets(args.project) + elif args.command == 'list-tables': + list_tables(args.dataset_name, args.project) + elif args.command == 'list-rows': + list_rows(args.dataset_name, args.table_name, args.project) + elif args.command == 'delete-table': + delete_table(args.dataset_name, args.table_name, args.project) diff --git a/bigquery/cloud-client/snippets_test.py b/bigquery/cloud-client/snippets_test.py new file mode 100644 index 00000000000..0a52922b9ac --- /dev/null +++ b/bigquery/cloud-client/snippets_test.py @@ -0,0 +1,77 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from gcloud import bigquery +import pytest + +import snippets + + +DATASET_ID = 'test_dataset' +TABLE_ID = 'test_import_table' + + +@pytest.mark.xfail( + strict=True, + reason='https://github.com/GoogleCloudPlatform/gcloud-python/issues/2143') +def test_list_projects(): + snippets.list_projects() + # No need to check the ouput, lack of exception is enough. + + +def test_list_datasets(capsys): + # Requires the dataset to have been created in the test project. + snippets.list_datasets() + + out, _ = capsys.readouterr() + + assert DATASET_ID in out + + +def test_list_tables(capsys): + # Requires teh dataset and table to have been created in the test project. + snippets.list_tables(DATASET_ID) + + out, _ = capsys.readouterr() + + assert TABLE_ID in out + + +def test_list_rows(capsys): + # Requires the dataset and table to have been created in the test project. + + # Check for the schema. It's okay if the table is empty as long as there + # aren't any errors. 
diff --git a/bigquery/cloud-client/stream_data.py b/bigquery/cloud-client/stream_data.py
new file mode 100644
index 00000000000..5df6be11444
--- /dev/null
+++ b/bigquery/cloud-client/stream_data.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Loads a single row of data directly into BigQuery.
+
+For more information, see the README.md under /bigquery.
+
+Example invocation:
+    $ python stream_data.py example_dataset example_table \
+        '["Gandalf", 2000]'
+
+The dataset and table should already exist.
+"""
+
+import argparse
+import json
+from pprint import pprint
+
+from gcloud import bigquery
+
+
+def stream_data(dataset_name, table_name, json_data):
+    bigquery_client = bigquery.Client()
+    dataset = bigquery_client.dataset(dataset_name)
+    table = dataset.table(table_name)
+    data = json.loads(json_data)
+
+    # Reload the table to get the schema.
+    table.reload()
+
+    rows = [data]
+    errors = table.insert_data(rows)
+
+    if not errors:
+        print('Loaded 1 row into {}:{}'.format(dataset_name, table_name))
+    else:
+        print('Errors:')
+        pprint(errors)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser.add_argument('dataset_name')
+    parser.add_argument('table_name')
+    parser.add_argument(
+        'json_data',
+        help='The row to load into BigQuery as an array in JSON format.')
+
+    args = parser.parse_args()
+
+    stream_data(
+        args.dataset_name,
+        args.table_name,
+        args.json_data)
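insert_data accepts any number of rows per call, each a sequence matching the table schema, and batching rows is cheaper than one call per row. A sketch built only on the table.insert_data call shown above:

    from pprint import pprint


    def stream_rows(table, rows):
        # rows: a list of sequences matching the table schema,
        # e.g. [('Gandalf', 2000), ('Radagast', 1900)].
        errors = table.insert_data(rows)
        if errors:
            pprint(errors)
        else:
            print('Loaded {} rows.'.format(len(rows)))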
diff --git a/bigquery/cloud-client/stream_data_test.py b/bigquery/cloud-client/stream_data_test.py
new file mode 100644
index 00000000000..25982b88ad5
--- /dev/null
+++ b/bigquery/cloud-client/stream_data_test.py
@@ -0,0 +1,29 @@
+# Copyright 2015, Google, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import stream_data
+
+
+DATASET_ID = 'test_dataset'
+TABLE_ID = 'test_import_table'
+
+
+def test_stream_data(capsys):
+    stream_data.stream_data(
+        DATASET_ID,
+        TABLE_ID,
+        '["Gandalf", 2000]')
+
+    out, _ = capsys.readouterr()
+
+    assert 'Loaded 1 row' in out
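The tests in this change rely on cloud_config and resource fixtures that are defined outside the diff. A hypothetical conftest.py providing equivalents might look like the following; the CLOUD_STORAGE_BUCKET variable name and the CloudConfig shape are assumptions, not part of the change:

    import collections
    import os

    import pytest

    CloudConfig = collections.namedtuple('CloudConfig', ['storage_bucket'])


    @pytest.fixture
    def cloud_config():
        # Assumed environment variable naming the test bucket.
        return CloudConfig(storage_bucket=os.environ['CLOUD_STORAGE_BUCKET'])


    @pytest.fixture
    def resource():
        # Returns a helper that resolves files under resources/.
        def _resource(filename):
            return os.path.join(
                os.path.dirname(__file__), 'resources', filename)
        return _resource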