Skip to content

Commit e6bd547

Browse files
committed
Merge pull request #2 from elibixby/bigquery
Samples from bigquery-samples-python
2 parents 3bf742d + 2e85600 commit e6bd547

17 files changed

+835
-0
lines changed

bigquery/requirements.txt

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
argparse==1.2.1
2+
google-api-python-client==1.3.2
3+
httplib2==0.9
4+
oauth2client==1.4.6
5+
py==1.4.26
6+
pyasn1==0.1.7
7+
pyasn1-modules==0.0.5
8+
rsa==3.1.4
9+
simplejson==3.6.5
10+
six==1.9.0
11+
tox==1.9.0
12+
uritemplate==0.6
13+
virtualenv==12.0.7
14+
wsgiref==0.1.2

bigquery/samples/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright 2015, Google, Inc.
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
#

bigquery/samples/async_query.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# Copyright 2015, Google, Inc.
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
#
14+
from __future__ import print_function # For python 2/3 interoperability
15+
from samples.utils import get_service, paging, poll_job
16+
import uuid
17+
import json
18+
19+
20+
# [START async_query]
def async_query(service, project_id, query, batch=False, num_retries=5):
    """Insert an asynchronous BigQuery query job and return the job resource.

    Args:
        service: authorized BigQuery service object (from get_service()).
        project_id: ID of the project to bill and run the job in.
        query: BigQuery SQL query string.
        batch: if True run at 'BATCH' priority, else 'INTERACTIVE'.
        num_retries: retries passed to execute() for transient 500 errors.

    Returns:
        The inserted job resource (dict) as returned by jobs().insert().
    """
    # Generate a unique job ID so retries don't accidentally duplicate
    # the query.
    job_data = {
        'jobReference': {
            'projectId': project_id,
            # BUG FIX: the API expects the camelCase key 'jobId'; the
            # original snake_case 'job_id' is ignored by BigQuery, which
            # defeated the retry de-duplication, and the caller reads
            # query_job['jobReference']['jobId'] downstream.
            'jobId': str(uuid.uuid4())
        },
        'configuration': {
            'query': {
                'query': query,
                'priority': 'BATCH' if batch else 'INTERACTIVE',
            },
        }
    }
    return service.jobs().insert(
        projectId=project_id,
        body=job_data).execute(num_retries=num_retries)
# [END async_query]
40+
41+
42+
# [START run]
def run(project_id, query_string, batch, num_retries, interval):
    """Run a query, wait for completion, and yield result pages as JSON.

    Args:
        project_id: ID of the project to run the query in.
        query_string: BigQuery SQL query.
        batch: whether to run at 'BATCH' priority.
        num_retries: retries for transient API errors.
        interval: seconds between job-status polls.

    Yields:
        One JSON string per result page, serializing that page's 'rows'.
        NOTE(review): a query with zero result rows may omit 'rows' from
        the response — would raise KeyError here; confirm against the API.
    """
    service = get_service()

    query_job = async_query(service,
                            project_id,
                            query_string,
                            batch,
                            num_retries)

    # Block until the job reaches DONE state.
    poll_job(service,
             query_job['jobReference']['projectId'],
             query_job['jobReference']['jobId'],
             interval,
             num_retries)

    # jobReference supplies projectId and jobId to getQueryResults.
    for page in paging(service,
                       service.jobs().getQueryResults,
                       num_retries=num_retries,
                       **query_job['jobReference']):
        yield json.dumps(page['rows'])
# [END run]
66+
67+
68+
# [START main]
def main():
    """Interactively prompt for query parameters and print results."""
    project_id = raw_input("Enter the project ID: ")
    query_string = raw_input("Enter the Bigquery SQL Query: ")
    batch = raw_input("Run query as batch (y/n)?: ") in ('True',
                                                         'true',
                                                         'y',
                                                         'Y',
                                                         'yes',
                                                         'Yes')

    # BUG FIX: raw_input returns strings; cast so that
    # execute(num_retries=...) and the poll-sleep receive numbers instead
    # of failing downstream.
    num_retries = int(raw_input(
        "Enter number of times to retry in case of 500 error: "))
    interval = int(raw_input(
        "Enter how often to poll the query for completion (seconds): "))

    for result in run(project_id, query_string, batch, num_retries, interval):
        print(result)
# [END main]

bigquery/samples/discovery_doc.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# Copyright 2015, Google, Inc.
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
#
14+
import os
15+
import json
16+
import httplib2
17+
import time
18+
19+
# [START build_and_update]

# A module that takes care of caching and updating discovery docs
# for google-api-python-clients (until such a feature is integrated).

RESOURCE_PATH = '..'  # look for discovery docs in the parent folder
MAX_AGE = 86400  # update discovery docs older than a day (seconds)
28+
def build_and_update(api, version):
    """Build a service object from a locally cached discovery document.

    Looks for '<api>.<version>' under RESOURCE_PATH; refreshes the cached
    document if it is missing or older than MAX_AGE seconds.

    Args:
        api: API name, e.g. 'bigquery'.
        version: API version, e.g. 'v2'.

    Returns:
        A service object built from the cached discovery document, using
        application-default credentials.
    """
    # Imported lazily so merely importing this module doesn't require the
    # client libraries to be installed.
    from oauth2client.client import GoogleCredentials
    from googleapiclient.discovery import build_from_document

    path = os.path.join(RESOURCE_PATH, '{}.{}'.format(api, version))
    try:
        age = time.time() - os.path.getmtime(path)
        if age > MAX_AGE:
            _update_discovery_doc(api, version, path)
    except os.error:
        # Cache file missing or unreadable: fetch a fresh copy.
        _update_discovery_doc(api, version, path)

    with open(path, 'r') as discovery_doc:
        return build_from_document(discovery_doc.read(),
                                   http=httplib2.Http(),
                                   credentials=GoogleCredentials
                                   .get_application_default())
46+
47+
def _update_discovery_doc(api, version, path):
    """Fetch the discovery document for api/version and cache it at path.

    Raises:
        HttpError: the discovery endpoint returned status >= 400.
        InvalidJsonError: the response body was not valid JSON.
    """
    from apiclient.discovery import DISCOVERY_URI
    from apiclient.errors import HttpError
    from apiclient.errors import InvalidJsonError
    import uritemplate

    requested_url = uritemplate.expand(DISCOVERY_URI,
                                       {'api': api, 'apiVersion': version})
    resp, content = httplib2.Http().request(requested_url)
    if resp.status >= 400:
        raise HttpError(resp, content, uri=requested_url)

    # BUG FIX: parse BEFORE opening the cache file for writing. The
    # original opened path with 'w' (truncating the existing cached doc)
    # and only then called json.loads, so a bad response destroyed the
    # previously cached document.
    try:
        discovery_json = json.loads(content)
    except ValueError:
        raise InvalidJsonError(
            'Bad JSON: %s from %s.' % (content, requested_url))

    with open(path, 'w') as discovery_doc:
        json.dump(discovery_json, discovery_doc)
# [END build_and_update]
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
from samples.utils import get_service, poll_job
2+
import uuid
3+
4+
5+
# [START export_table]
def export_table(service, cloud_storage_path,
                 projectId, datasetId, tableId,
                 num_retries=5):
    """Insert a BigQuery extract job and return the job resource.

    Args:
        service: authorized BigQuery service object.
        cloud_storage_path: destination 'gs://...' URI for the export.
        projectId: project that owns the table and bills the job.
        datasetId: dataset containing the source table.
        tableId: table to export.
        num_retries: retries passed to execute() for transient errors.

    Returns:
        The inserted job resource (dict).
    """
    # Generate a unique job ID so retries don't accidentally duplicate
    # the export.
    job_data = {
        'jobReference': {
            'projectId': projectId,
            'jobId': str(uuid.uuid4())
        },
        'configuration': {
            'extract': {
                'sourceTable': {
                    'projectId': projectId,
                    'datasetId': datasetId,
                    'tableId': tableId,
                },
                'destinationUris': [cloud_storage_path],
            }
        }
    }
    return service.jobs().insert(
        projectId=projectId,
        body=job_data).execute(num_retries=num_retries)
# [END export_table]
31+
32+
33+
# [START run]
def run(cloud_storage_path,
        projectId, datasetId, tableId,
        num_retries, interval):
    """Export a table to Cloud Storage and block until the job completes.

    Args:
        cloud_storage_path: destination 'gs://...' URI.
        projectId, datasetId, tableId: identify the source table.
        num_retries: retries for transient API errors.
        interval: seconds between job-status polls.
    """
    bigquery = get_service()
    resource = export_table(bigquery, cloud_storage_path,
                            projectId, datasetId, tableId, num_retries)
    # Wait for the extract job to finish before returning.
    poll_job(bigquery,
             resource['jobReference']['projectId'],
             resource['jobReference']['jobId'],
             interval,
             num_retries)
# [END run]
47+
48+
49+
# [START main]
def main():
    """Interactively prompt for export parameters and run the export."""
    projectId = raw_input("Enter the project ID: ")
    datasetId = raw_input("Enter a dataset ID: ")
    tableId = raw_input("Enter a table name to copy: ")
    cloud_storage_path = raw_input(
        "Enter a Google Cloud Storage URI: ")

    # BUG FIX: raw_input returns strings; cast so the poll interval and
    # execute(num_retries=...) receive numbers instead of failing later.
    interval = int(raw_input(
        "Enter how often to poll the job (in seconds): "))
    num_retries = int(raw_input(
        "Enter the number of retries in case of 500 error: "))

    run(cloud_storage_path,
        projectId, datasetId, tableId,
        num_retries, interval)

    # BUG FIX: was a Python-2-only `print` statement; the sibling samples
    # in this commit use print_function, and the parenthesized call works
    # on both Python 2 and 3.
    print('Done exporting!')
# [END main]

bigquery/samples/load_data_by_post.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
# Copyright 2015, Google, Inc.
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
#
14+
import json
15+
import httplib2
16+
from samples.utils import get_service, poll_job
17+
from oauth2client.client import GoogleCredentials
18+
19+
20+
# [START make_post]
def make_post(http, schema, data, projectId, datasetId, tableId):
    """POST a multipart/related BigQuery load job with inline data.

    Args:
        http: authorized httplib2.Http-like object.
        schema: JSON array (as a string) describing the table fields.
        data: raw file contents to load, appended as the second MIME part.
        projectId, datasetId, tableId: identify the destination table.

    Returns:
        The (response, content) tuple from http.request().
    """
    url = ('https://www.googleapis.com/upload/bigquery/v2/projects/' +
           projectId + '/jobs')
    # Create the body of the request, separated by a boundary of xxx.
    # NOTE(review): the leading indentation inside this hand-built JSON
    # was lost when this sample was transcribed; the API ignores such
    # whitespace either way, so the single spaces here are preserved as-is.
    resource = ('--xxx\n' +
                'Content-Type: application/json; charset=UTF-8\n' + '\n' +
                '{\n' +
                ' "configuration": {\n' +
                ' "load": {\n' +
                ' "schema": {\n'
                ' "fields": ' + str(schema) + '\n' +
                ' },\n' +
                ' "destinationTable": {\n' +
                ' "projectId": "' + projectId + '",\n' +
                ' "datasetId": "' + datasetId + '",\n' +
                ' "tableId": "' + tableId + '"\n' +
                ' }\n' +
                ' }\n' +
                ' }\n' +
                '}\n' +
                '--xxx\n' +
                'Content-Type: application/octet-stream\n' +
                '\n')
    # Append data to the request body.
    resource += data

    # Signify the end of the body.
    resource += ('--xxx--\n')

    headers = {'Content-Type': 'multipart/related; boundary=xxx'}

    return http.request(url,
                        method='POST',
                        body=resource,
                        headers=headers)
# [END make_post]
57+
58+
59+
# [START main]
def main():
    """Interactively load a local data file into a BigQuery table via POST."""
    credentials = GoogleCredentials.get_application_default()
    http = credentials.authorize(httplib2.Http())
    projectId = raw_input('Enter the project ID: ')
    datasetId = raw_input('Enter a dataset ID: ')
    tableId = raw_input('Enter a table name to load the data to: ')
    schema_path = raw_input(
        'Enter the path to the schema file for the table: ')

    with open(schema_path, 'r') as schema_file:
        schema = schema_file.read()

    data_path = raw_input('Enter the path to the data file: ')

    with open(data_path, 'r') as data_file:
        data = data_file.read()

    resp, content = make_post(http,
                              schema,
                              data,
                              projectId,
                              datasetId,
                              tableId)

    if resp.status == 200:
        # The upload returns the job resource; poll it until done.
        job_resource = json.loads(content)
        service = get_service(credentials)
        poll_job(service, **job_resource['jobReference'])
        print("Success!")
    else:
        print("Http error code: {}".format(resp.status))
# [END main]


if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)