Skip to content

Commit b0a9682

Browse files
committed
Merge pull request #127 from GoogleCloudPlatform/data-by-post
Update sample to use native sdk functionality.
2 parents 6c724cc + cb57b8f commit b0a9682

File tree

1 file changed

+39
-71
lines changed

1 file changed

+39
-71
lines changed

bigquery/api/load_data_by_post.py

Lines changed: 39 additions & 71 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717
1818
This sample is used on this page:
1919
20-
https://cloud.google.com/bigquery/loading-data-post-request
20+
https://cloud.google.com/bigquery/loading-data-into-bigquery
2121
2222
For more information, see the README.md under /bigquery.
2323
"""
@@ -26,74 +26,60 @@
2626
import json
2727
import time
2828

29+
from apiclient.http import MediaFileUpload
30+
2931
from googleapiclient import discovery
30-
import httplib2
32+
3133
from oauth2client.client import GoogleCredentials
3234

3335

3436
# [START make_post]
35-
def make_post(http, schema, data, project_id, dataset_id, table_id):
37+
def load_data(schema_path, data_path, project_id, dataset_id, table_id):
3638
"""
3739
Creates an http POST request for loading data into
3840
a bigquery table
3941
4042
Args:
41-
http: an authorized httplib2 client,
42-
schema: a valid bigquery schema,
43-
see https://cloud.google.com/bigquery/docs/reference/v2/tables,
44-
data: valid JSON to insert into the table
43+
schema_path: the path to a file containing a valid bigquery schema.
44+
see https://cloud.google.com/bigquery/docs/reference/v2/tables
45+
data_path: the name of the file to insert into the table.
4546
4647
Returns: an http.request object
4748
"""
48-
url = ('https://www.googleapis.com/upload/bigquery/v2/projects/' +
49-
project_id + '/jobs')
50-
# Create the body of the request, separated by a boundary of xxx
51-
resource = ('--xxx\n' +
52-
'Content-Type: application/json; charset=UTF-8\n' + '\n' +
53-
'{\n' +
54-
' "configuration": {\n' +
55-
' "load": {\n' +
56-
' "schema": {\n'
57-
' "fields": ' + str(schema) + '\n' +
58-
' },\n' +
59-
' "destinationTable": {\n' +
60-
' "projectId": "' + project_id + '",\n' +
61-
' "datasetId": "' + dataset_id + '",\n' +
62-
' "tableId": "' + table_id + '"\n' +
63-
' }\n' +
64-
' }\n' +
65-
' }\n' +
66-
'}\n' +
67-
'--xxx\n' +
68-
'Content-Type: application/octet-stream\n' +
69-
'\n')
70-
# Append data to the request body
71-
resource += data
72-
73-
# Signify the end of the body
74-
resource += ('--xxx--\n')
75-
76-
headers = {'Content-Type': 'multipart/related; boundary=xxx'}
77-
78-
return http.request(url,
79-
method='POST',
80-
body=resource,
81-
headers=headers)
82-
# [END make_post]
83-
84-
85-
# [START poll_job]
86-
def poll_job(bigquery, job):
87-
"""Waits for a job to complete."""
49+
# Create a bigquery service object, using the application's default auth
50+
credentials = GoogleCredentials.get_application_default()
51+
bigquery = discovery.build('bigquery', 'v2', credentials=credentials)
52+
53+
insert_request = bigquery.jobs().insert(
54+
projectId=project_id,
55+
body={
56+
"configuration": {
57+
"load": {
58+
"schema": {
59+
"fields": json.load(open(schema_path, 'r'))
60+
},
61+
"destinationTable": {
62+
"projectId": project_id,
63+
"datasetId": dataset_id,
64+
"tableId": table_id
65+
}
66+
}
67+
}
68+
},
69+
media_body=MediaFileUpload(
70+
data_path,
71+
mimetype="application/octet-stream"))
72+
73+
job = insert_request.execute()
8874

8975
print('Waiting for job to finish...')
9076

91-
request = bigquery.jobs().get(
77+
status_request = bigquery.jobs().get(
9278
projectId=job['jobReference']['projectId'],
9379
jobId=job['jobReference']['jobId'])
9480

9581
while True:
96-
result = request.execute(num_retries=2)
82+
result = status_request.execute(num_retries=2)
9783

9884
if result['status']['state'] == 'DONE':
9985
if 'errorResult' in result['status']:
@@ -102,35 +88,17 @@ def poll_job(bigquery, job):
10288
return
10389

10490
time.sleep(1)
105-
# [END poll_job]
91+
# [END make_post]
10692

10793

10894
# [START main]
10995
def main(project_id, dataset_id, table_name, schema_path, data_path):
110-
credentials = GoogleCredentials.get_application_default()
111-
http = credentials.authorize(httplib2.Http())
112-
bigquery = discovery.build('bigquery', 'v2', credentials=credentials)
113-
114-
with open(schema_path, 'r') as schema_file:
115-
schema = schema_file.read()
116-
117-
with open(data_path, 'r') as data_file:
118-
data = data_file.read()
119-
120-
resp, content = make_post(
121-
http,
122-
schema,
123-
data,
96+
load_data(
97+
schema_path,
98+
data_path,
12499
project_id,
125100
dataset_id,
126101
table_name)
127-
128-
if resp.status == 200:
129-
job = json.loads(content)
130-
poll_job(bigquery, job)
131-
print("Success!")
132-
else:
133-
print("Http error code: {}".format(resp.status))
134102
# [END main]
135103

136104
if __name__ == '__main__':

0 commit comments

Comments
 (0)