Skip to content

Commit 03d320a

Browse files
aribray authored and dandhlee committed
add: Document Translation snippets - translate_document and batch_translate_document (#183)
* add translate document snippet and test Change-Id: I44375a48211e4e25ebbd239e8d5cac84b5ce5eaf * remove gcs_source Change-Id: Iacf445ef4b24eb5abc8bad776a3217622c14b063 * language code Change-Id: I2bab14b86f60520dacb60fba741d64372f5090ac * never mind Change-Id: I4aacc10e31154672272c22c4ab780e10c82a517e * add batch translate document snippet and test Change-Id: I858d4b05c7efd1f7dac2c409eee841208663b22d * increase timeout Change-Id: Ic7b4bee9c74721fffcc05dfe1dacb5fe5bb14c13 * increase timeout Change-Id: I62d71a5a4b444d73d658afa243173814f3cbe900 * increase timeout Change-Id: I8d15d94473471e3028f08699205877c5d972522d * update year Change-Id: Ide83e4507e140d7a6e92ed03b0d864ef4b3f2721 * remove unnecessary comments Change-Id: Id778e7a0fff787f8b3e1eb63a7bf3217c0145524 * fix formatting Change-Id: I45d0b1519b975a22f8166acf0d60e6ff57699cd2 * remove whitespace Change-Id: I597a64c5ad9a99abc1c94c609cd87ec1867ba8a8
1 parent 075c3b6 commit 03d320a

5 files changed

+192
-0
lines changed
Binary file not shown.
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
# Copyright 2021 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# [START translate_v3beta1_batch_translate_document]
16+
17+
from google.cloud import translate_v3beta1 as translate
18+
19+
20+
def batch_translate_document(
    input_uri: str,
    output_uri: str,
    project_id: str,
    timeout=180,
):
    """Batch-translate Cloud Storage documents from ``en-US`` to ``fr-FR``.

    Starts a batch document translation operation, waits for it to finish
    and prints the total number of pages reported in the response.

    Args:
        input_uri: Cloud Storage location of the source input. Either a
            single file (for example, ``gs://translation-test/input.docx``)
            or a wildcard (for example, ``gs://translation-test/*``).
            Supported file types:
            https://cloud.google.com/translate/docs/supported-formats
        output_uri: Cloud Storage URI prefix used for the translated output.
        project_id: Google Cloud project ID used to build the request parent.
        timeout: Value handed to ``operation.result`` while waiting for the
            operation (presumably seconds; confirm against the client
            library documentation). Defaults to 180.
    """
    client = translate.TranslationServiceClient()

    # Batch translation does not support the ``global`` location.
    location = "us-central1"
    parent = f"projects/{project_id}/locations/{location}"

    # Supported language codes: https://cloud.google.com/translate/docs/language
    request = {
        "parent": parent,
        "source_language_code": "en-US",
        "target_language_codes": ["fr-FR"],
        "input_configs": [{"gcs_source": {"input_uri": input_uri}}],
        "output_config": {"gcs_destination": {"output_uri_prefix": output_uri}},
    }
    operation = client.batch_translate_document(request=request)

    print("Waiting for operation to complete...")
    result = operation.result(timeout)

    print("Total Pages: {}".format(result.total_pages))
60+
61+
62+
# [END translate_v3beta1_batch_translate_document]
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# Copyright 2021 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
import os
17+
import uuid
18+
19+
from google.cloud import storage
20+
import pytest
21+
22+
23+
import translate_v3beta1_batch_translate_document
24+
25+
26+
# Project ID for the test run, read from the environment at import time;
# a missing GOOGLE_CLOUD_PROJECT variable raises KeyError.
PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"]
27+
28+
29+
@pytest.fixture(scope="function")
def bucket():
    """Yield a newly created Cloud Storage bucket and delete it afterwards.

    The bucket is created in ``us-central1`` under a ``test-<uuid4>`` name.
    """
    name = "test-{}".format(uuid.uuid4())
    gcs = storage.Client()
    temp_bucket = gcs.create_bucket(gcs.bucket(name), location="us-central1")

    yield temp_bucket

    # Teardown: delete the bucket, passing force=True to ``delete``.
    temp_bucket.delete(force=True)
40+
41+
42+
@pytest.mark.flaky(max_runs=3, min_passes=1)
def test_batch_translate_document(capsys, bucket):
    """Run the batch translation sample and check what it printed."""
    # Write the output under the temporary bucket supplied by the fixture.
    destination = f"gs://{bucket.name}/translation/BATCH_TRANSLATE_DOCUMENT_OUTPUT/"

    translate_v3beta1_batch_translate_document.batch_translate_document(
        input_uri="gs://cloud-samples-data/translation/async_invoices/*",
        output_uri=destination,
        project_id=PROJECT_ID,
        timeout=1000,
    )

    captured = capsys.readouterr()
    assert "Total Pages" in captured.out
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# Copyright 2021 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# [START translate_v3beta1_translate_document]
16+
from google.cloud import translate_v3beta1 as translate
17+
18+
19+
def translate_document(project_id: str, file_path: str):
    """Translate a local document to ``fr-FR`` and print the detected language.

    The file is read in binary mode and sent inline with the request using
    the ``application/pdf`` mime type.

    Args:
        project_id: Google Cloud project ID used to build the request parent.
        file_path: Path of the local file whose bytes are sent for translation.
    """
    client = translate.TranslationServiceClient()

    location = "us-central1"
    parent = f"projects/{project_id}/locations/{location}"

    # Supported file types: https://cloud.google.com/translate/docs/supported-formats
    with open(file_path, "rb") as source_file:
        payload = source_file.read()

    request = {
        "parent": parent,
        "target_language_code": "fr-FR",
        "document_input_config": {
            "content": payload,
            "mime_type": "application/pdf",
        },
    }
    response = client.translate_document(request=request)

    # To view the translated document, write
    # `response.document_translation.byte_stream_outputs` to a file.
    # If `document_output_config` is not provided in the request, the
    # translated file is only returned through a byte-stream and its output
    # mime type is the same as the input file's mime type.
    detected = response.document_translation.detected_language_code
    print("Response: Detected Language Code - {}".format(detected))
48+
49+
50+
# [END translate_v3beta1_translate_document]
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Copyright 2021 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
import os
17+
18+
import translate_v3beta1_translate_document
19+
20+
# Project ID for the test run, read from the environment at import time;
# a missing GOOGLE_CLOUD_PROJECT variable raises KeyError.
PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"]
21+
22+
# Relative path of the PDF handed to the sample; being relative, it is
# resolved against the working directory the tests are run from.
FILE_PATH = "resources/fake_invoice.pdf"
23+
24+
25+
def test_translate_document(capsys):
    """Run the translate_document sample and check that it printed a response."""
    translate_v3beta1_translate_document.translate_document(
        project_id=PROJECT_ID,
        file_path=FILE_PATH,
    )

    captured = capsys.readouterr()
    assert "Response" in captured.out

0 commit comments

Comments
 (0)