-
Notifications
You must be signed in to change notification settings - Fork 6.5k
Translate: migrate published v3 translate batch samples #2914
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
czahedi
merged 16 commits into
GoogleCloudPlatform:master
from
munkhuushmgl:translate-v3-batch-samples
Mar 2, 2020
Merged
Changes from 13 commits
Commits
Show all changes
16 commits
Select commit
Hold shift + click to select a range
8039dc1
Translate: migrate published v3 batch samples
munkhuushmgl 2067cf6
added missing requirements
munkhuushmgl 64e1670
extended wait time
munkhuushmgl f53a769
Merge branch 'master' into translate-v3-batch-samples
munkhuushmgl fe9bc56
Merge branch 'master' into translate-v3-batch-samples
munkhuushmgl 62af28c
inlined some vals and specified input and output
munkhuushmgl 3c34ac4
Merge branch 'translate-v3-batch-samples' of https://github.com/munkh…
munkhuushmgl 4a8a65b
Merge branch 'master' into translate-v3-batch-samples
munkhuushmgl 741d5fa
Merge branch 'master' into translate-v3-batch-samples
munkhuushmgl 990be2f
added link to supported file types & modified default values of input…
munkhuushmgl ae36779
Merge branch 'translate-v3-batch-samples' of https://github.com/munkh…
munkhuushmgl c9a2d1e
Merge branch 'master' into translate-v3-batch-samples
munkhuushmgl 0eee982
Merge branch 'master' into translate-v3-batch-samples
munkhuushmgl 89fb849
fixed small nit
munkhuushmgl 3dc4fd5
Merge branch 'translate-v3-batch-samples' of https://github.com/munkh…
munkhuushmgl d20b328
Merge branch 'master' into translate-v3-batch-samples
munkhuushmgl File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,3 @@ | ||
google-cloud-translate==2.0.0 | ||
google-cloud-storage==1.19.1 | ||
google-cloud-automl==0.9.0 |
67 changes: 67 additions & 0 deletions
67
translate/automl/translate_v3_batch_translate_text_with_model.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
# Copyright 2020 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
|
||
# [START translate_v3_batch_translate_text_with_model] | ||
from google.cloud import translate | ||
|
||
|
||
def batch_translate_text_with_model(
    input_uri="gs://YOUR_BUCKET_ID/path/to/your/file.txt",
    output_uri="gs://YOUR_BUCKET_ID/path/to/save/results/",
    project_id="YOUR_PROJECT_ID",
    model_id="YOUR_MODEL_ID",
):
    """Batch translate text from English to Japanese using a Translation model.

    The model can be an AutoML model or the general (built-in) model.

    Args:
        input_uri: Cloud Storage URI of the file to translate.
        output_uri: Cloud Storage URI prefix under which results are written.
        project_id: ID of the Google Cloud project.
        model_id: ID of the model used for the translation.
    """

    client = translate.TranslationServiceClient()

    # The request parent and the model path are both built from this single
    # value so the two locations cannot drift apart.
    location = "us-central1"
    parent = client.location_path(project_id, location)
    model_path = "projects/{}/locations/{}/models/{}".format(
        project_id, location, model_id
    )

    # Supported file types: https://cloud.google.com/translate/docs/supported-formats
    input_configs_element = {
        "gcs_source": {"input_uri": input_uri},
        "mime_type": "text/plain",  # Can be "text/plain" or "text/html".
    }
    output_config = {"gcs_destination": {"output_uri_prefix": output_uri}}

    # Supported language codes: https://cloud.google.com/translate/docs/languages
    models = {"ja": model_path}  # Keyed by target language code.

    operation = client.batch_translate_text(
        parent=parent,
        source_language_code="en",
        target_language_codes=["ja"],  # Up to 10 language codes here.
        input_configs=[input_configs_element],
        output_config=output_config,
        models=models,
    )

    print(u"Waiting for operation to complete...")
    response = operation.result()

    # Report the character totals returned by the finished operation.
    print(u"Total Characters: {}".format(response.total_characters))
    print(u"Translated Characters: {}".format(response.translated_characters))
|
||
|
||
# [END translate_v3_batch_translate_text_with_model] |
46 changes: 46 additions & 0 deletions
46
translate/automl/translate_v3_batch_translate_text_with_model_test.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
# Copyright 2020 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import os | ||
import pytest | ||
import uuid | ||
import translate_v3_batch_translate_text_with_model | ||
from google.cloud import storage | ||
|
||
PROJECT_ID = os.environ["GCLOUD_PROJECT"] | ||
MODEL_ID = "TRL3128559826197068699" | ||
|
||
|
||
@pytest.fixture(scope="function")
def bucket():
    """Provide a uniquely named temporary bucket for translation output.

    The bucket is created before the test body runs and deleted with
    ``force=True`` once the test has finished.
    """
    temp_bucket = storage.Client().create_bucket(str(uuid.uuid1()))

    yield temp_bucket

    temp_bucket.delete(force=True)
|
||
|
||
def test_batch_translate_text_with_model(capsys, bucket):
    """Run the model sample and check the character counts it prints."""
    output_uri = "gs://{}/translation/BATCH_TRANSLATION_OUTPUT/".format(bucket.name)
    sample = translate_v3_batch_translate_text_with_model

    sample.batch_translate_text_with_model(
        "gs://cloud-samples-data/translation/custom_model_text.txt",
        output_uri,
        PROJECT_ID,
        MODEL_ID,
    )

    stdout, _ = capsys.readouterr()
    assert "Total Characters: 15" in stdout
    assert "Translated Characters: 15" in stdout
55 changes: 55 additions & 0 deletions
55
translate/cloud-client/translate_v3_batch_translate_text.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
# Copyright 2020 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# [START translate_v3_batch_translate_text] | ||
from google.cloud import translate | ||
|
||
|
||
def batch_translate_text(
    input_uri="gs://YOUR_BUCKET_ID/path/to/your/file.txt",
    output_uri="gs://YOUR_BUCKET_ID/path/to/save/results/",
    project_id="YOUR_PROJECT_ID"
):
    """Translate a text file on GCS from English to Japanese, saving results to GCS.

    Args:
        input_uri: Cloud Storage URI of the file to translate.
        output_uri: Cloud Storage URI prefix under which results are written.
        project_id: ID of the Google Cloud project.
    """

    client = translate.TranslationServiceClient()
    parent = client.location_path(project_id, "us-central1")

    # Supported file types: https://cloud.google.com/translate/docs/supported-formats
    source_config = {
        "gcs_source": {"input_uri": input_uri},
        "mime_type": "text/plain",  # Can be "text/plain" or "text/html".
    }
    destination_config = {"gcs_destination": {"output_uri_prefix": output_uri}}

    # Supported language codes: https://cloud.google.com/translate/docs/languages
    operation = client.batch_translate_text(
        parent=parent,
        source_language_code="en",
        target_language_codes=["ja"],  # Up to 10 language codes here.
        input_configs=[source_config],
        output_config=destination_config,
    )

    print(u"Waiting for operation to complete...")
    # NOTE(review): 90 is presumably the wait timeout in seconds - confirm.
    result = operation.result(90)

    print(u"Total Characters: {}".format(result.total_characters))
    print(u"Translated Characters: {}".format(result.translated_characters))
|
||
|
||
# [END translate_v3_batch_translate_text] |
43 changes: 43 additions & 0 deletions
43
translate/cloud-client/translate_v3_batch_translate_text_test.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
# Copyright 2020 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import os | ||
import pytest | ||
import translate_v3_batch_translate_text | ||
import uuid | ||
from google.cloud import storage | ||
|
||
PROJECT_ID = os.environ["GCLOUD_PROJECT"] | ||
|
||
|
||
@pytest.fixture(scope="function")
def bucket():
    """Provide a uniquely named temporary bucket for translation output.

    The bucket is created before the test body runs and deleted with
    ``force=True`` once the test has finished.
    """
    temp_bucket = storage.Client().create_bucket(str(uuid.uuid1()))

    yield temp_bucket

    temp_bucket.delete(force=True)
|
||
|
||
def test_batch_translate_text(capsys, bucket):
    """Run the batch sample and check that it reports a character total."""
    output_uri = "gs://{}/translation/BATCH_TRANSLATION_OUTPUT/".format(bucket.name)

    translate_v3_batch_translate_text.batch_translate_text(
        "gs://cloud-samples-data/translation/text.txt", output_uri, PROJECT_ID
    )

    stdout, _ = capsys.readouterr()
    assert "Total Characters" in stdout
74 changes: 74 additions & 0 deletions
74
translate/cloud-client/translate_v3_batch_translate_text_with_glossary.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
# Copyright 2020 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
|
||
# [START translate_v3_batch_translate_text_with_glossary] | ||
from google.cloud import translate | ||
|
||
|
||
def batch_translate_text_with_glossary(
    input_uri="gs://YOUR_BUCKET_ID/path/to/your/file.txt",
    output_uri="gs://YOUR_BUCKET_ID/path/to/save/results/",
    project_id="YOUR_PROJECT_ID",
    glossary_id="YOUR_GLOSSARY_ID",
):
    """Translate a text file on GCS from English to Japanese with a glossary.

    The results are stored in a GCS location and the glossary is applied to
    the translation.

    Args:
        input_uri: Cloud Storage URI of the file to translate.
        output_uri: Cloud Storage URI prefix under which results are written.
        project_id: ID of the Google Cloud project.
        glossary_id: ID of the glossary to apply.
    """

    client = translate.TranslationServiceClient()

    # The request parent and the glossary path are both built from this
    # single value so the two locations cannot drift apart.
    location = "us-central1"
    parent = client.location_path(project_id, location)

    # Supported file types: https://cloud.google.com/translate/docs/supported-formats
    input_configs_element = {
        "gcs_source": {"input_uri": input_uri},
        "mime_type": "text/plain",  # Can be "text/plain" or "text/html".
    }
    output_config = {"gcs_destination": {"output_uri_prefix": output_uri}}

    # A glossary is a custom dictionary the Translation API uses
    # to translate domain-specific terminology.
    glossary_path = client.glossary_path(project_id, location, glossary_id)
    glossary_config = translate.types.TranslateTextGlossaryConfig(
        glossary=glossary_path
    )

    # Supported language codes: https://cloud.google.com/translate/docs/languages
    glossaries = {"ja": glossary_config}  # Keyed by target language code.

    operation = client.batch_translate_text(
        parent=parent,
        source_language_code="en",
        target_language_codes=["ja"],  # Up to 10 language codes here.
        input_configs=[input_configs_element],
        glossaries=glossaries,
        output_config=output_config,
    )

    print(u"Waiting for operation to complete...")
    # NOTE(review): 120 is presumably the wait timeout in seconds - confirm.
    response = operation.result(120)

    print(u"Total Characters: {}".format(response.total_characters))
    print(u"Translated Characters: {}".format(response.translated_characters))
|
||
|
||
# [END translate_v3_batch_translate_text_with_glossary] |
64 changes: 64 additions & 0 deletions
64
translate/cloud-client/translate_v3_batch_translate_text_with_glossary_test.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
# Copyright 2020 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import os | ||
import pytest | ||
import uuid | ||
import translate_v3_batch_translate_text_with_glossary | ||
import translate_v3_create_glossary | ||
import translate_v3_delete_glossary | ||
from google.cloud import storage | ||
|
||
PROJECT_ID = os.environ["GCLOUD_PROJECT"] | ||
GLOSSARY_INPUT_URI = "gs://cloud-samples-data/translation/glossary_ja.csv" | ||
|
||
|
||
@pytest.fixture(scope="session")
def glossary():
    """Create a glossary for the test session and yield its ID.

    The glossary is created from GLOSSARY_INPUT_URI under a unique ID and
    deleted again during teardown. Deletion is best-effort: a failure is
    reported as a warning instead of being silently discarded, so a leaked
    glossary can be noticed and cleaned up.
    """
    import warnings

    glossary_id = "must-start-with-letters-" + str(uuid.uuid1())
    translate_v3_create_glossary.create_glossary(
        PROJECT_ID, GLOSSARY_INPUT_URI, glossary_id
    )

    yield glossary_id

    try:
        translate_v3_delete_glossary.delete_glossary(PROJECT_ID, glossary_id)
    except Exception as error:
        # Keep teardown non-fatal, but make the failure visible.
        warnings.warn(
            "Failed to delete glossary {}: {}".format(glossary_id, error)
        )
|
||
|
||
@pytest.fixture(scope="function")
def bucket():
    """Provide a uniquely named temporary bucket for translation output.

    The bucket is created before the test body runs and deleted with
    ``force=True`` once the test has finished.
    """
    temp_bucket = storage.Client().create_bucket(str(uuid.uuid1()))

    yield temp_bucket

    temp_bucket.delete(force=True)
|
||
|
||
def test_batch_translate_text_with_glossary(capsys, bucket, glossary):
    """Run the glossary sample and check the character total it prints."""
    output_uri = "gs://{}/translation/BATCH_TRANSLATION_OUTPUT/".format(bucket.name)
    sample = translate_v3_batch_translate_text_with_glossary

    sample.batch_translate_text_with_glossary(
        "gs://cloud-samples-data/translation/text_with_glossary.txt",
        output_uri,
        PROJECT_ID,
        glossary,
    )

    stdout, _ = capsys.readouterr()
    assert "Total Characters: 9" in stdout
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.