diff --git a/language/cloud-client/v1/snippets.py b/language/cloud-client/v1/snippets.py index 94d1db4a23a..31e02ef6505 100644 --- a/language/cloud-client/v1/snippets.py +++ b/language/cloud-client/v1/snippets.py @@ -24,12 +24,16 @@ import argparse from google.cloud import language +import six def sentiment_text(text): """Detects sentiment in the text.""" language_client = language.Client() + if isinstance(text, six.binary_type): + text = text.decode('utf-8') + # Instantiates a plain text document. document = language_client.document_from_text(text) @@ -60,6 +64,9 @@ def entities_text(text): """Detects entities in the text.""" language_client = language.Client() + if isinstance(text, six.binary_type): + text = text.decode('utf-8') + # Instantiates a plain text document. document = language_client.document_from_text(text) @@ -69,11 +76,11 @@ def entities_text(text): for entity in entities: print('=' * 20) - print('{:<16}: {}'.format('name', entity.name)) - print('{:<16}: {}'.format('type', entity.entity_type)) - print('{:<16}: {}'.format('metadata', entity.metadata)) - print('{:<16}: {}'.format('salience', entity.salience)) - print('{:<16}: {}'.format('wikipedia_url', + print(u'{:<16}: {}'.format('name', entity.name)) + print(u'{:<16}: {}'.format('type', entity.entity_type)) + print(u'{:<16}: {}'.format('metadata', entity.metadata)) + print(u'{:<16}: {}'.format('salience', entity.salience)) + print(u'{:<16}: {}'.format('wikipedia_url', entity.metadata.get('wikipedia_url', '-'))) @@ -90,11 +97,11 @@ def entities_file(gcs_uri): for entity in entities: print('=' * 20) - print('{:<16}: {}'.format('name', entity.name)) - print('{:<16}: {}'.format('type', entity.entity_type)) - print('{:<16}: {}'.format('metadata', entity.metadata)) - print('{:<16}: {}'.format('salience', entity.salience)) - print('{:<16}: {}'.format('wikipedia_url', + print(u'{:<16}: {}'.format('name', entity.name)) + print(u'{:<16}: {}'.format('type', entity.entity_type)) + print(u'{:<16}: {}'.format('metadata', entity.metadata)) + print(u'{:<16}: {}'.format('salience', entity.salience)) + print(u'{:<16}: {}'.format('wikipedia_url', entity.metadata.get('wikipedia_url', '-'))) @@ -102,6 +109,9 @@ def syntax_text(text): """Detects syntax in the text.""" language_client = language.Client() + if isinstance(text, six.binary_type): + text = text.decode('utf-8') + # Instantiates a plain text document. document = language_client.document_from_text(text) @@ -110,7 +120,7 @@ def syntax_text(text): tokens = document.analyze_syntax().tokens for token in tokens: - print('{}: {}'.format(token.part_of_speech, token.text_content)) + print(u'{}: {}'.format(token.part_of_speech, token.text_content)) def syntax_file(gcs_uri): @@ -125,7 +135,7 @@ def syntax_file(gcs_uri): tokens = document.analyze_syntax().tokens for token in tokens: - print('{}: {}'.format(token.part_of_speech, token.text_content)) + print(u'{}: {}'.format(token.part_of_speech, token.text_content)) if __name__ == '__main__': diff --git a/language/cloud-client/v1beta2/snippets.py b/language/cloud-client/v1beta2/snippets.py index af472118956..2e6745d2c94 100644 --- a/language/cloud-client/v1beta2/snippets.py +++ b/language/cloud-client/v1beta2/snippets.py @@ -27,12 +27,16 @@ from google.cloud.gapic.language.v1beta2 import enums from google.cloud.gapic.language.v1beta2 import language_service_client from google.cloud.proto.language.v1beta2 import language_service_pb2 +import six def sentiment_text(text): """Detects sentiment in the text.""" language_client = language.Client(api_version='v1beta2') + if isinstance(text, six.binary_type): + text = text.decode('utf-8') + # Instantiates a plain text document. document = language_client.document_from_text(text) @@ -40,8 +44,8 @@ def sentiment_text(text): # document.doc_type == language.Document.HTML sentiment = document.analyze_sentiment().sentiment - print('Score: {}'.format(sentiment.score)) - print('Magnitude: {}'.format(sentiment.magnitude)) + print(u'Score: {}'.format(sentiment.score)) + print(u'Magnitude: {}'.format(sentiment.magnitude)) def sentiment_file(gcs_uri): @@ -55,14 +59,17 @@ def sentiment_file(gcs_uri): # document.doc_type == language.Document.HTML sentiment = document.analyze_sentiment().sentiment - print('Score: {}'.format(sentiment.score)) - print('Magnitude: {}'.format(sentiment.magnitude)) + print(u'Score: {}'.format(sentiment.score)) + print(u'Magnitude: {}'.format(sentiment.magnitude)) def entities_text(text): """Detects entities in the text.""" language_client = language.Client(api_version='v1beta2') + if isinstance(text, six.binary_type): + text = text.decode('utf-8') + # Instantiates a plain text document. document = language_client.document_from_text(text) @@ -71,12 +78,12 @@ def entities_text(text): entities = document.analyze_entities().entities for entity in entities: - print('=' * 20) - print('{:<16}: {}'.format('name', entity.name)) - print('{:<16}: {}'.format('type', entity.entity_type)) - print('{:<16}: {}'.format('metadata', entity.metadata)) - print('{:<16}: {}'.format('salience', entity.salience)) - print('{:<16}: {}'.format('wikipedia_url', + print(u'=' * 20) + print(u'{:<16}: {}'.format('name', entity.name)) + print(u'{:<16}: {}'.format('type', entity.entity_type)) + print(u'{:<16}: {}'.format('metadata', entity.metadata)) + print(u'{:<16}: {}'.format('salience', entity.salience)) + print(u'{:<16}: {}'.format('wikipedia_url', entity.metadata.get('wikipedia_url', '-'))) @@ -105,6 +112,9 @@ def syntax_text(text): """Detects syntax in the text.""" language_client = language.Client(api_version='v1beta2') + if isinstance(text, six.binary_type): + text = text.decode('utf-8') + # Instantiates a plain text document. document = language_client.document_from_text(text) @@ -113,7 +123,7 @@ def syntax_text(text): tokens = document.analyze_syntax().tokens for token in tokens: - print('{}: {}'.format(token.part_of_speech, token.text_content)) + print(u'{}: {}'.format(token.part_of_speech, token.text_content)) def syntax_file(gcs_uri): @@ -128,7 +138,7 @@ def syntax_file(gcs_uri): tokens = document.analyze_syntax().tokens for token in tokens: - print('{}: {}'.format(token.part_of_speech, token.text_content)) + print(u'{}: {}'.format(token.part_of_speech, token.text_content)) def entity_sentiment_text(text): @@ -136,6 +146,9 @@ def entity_sentiment_text(text): language_client = language_service_client.LanguageServiceClient() document = language_service_pb2.Document() + if isinstance(text, six.binary_type): + text = text.decode('utf-8') + document.content = text.encode('utf-8') document.type = enums.Document.Type.PLAIN_TEXT @@ -144,15 +157,15 @@ def entity_sentiment_text(text): for entity in result.entities: print('Mentions: ') - print('Name: "{}"'.format(entity.name)) + print(u'Name: "{}"'.format(entity.name)) for mention in entity.mentions: - print(' Begin Offset : {}'.format(mention.text.begin_offset)) - print(' Content : {}'.format(mention.text.content)) - print(' Magnitude : {}'.format(mention.sentiment.magnitude)) - print(' Sentiment : {}'.format(mention.sentiment.score)) - print(' Type : {}'.format(mention.type)) - print('Salience: {}'.format(entity.salience)) - print('Sentiment: {}\n'.format(entity.sentiment)) + print(u' Begin Offset : {}'.format(mention.text.begin_offset)) + print(u' Content : {}'.format(mention.text.content)) + print(u' Magnitude : {}'.format(mention.sentiment.magnitude)) + print(u' Sentiment : {}'.format(mention.sentiment.score)) + print(u' Type : {}'.format(mention.type)) + print(u'Salience: {}'.format(entity.salience)) + print(u'Sentiment: {}\n'.format(entity.sentiment)) def entity_sentiment_file(gcs_uri): @@ -167,15 +180,15 @@ def entity_sentiment_file(gcs_uri): document, enums.EncodingType.UTF8) for entity in result.entities: - print('Name: "{}"'.format(entity.name)) + print(u'Name: "{}"'.format(entity.name)) for mention in entity.mentions: - print(' Begin Offset : {}'.format(mention.text.begin_offset)) - print(' Content : {}'.format(mention.text.content)) - print(' Magnitude : {}'.format(mention.sentiment.magnitude)) - print(' Sentiment : {}'.format(mention.sentiment.score)) - print(' Type : {}'.format(mention.type)) - print('Salience: {}'.format(entity.salience)) - print('Sentiment: {}\n'.format(entity.sentiment)) + print(u' Begin Offset : {}'.format(mention.text.begin_offset)) + print(u' Content : {}'.format(mention.text.content)) + print(u' Magnitude : {}'.format(mention.sentiment.magnitude)) + print(u' Sentiment : {}'.format(mention.sentiment.score)) + print(u' Type : {}'.format(mention.type)) + print(u'Salience: {}'.format(entity.salience)) + print(u'Sentiment: {}\n'.format(entity.sentiment)) if __name__ == '__main__': diff --git a/language/cloud-client/v1beta2/snippets_test.py b/language/cloud-client/v1beta2/snippets_test.py index d1e6abd0cf6..8db7aa1dbdf 100644 --- a/language/cloud-client/v1beta2/snippets_test.py +++ b/language/cloud-client/v1beta2/snippets_test.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # Copyright 2017 Google, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -26,6 +27,15 @@ def test_sentiment_text(capsys): assert 'Score: 0' in out +def test_sentiment_utf(capsys): + snippets.sentiment_text( + u'1er site d\'information. Les articles du journal et toute l\'' + + u'actualité en continu : International, France, Société, Economie, ' + + u'Culture, Environnement') + out, _ = capsys.readouterr() + assert 'Score: 0' in out + + def test_sentiment_file(capsys): snippets.sentiment_file(TEST_FILE_URL) out, _ = capsys.readouterr()