
Commit 78f4bc9

gguuss authored and busunkim96 committed
* Fixes for non-ASCII encodings
* Adds test for UTF
* Style fix
1 parent 24905ee commit 78f4bc9

File tree

3 files changed: +73, -40 lines

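In essence, every text-accepting helper in these samples now normalizes its input to unicode before building a Document, and the print calls use unicode format strings. A minimal, standalone sketch of that pattern (the normalize_text helper is illustrative, not part of the samples, and it assumes byte input is UTF-8 encoded):

import six


def normalize_text(text):
    """Illustrative helper (not in the samples): coerce input to unicode."""
    if isinstance(text, six.binary_type):
        text = text.decode('utf-8')  # assumes byte input is UTF-8 encoded
    return text


# A unicode format string avoids UnicodeDecodeError on non-ASCII values.
print(u'{:<16}: {}'.format('name', normalize_text(b'Caf\xc3\xa9 de Flore')))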

packages/google-cloud-language/samples/snippets/cloud-client/v1/snippets.py

Lines changed: 22 additions & 12 deletions
@@ -24,12 +24,16 @@
 import argparse
 
 from google.cloud import language
+import six
 
 
 def sentiment_text(text):
     """Detects sentiment in the text."""
     language_client = language.Client()
 
+    if isinstance(text, six.binary_type):
+        text = text.decode('utf-8')
+
     # Instantiates a plain text document.
     document = language_client.document_from_text(text)
 
@@ -60,6 +64,9 @@ def entities_text(text):
     """Detects entities in the text."""
     language_client = language.Client()
 
+    if isinstance(text, six.binary_type):
+        text = text.decode('utf-8')
+
     # Instantiates a plain text document.
     document = language_client.document_from_text(text)
 
@@ -69,11 +76,11 @@ def entities_text(text):
 
     for entity in entities:
         print('=' * 20)
-        print('{:<16}: {}'.format('name', entity.name))
-        print('{:<16}: {}'.format('type', entity.entity_type))
-        print('{:<16}: {}'.format('metadata', entity.metadata))
-        print('{:<16}: {}'.format('salience', entity.salience))
-        print('{:<16}: {}'.format('wikipedia_url',
+        print(u'{:<16}: {}'.format('name', entity.name))
+        print(u'{:<16}: {}'.format('type', entity.entity_type))
+        print(u'{:<16}: {}'.format('metadata', entity.metadata))
+        print(u'{:<16}: {}'.format('salience', entity.salience))
+        print(u'{:<16}: {}'.format('wikipedia_url',
               entity.metadata.get('wikipedia_url', '-')))
 
 
@@ -90,18 +97,21 @@ def entities_file(gcs_uri):
 
     for entity in entities:
         print('=' * 20)
-        print('{:<16}: {}'.format('name', entity.name))
-        print('{:<16}: {}'.format('type', entity.entity_type))
-        print('{:<16}: {}'.format('metadata', entity.metadata))
-        print('{:<16}: {}'.format('salience', entity.salience))
-        print('{:<16}: {}'.format('wikipedia_url',
+        print(u'{:<16}: {}'.format('name', entity.name))
+        print(u'{:<16}: {}'.format('type', entity.entity_type))
+        print(u'{:<16}: {}'.format('metadata', entity.metadata))
+        print(u'{:<16}: {}'.format('salience', entity.salience))
+        print(u'{:<16}: {}'.format('wikipedia_url',
               entity.metadata.get('wikipedia_url', '-')))
 
 
 def syntax_text(text):
     """Detects syntax in the text."""
     language_client = language.Client()
 
+    if isinstance(text, six.binary_type):
+        text = text.decode('utf-8')
+
     # Instantiates a plain text document.
     document = language_client.document_from_text(text)
 
@@ -110,7 +120,7 @@ def syntax_text(text):
     tokens = document.analyze_syntax().tokens
 
     for token in tokens:
-        print('{}: {}'.format(token.part_of_speech, token.text_content))
+        print(u'{}: {}'.format(token.part_of_speech, token.text_content))
 
 
 def syntax_file(gcs_uri):
@@ -125,7 +135,7 @@ def syntax_file(gcs_uri):
     tokens = document.analyze_syntax().tokens
 
     for token in tokens:
-        print('{}: {}'.format(token.part_of_speech, token.text_content))
+        print(u'{}: {}'.format(token.part_of_speech, token.text_content))
 
 
 if __name__ == '__main__':
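With the decode guard in place, the v1 helpers accept either unicode strings or UTF-8 encoded byte strings. A quick, illustrative sanity check (it requires the google-cloud-language client and application credentials; the French sentence is arbitrary sample input):

import snippets

text = u'Le réchauffement climatique préoccupe les scientifiques.'

# Both call styles now behave the same: bytes are decoded inside the helper.
snippets.sentiment_text(text)
snippets.entities_text(text.encode('utf-8'))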

packages/google-cloud-language/samples/snippets/cloud-client/v1beta2/snippets.py

Lines changed: 41 additions & 28 deletions
@@ -27,21 +27,25 @@
 from google.cloud.gapic.language.v1beta2 import enums
 from google.cloud.gapic.language.v1beta2 import language_service_client
 from google.cloud.proto.language.v1beta2 import language_service_pb2
+import six
 
 
 def sentiment_text(text):
     """Detects sentiment in the text."""
     language_client = language.Client(api_version='v1beta2')
 
+    if isinstance(text, six.binary_type):
+        text = text.decode('utf-8')
+
     # Instantiates a plain text document.
     document = language_client.document_from_text(text)
 
     # Detects sentiment in the document. You can also analyze HTML with:
     # document.doc_type == language.Document.HTML
     sentiment = document.analyze_sentiment().sentiment
 
-    print('Score: {}'.format(sentiment.score))
-    print('Magnitude: {}'.format(sentiment.magnitude))
+    print(u'Score: {}'.format(sentiment.score))
+    print(u'Magnitude: {}'.format(sentiment.magnitude))
 
 
 def sentiment_file(gcs_uri):
@@ -55,14 +59,17 @@ def sentiment_file(gcs_uri):
     # document.doc_type == language.Document.HTML
     sentiment = document.analyze_sentiment().sentiment
 
-    print('Score: {}'.format(sentiment.score))
-    print('Magnitude: {}'.format(sentiment.magnitude))
+    print(u'Score: {}'.format(sentiment.score))
+    print(u'Magnitude: {}'.format(sentiment.magnitude))
 
 
 def entities_text(text):
     """Detects entities in the text."""
     language_client = language.Client(api_version='v1beta2')
 
+    if isinstance(text, six.binary_type):
+        text = text.decode('utf-8')
+
     # Instantiates a plain text document.
     document = language_client.document_from_text(text)
 
@@ -71,12 +78,12 @@ def entities_text(text):
     entities = document.analyze_entities().entities
 
     for entity in entities:
-        print('=' * 20)
-        print('{:<16}: {}'.format('name', entity.name))
-        print('{:<16}: {}'.format('type', entity.entity_type))
-        print('{:<16}: {}'.format('metadata', entity.metadata))
-        print('{:<16}: {}'.format('salience', entity.salience))
-        print('{:<16}: {}'.format('wikipedia_url',
+        print(u'=' * 20)
+        print(u'{:<16}: {}'.format('name', entity.name))
+        print(u'{:<16}: {}'.format('type', entity.entity_type))
+        print(u'{:<16}: {}'.format('metadata', entity.metadata))
+        print(u'{:<16}: {}'.format('salience', entity.salience))
+        print(u'{:<16}: {}'.format('wikipedia_url',
               entity.metadata.get('wikipedia_url', '-')))
 
 
@@ -105,6 +112,9 @@ def syntax_text(text):
     """Detects syntax in the text."""
     language_client = language.Client(api_version='v1beta2')
 
+    if isinstance(text, six.binary_type):
+        text = text.decode('utf-8')
+
     # Instantiates a plain text document.
     document = language_client.document_from_text(text)
 
@@ -113,7 +123,7 @@ def syntax_text(text):
     tokens = document.analyze_syntax().tokens
 
     for token in tokens:
-        print('{}: {}'.format(token.part_of_speech, token.text_content))
+        print(u'{}: {}'.format(token.part_of_speech, token.text_content))
 
 
 def syntax_file(gcs_uri):
@@ -128,14 +138,17 @@ def syntax_file(gcs_uri):
     tokens = document.analyze_syntax().tokens
 
     for token in tokens:
-        print('{}: {}'.format(token.part_of_speech, token.text_content))
+        print(u'{}: {}'.format(token.part_of_speech, token.text_content))
 
 
 def entity_sentiment_text(text):
     """Detects entity sentiment in the provided text."""
     language_client = language_service_client.LanguageServiceClient()
     document = language_service_pb2.Document()
 
+    if isinstance(text, six.binary_type):
+        text = text.decode('utf-8')
+
     document.content = text.encode('utf-8')
     document.type = enums.Document.Type.PLAIN_TEXT
 
@@ -144,15 +157,15 @@ def entity_sentiment_text(text):
 
     for entity in result.entities:
         print('Mentions: ')
-        print('Name: "{}"'.format(entity.name))
+        print(u'Name: "{}"'.format(entity.name))
         for mention in entity.mentions:
-            print('  Begin Offset : {}'.format(mention.text.begin_offset))
-            print('  Content : {}'.format(mention.text.content))
-            print('  Magnitude : {}'.format(mention.sentiment.magnitude))
-            print('  Sentiment : {}'.format(mention.sentiment.score))
-            print('  Type : {}'.format(mention.type))
-        print('Salience: {}'.format(entity.salience))
-        print('Sentiment: {}\n'.format(entity.sentiment))
+            print(u'  Begin Offset : {}'.format(mention.text.begin_offset))
+            print(u'  Content : {}'.format(mention.text.content))
+            print(u'  Magnitude : {}'.format(mention.sentiment.magnitude))
+            print(u'  Sentiment : {}'.format(mention.sentiment.score))
+            print(u'  Type : {}'.format(mention.type))
+        print(u'Salience: {}'.format(entity.salience))
+        print(u'Sentiment: {}\n'.format(entity.sentiment))
 
 
 def entity_sentiment_file(gcs_uri):
@@ -167,15 +180,15 @@ def entity_sentiment_file(gcs_uri):
         document, enums.EncodingType.UTF8)
 
     for entity in result.entities:
-        print('Name: "{}"'.format(entity.name))
+        print(u'Name: "{}"'.format(entity.name))
         for mention in entity.mentions:
-            print('  Begin Offset : {}'.format(mention.text.begin_offset))
-            print('  Content : {}'.format(mention.text.content))
-            print('  Magnitude : {}'.format(mention.sentiment.magnitude))
-            print('  Sentiment : {}'.format(mention.sentiment.score))
-            print('  Type : {}'.format(mention.type))
-        print('Salience: {}'.format(entity.salience))
-        print('Sentiment: {}\n'.format(entity.sentiment))
+            print(u'  Begin Offset : {}'.format(mention.text.begin_offset))
+            print(u'  Content : {}'.format(mention.text.content))
+            print(u'  Magnitude : {}'.format(mention.sentiment.magnitude))
+            print(u'  Sentiment : {}'.format(mention.sentiment.score))
+            print(u'  Type : {}'.format(mention.type))
+        print(u'Salience: {}'.format(entity.salience))
+        print(u'Sentiment: {}\n'.format(entity.sentiment))
 
 
 if __name__ == '__main__':
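The GAPIC-based entity sentiment helper goes one step further: after decoding any byte input, it re-encodes the text as UTF-8 before assigning it to document.content, consistent with the enums.EncodingType.UTF8 visible in the entity_sentiment_file context above. A standalone sketch of that round trip (the to_utf8_bytes helper is illustrative, not part of the sample):

import six


def to_utf8_bytes(text):
    # Mirror the decode/encode round trip in entity_sentiment_text:
    # whatever comes in, the proto Document always receives UTF-8 bytes.
    if isinstance(text, six.binary_type):
        text = text.decode('utf-8')  # assumes byte input is UTF-8 encoded
    return text.encode('utf-8')


expected = u'Société Générale'.encode('utf-8')
assert to_utf8_bytes(u'Société Générale') == expected
assert to_utf8_bytes(expected) == expected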

packages/google-cloud-language/samples/snippets/cloud-client/v1beta2/snippets_test.py

Lines changed: 10 additions & 0 deletions
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 # Copyright 2017 Google, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -26,6 +27,15 @@ def test_sentiment_text(capsys):
     assert 'Score: 0' in out
 
 
+def test_sentiment_utf(capsys):
+    snippets.sentiment_text(
+        u'1er site d\'information. Les articles du journal et toute l\'' +
+        u'actualité en continu : International, France, Société, Economie, ' +
+        u'Culture, Environnement')
+    out, _ = capsys.readouterr()
+    assert 'Score: 0' in out
+
+
 def test_sentiment_file(capsys):
     snippets.sentiment_file(TEST_FILE_URL)
     out, _ = capsys.readouterr()
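The new test exercises sentiment_text with accented French text; any other text-accepting helper could be covered the same way. A possible companion test, shown only as an illustration (it is not part of this commit and, like the existing tests in snippets_test.py, calls the live API):

def test_syntax_utf(capsys):
    snippets.syntax_text(
        u'Le français est une langue indo-européenne.')
    out, _ = capsys.readouterr()
    # Tokens print without raising UnicodeDecodeError/UnicodeEncodeError.
    assert out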
