Skip to content

Commit b91f9af

Browse files
dizcologyJon Wayne Parrott
authored and
Jon Wayne Parrott
committed
Language classify (#1095)
* add classify text samples and tests * use longer text * move entity sentiment to v1 * flake * year when first written * year first written
1 parent b5d5cad commit b91f9af

File tree

5 files changed

+141
-69
lines changed

5 files changed

+141
-69
lines changed

language/cloud-client/v1/snippets.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,12 @@
2222
"""
2323

2424
import argparse
25+
import sys
2526

2627
from google.cloud import language
2728
from google.cloud.language import enums
2829
from google.cloud.language import types
30+
2931
import six
3032

3133

@@ -192,12 +194,80 @@ def syntax_file(gcs_uri):
192194
# [END def_syntax_file]
193195

194196

197+
# [START def_entity_sentiment_text]
198+
def entity_sentiment_text(text):
    """Detects entity sentiment in the provided text."""
    # NOTE: the docstring above doubles as the argparse help text for the
    # 'sentiment-entities-text' sub-command, so it is kept to a single line.
    client = language.LanguageServiceClient()

    # Accept both byte strings and text: bytes are decoded first so the
    # value is always text before it is re-encoded for the request.
    if isinstance(text, six.binary_type):
        text = text.decode('utf-8')

    payload = text.encode('utf-8')
    document = types.Document(
        content=payload, type=enums.Document.Type.PLAIN_TEXT)

    # Detect and send native Python encoding to receive correct word offsets:
    # a sys.maxunicode of 65535 selects UTF16, anything else UTF32.
    narrow_build = sys.maxunicode == 65535
    encoding = (enums.EncodingType.UTF16 if narrow_build
                else enums.EncodingType.UTF32)

    response = client.analyze_entity_sentiment(document, encoding)

    for entity in response.entities:
        print('Mentions: ')
        print(u'Name: "{}"'.format(entity.name))
        for mention in entity.mentions:
            span = mention.text
            feeling = mention.sentiment
            # One output line per (label template, value) pair, in order.
            for template, value in (
                    (u' Begin Offset : {}', span.begin_offset),
                    (u' Content : {}', span.content),
                    (u' Magnitude : {}', feeling.magnitude),
                    (u' Sentiment : {}', feeling.score),
                    (u' Type : {}', mention.type)):
                print(template.format(value))
        print(u'Salience: {}'.format(entity.salience))
        print(u'Sentiment: {}\n'.format(entity.sentiment))
227+
# [END def_entity_sentiment_text]
228+
229+
230+
def entity_sentiment_file(gcs_uri):
    """Detects entity sentiment in a Google Cloud Storage file."""
    # NOTE: the docstring above doubles as the argparse help text for the
    # 'sentiment-entities-file' sub-command, so it is kept to a single line.
    client = language.LanguageServiceClient()

    # The document only references the file by URI; no content is sent here.
    plain_text = enums.Document.Type.PLAIN_TEXT
    document = types.Document(gcs_content_uri=gcs_uri, type=plain_text)

    # Detect and send native Python encoding to receive correct word offsets.
    if sys.maxunicode == 65535:
        encoding = enums.EncodingType.UTF16
    else:
        encoding = enums.EncodingType.UTF32

    entities = client.analyze_entity_sentiment(document, encoding).entities

    for entity in entities:
        print(u'Name: "{}"'.format(entity.name))
        for mention in entity.mentions:
            mention_text = mention.text
            mention_sentiment = mention.sentiment
            print(u' Begin Offset : {}'.format(mention_text.begin_offset))
            print(u' Content : {}'.format(mention_text.content))
            print(u' Magnitude : {}'.format(mention_sentiment.magnitude))
            print(u' Sentiment : {}'.format(mention_sentiment.score))
            print(u' Type : {}'.format(mention.type))
        print(u'Salience: {}'.format(entity.salience))
        print(u'Sentiment: {}\n'.format(entity.sentiment))
255+
256+
195257
if __name__ == '__main__':
196258
parser = argparse.ArgumentParser(
197259
description=__doc__,
198260
formatter_class=argparse.RawDescriptionHelpFormatter)
199261
subparsers = parser.add_subparsers(dest='command')
200262

263+
sentiment_entities_text_parser = subparsers.add_parser(
264+
'sentiment-entities-text', help=entity_sentiment_text.__doc__)
265+
sentiment_entities_text_parser.add_argument('text')
266+
267+
sentiment_entities_file_parser = subparsers.add_parser(
268+
'sentiment-entities-file', help=entity_sentiment_file.__doc__)
269+
sentiment_entities_file_parser.add_argument('gcs_uri')
270+
201271
sentiment_text_parser = subparsers.add_parser(
202272
'sentiment-text', help=sentiment_text.__doc__)
203273
sentiment_text_parser.add_argument('text')
@@ -236,3 +306,7 @@ def syntax_file(gcs_uri):
236306
syntax_text(args.text)
237307
elif args.command == 'syntax-file':
238308
syntax_file(args.gcs_uri)
309+
elif args.command == 'sentiment-entities-text':
310+
entity_sentiment_text(args.text)
311+
elif args.command == 'sentiment-entities-file':
312+
entity_sentiment_file(args.gcs_uri)

language/cloud-client/v1/snippets_test.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
# Copyright 2016 Google, Inc.
1+
# -*- coding: utf-8 -*-
2+
# Copyright 2017 Google, Inc.
23
#
34
# Licensed under the Apache License, Version 2.0 (the "License");
45
# you may not use this file except in compliance with the License.
@@ -56,3 +57,23 @@ def test_syntax_file(capsys):
5657
snippets.syntax_file(TEST_FILE_URL)
5758
out, _ = capsys.readouterr()
5859
assert 'NOUN: President' in out
60+
61+
62+
def test_sentiment_entities_text(capsys):
    """Inline text: the printed output must include the 'White House' mention."""
    sample = 'President Obama is speaking at the White House.'
    snippets.entity_sentiment_text(sample)
    # Only stdout is inspected; stderr is discarded.
    printed = capsys.readouterr()[0]
    assert 'Content : White House' in printed
67+
68+
69+
def test_sentiment_entities_file(capsys):
    """GCS file: the printed output must include the 'White House' mention."""
    snippets.entity_sentiment_file(TEST_FILE_URL)
    # Only stdout is inspected; stderr is discarded.
    printed = capsys.readouterr()[0]
    assert 'Content : White House' in printed
73+
74+
75+
def test_sentiment_entities_utf(capsys):
    """Non-ASCII text: reported offsets must count the arrow as one unit."""
    # 'foo' fills offsets 0-2 and the arrow offset 3, so an offset of 4 is
    # only reported when the arrow is not counted as its three UTF-8 bytes.
    # Presumably the offset-4 mention is 'bar' - the test does not check which.
    sample = 'foo→bar'
    snippets.entity_sentiment_text(sample)
    printed = capsys.readouterr()[0]
    assert 'Begin Offset : 4' in printed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Android is a mobile operating system developed by Google, based on the Linux kernel and designed primarily for touchscreen mobile devices such as smartphones and tablets.

language/cloud-client/v1beta2/snippets.py

Lines changed: 32 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env python
22

3-
# Copyright 2017 Google, Inc.
3+
# Copyright 2016 Google, Inc.
44
#
55
# Licensed under the Apache License, Version 2.0 (the "License");
66
# you may not use this file except in compliance with the License.
@@ -22,7 +22,6 @@
2222
"""
2323

2424
import argparse
25-
import sys
2625

2726
# [START beta_import]
2827
from google.cloud import language_v1beta2
@@ -173,9 +172,9 @@ def syntax_file(gcs_uri):
173172
token.text.content))
174173

175174

176-
# [START def_entity_sentiment_text]
177-
def entity_sentiment_text(text):
178-
"""Detects entity sentiment in the provided text."""
175+
# [START def_classify_text]
176+
def classify_text(text):
177+
"""Classifies the provided text."""
179178
# [START beta_client]
180179
client = language_v1beta2.LanguageServiceClient()
181180
# [END beta_client]
@@ -187,52 +186,31 @@ def entity_sentiment_text(text):
187186
content=text.encode('utf-8'),
188187
type=enums.Document.Type.PLAIN_TEXT)
189188

190-
# Pass in encoding type to get useful offsets in the response.
191-
encoding = enums.EncodingType.UTF32
192-
if sys.maxunicode == 65535:
193-
encoding = enums.EncodingType.UTF16
194-
195-
result = client.analyze_entity_sentiment(document, encoding)
196-
197-
for entity in result.entities:
198-
print('Mentions: ')
199-
print(u'Name: "{}"'.format(entity.name))
200-
for mention in entity.mentions:
201-
print(u' Begin Offset : {}'.format(mention.text.begin_offset))
202-
print(u' Content : {}'.format(mention.text.content))
203-
print(u' Magnitude : {}'.format(mention.sentiment.magnitude))
204-
print(u' Sentiment : {}'.format(mention.sentiment.score))
205-
print(u' Type : {}'.format(mention.type))
206-
print(u'Salience: {}'.format(entity.salience))
207-
print(u'Sentiment: {}\n'.format(entity.sentiment))
208-
# [END def_entity_sentiment_text]
209-
210-
211-
def entity_sentiment_file(gcs_uri):
212-
"""Detects entity sentiment in a Google Cloud Storage file."""
189+
categories = client.classify_text(document).categories
190+
191+
for category in categories:
192+
print(u'=' * 20)
193+
print(u'{:<16}: {}'.format('name', category.name))
194+
print(u'{:<16}: {}'.format('confidence', category.confidence))
195+
# [END def_classify_text]
196+
197+
198+
# [START def_classify_file]
199+
def classify_file(gcs_uri):
200+
"""Classifies the text in a Google Cloud Storage file."""
213201
client = language_v1beta2.LanguageServiceClient()
214202

215203
document = types.Document(
216204
gcs_content_uri=gcs_uri,
217205
type=enums.Document.Type.PLAIN_TEXT)
218206

219-
# Pass in encoding type to get useful offsets in the response.
220-
encoding = enums.EncodingType.UTF32
221-
if sys.maxunicode == 65535:
222-
encoding = enums.EncodingType.UTF16
223-
224-
result = client.analyze_entity_sentiment(document, encoding)
207+
categories = client.classify_text(document).categories
225208

226-
for entity in result.entities:
227-
print(u'Name: "{}"'.format(entity.name))
228-
for mention in entity.mentions:
229-
print(u' Begin Offset : {}'.format(mention.text.begin_offset))
230-
print(u' Content : {}'.format(mention.text.content))
231-
print(u' Magnitude : {}'.format(mention.sentiment.magnitude))
232-
print(u' Sentiment : {}'.format(mention.sentiment.score))
233-
print(u' Type : {}'.format(mention.type))
234-
print(u'Salience: {}'.format(entity.salience))
235-
print(u'Sentiment: {}\n'.format(entity.sentiment))
209+
for category in categories:
210+
print(u'=' * 20)
211+
print(u'{:<16}: {}'.format('name', category.name))
212+
print(u'{:<16}: {}'.format('confidence', category.confidence))
213+
# [END def_classify_file]
236214

237215

238216
if __name__ == '__main__':
@@ -241,13 +219,13 @@ def entity_sentiment_file(gcs_uri):
241219
formatter_class=argparse.RawDescriptionHelpFormatter)
242220
subparsers = parser.add_subparsers(dest='command')
243221

244-
sentiment_entities_text_parser = subparsers.add_parser(
245-
'sentiment-entities-text', help=entity_sentiment_text.__doc__)
246-
sentiment_entities_text_parser.add_argument('text')
222+
classify_text_parser = subparsers.add_parser(
223+
'classify-text', help=classify_text.__doc__)
224+
classify_text_parser.add_argument('text')
247225

248-
sentiment_entities_file_parser = subparsers.add_parser(
249-
'sentiment-entities-file', help=entity_sentiment_file.__doc__)
250-
sentiment_entities_file_parser.add_argument('gcs_uri')
226+
classify_text_parser = subparsers.add_parser(
227+
'classify-file', help=classify_file.__doc__)
228+
classify_text_parser.add_argument('gcs_uri')
251229

252230
sentiment_text_parser = subparsers.add_parser(
253231
'sentiment-text', help=sentiment_text.__doc__)
@@ -287,7 +265,7 @@ def entity_sentiment_file(gcs_uri):
287265
syntax_text(args.text)
288266
elif args.command == 'syntax-file':
289267
syntax_file(args.gcs_uri)
290-
elif args.command == 'sentiment-entities-text':
291-
entity_sentiment_text(args.text)
292-
elif args.command == 'sentiment-entities-file':
293-
entity_sentiment_file(args.gcs_uri)
268+
elif args.command == 'classify-text':
269+
classify_text(args.text)
270+
elif args.command == 'classify-file':
271+
classify_file(args.gcs_uri)

language/cloud-client/v1beta2/snippets_test.py

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
BUCKET = os.environ['CLOUD_STORAGE_BUCKET']
2121
TEST_FILE_URL = 'gs://{}/text.txt'.format(BUCKET)
22+
LONG_TEST_FILE_URL = 'gs://{}/android_text.txt'.format(BUCKET)
2223

2324

2425
def test_sentiment_text(capsys):
@@ -68,21 +69,18 @@ def test_syntax_file(capsys):
6869
assert 'NOUN: President' in out
6970

7071

71-
def test_sentiment_entities_text(capsys):
72-
snippets.entity_sentiment_text(
73-
'President Obama is speaking at the White House.')
72+
def test_classify_text(capsys):
73+
snippets.classify_text(
74+
'Android is a mobile operating system developed by Google, '
75+
'based on the Linux kernel and designed primarily for touchscreen '
76+
'mobile devices such as smartphones and tablets.')
7477
out, _ = capsys.readouterr()
75-
assert 'Content : White House' in out
76-
77-
78-
def test_sentiment_entities_file(capsys):
79-
snippets.entity_sentiment_file(TEST_FILE_URL)
80-
out, _ = capsys.readouterr()
81-
assert 'Content : White House' in out
78+
assert 'name' in out
79+
assert '/Computers & Electronics' in out
8280

8381

84-
def test_sentiment_entities_utf(capsys):
85-
snippets.entity_sentiment_text(
86-
'foo→bar')
82+
def test_classify_file(capsys):
83+
snippets.classify_file(LONG_TEST_FILE_URL)
8784
out, _ = capsys.readouterr()
88-
assert 'Begin Offset : 4' in out
85+
assert 'name' in out
86+
assert '/Computers & Electronics' in out

0 commit comments

Comments
 (0)