Skip to content

Fixes for text encoding #913

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 21, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 22 additions & 12 deletions language/cloud-client/v1/snippets.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,16 @@
import argparse

from google.cloud import language
import six


def sentiment_text(text):
"""Detects sentiment in the text."""
language_client = language.Client()

if isinstance(text, six.binary_type):
text = text.decode('utf-8')

# Instantiates a plain text document.
document = language_client.document_from_text(text)

Expand Down Expand Up @@ -60,6 +64,9 @@ def entities_text(text):
"""Detects entities in the text."""
language_client = language.Client()

if isinstance(text, six.binary_type):
text = text.decode('utf-8')

# Instantiates a plain text document.
document = language_client.document_from_text(text)

Expand All @@ -69,11 +76,11 @@ def entities_text(text):

for entity in entities:
print('=' * 20)
print('{:<16}: {}'.format('name', entity.name))
print('{:<16}: {}'.format('type', entity.entity_type))
print('{:<16}: {}'.format('metadata', entity.metadata))
print('{:<16}: {}'.format('salience', entity.salience))
print('{:<16}: {}'.format('wikipedia_url',
print(u'{:<16}: {}'.format('name', entity.name))
print(u'{:<16}: {}'.format('type', entity.entity_type))
print(u'{:<16}: {}'.format('metadata', entity.metadata))
print(u'{:<16}: {}'.format('salience', entity.salience))
print(u'{:<16}: {}'.format('wikipedia_url',
entity.metadata.get('wikipedia_url', '-')))


Expand All @@ -90,18 +97,21 @@ def entities_file(gcs_uri):

for entity in entities:
print('=' * 20)
print('{:<16}: {}'.format('name', entity.name))
print('{:<16}: {}'.format('type', entity.entity_type))
print('{:<16}: {}'.format('metadata', entity.metadata))
print('{:<16}: {}'.format('salience', entity.salience))
print('{:<16}: {}'.format('wikipedia_url',
print(u'{:<16}: {}'.format('name', entity.name))
print(u'{:<16}: {}'.format('type', entity.entity_type))
print(u'{:<16}: {}'.format('metadata', entity.metadata))
print(u'{:<16}: {}'.format('salience', entity.salience))
print(u'{:<16}: {}'.format('wikipedia_url',
entity.metadata.get('wikipedia_url', '-')))


def syntax_text(text):
"""Detects syntax in the text."""
language_client = language.Client()

if isinstance(text, six.binary_type):
text = text.decode('utf-8')

# Instantiates a plain text document.
document = language_client.document_from_text(text)

Expand All @@ -110,7 +120,7 @@ def syntax_text(text):
tokens = document.analyze_syntax().tokens

for token in tokens:
print('{}: {}'.format(token.part_of_speech, token.text_content))
print(u'{}: {}'.format(token.part_of_speech, token.text_content))


def syntax_file(gcs_uri):
Expand All @@ -125,7 +135,7 @@ def syntax_file(gcs_uri):
tokens = document.analyze_syntax().tokens

for token in tokens:
print('{}: {}'.format(token.part_of_speech, token.text_content))
print(u'{}: {}'.format(token.part_of_speech, token.text_content))


if __name__ == '__main__':
Expand Down
69 changes: 41 additions & 28 deletions language/cloud-client/v1beta2/snippets.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,21 +27,25 @@
from google.cloud.gapic.language.v1beta2 import enums
from google.cloud.gapic.language.v1beta2 import language_service_client
from google.cloud.proto.language.v1beta2 import language_service_pb2
import six


def sentiment_text(text):
    """Detects sentiment in the text."""
    language_client = language.Client(api_version='v1beta2')

    # Accept raw bytes as well as unicode: decode before handing the
    # string to the client library.
    if isinstance(text, six.binary_type):
        text = text.decode('utf-8')

    # Build a plain-text document from the input.
    document = language_client.document_from_text(text)

    # Analyze sentiment for the document. HTML can be analyzed instead via:
    # document.doc_type == language.Document.HTML
    annotations = document.analyze_sentiment()
    sentiment = annotations.sentiment

    print(u'Score: {}'.format(sentiment.score))
    print(u'Magnitude: {}'.format(sentiment.magnitude))


def sentiment_file(gcs_uri):
Expand All @@ -55,14 +59,17 @@ def sentiment_file(gcs_uri):
# document.doc_type == language.Document.HTML
sentiment = document.analyze_sentiment().sentiment

print('Score: {}'.format(sentiment.score))
print('Magnitude: {}'.format(sentiment.magnitude))
print(u'Score: {}'.format(sentiment.score))
print(u'Magnitude: {}'.format(sentiment.magnitude))


def entities_text(text):
"""Detects entities in the text."""
language_client = language.Client(api_version='v1beta2')

if isinstance(text, six.binary_type):
text = text.decode('utf-8')

# Instantiates a plain text document.
document = language_client.document_from_text(text)

Expand All @@ -71,12 +78,12 @@ def entities_text(text):
entities = document.analyze_entities().entities

for entity in entities:
print('=' * 20)
print('{:<16}: {}'.format('name', entity.name))
print('{:<16}: {}'.format('type', entity.entity_type))
print('{:<16}: {}'.format('metadata', entity.metadata))
print('{:<16}: {}'.format('salience', entity.salience))
print('{:<16}: {}'.format('wikipedia_url',
print(u'=' * 20)
print(u'{:<16}: {}'.format('name', entity.name))
print(u'{:<16}: {}'.format('type', entity.entity_type))
print(u'{:<16}: {}'.format('metadata', entity.metadata))
print(u'{:<16}: {}'.format('salience', entity.salience))
print(u'{:<16}: {}'.format('wikipedia_url',
entity.metadata.get('wikipedia_url', '-')))


Expand Down Expand Up @@ -105,6 +112,9 @@ def syntax_text(text):
"""Detects syntax in the text."""
language_client = language.Client(api_version='v1beta2')

if isinstance(text, six.binary_type):
text = text.decode('utf-8')

# Instantiates a plain text document.
document = language_client.document_from_text(text)

Expand All @@ -113,7 +123,7 @@ def syntax_text(text):
tokens = document.analyze_syntax().tokens

for token in tokens:
print('{}: {}'.format(token.part_of_speech, token.text_content))
print(u'{}: {}'.format(token.part_of_speech, token.text_content))


def syntax_file(gcs_uri):
Expand All @@ -128,14 +138,17 @@ def syntax_file(gcs_uri):
tokens = document.analyze_syntax().tokens

for token in tokens:
print('{}: {}'.format(token.part_of_speech, token.text_content))
print(u'{}: {}'.format(token.part_of_speech, token.text_content))


def entity_sentiment_text(text):
"""Detects entity sentiment in the provided text."""
language_client = language_service_client.LanguageServiceClient()
document = language_service_pb2.Document()

if isinstance(text, six.binary_type):
text = text.decode('utf-8')

document.content = text.encode('utf-8')
document.type = enums.Document.Type.PLAIN_TEXT

Expand All @@ -144,15 +157,15 @@ def entity_sentiment_text(text):

for entity in result.entities:
print('Mentions: ')
print('Name: "{}"'.format(entity.name))
print(u'Name: "{}"'.format(entity.name))
for mention in entity.mentions:
print(' Begin Offset : {}'.format(mention.text.begin_offset))
print(' Content : {}'.format(mention.text.content))
print(' Magnitude : {}'.format(mention.sentiment.magnitude))
print(' Sentiment : {}'.format(mention.sentiment.score))
print(' Type : {}'.format(mention.type))
print('Salience: {}'.format(entity.salience))
print('Sentiment: {}\n'.format(entity.sentiment))
print(u' Begin Offset : {}'.format(mention.text.begin_offset))
print(u' Content : {}'.format(mention.text.content))
print(u' Magnitude : {}'.format(mention.sentiment.magnitude))
print(u' Sentiment : {}'.format(mention.sentiment.score))
print(u' Type : {}'.format(mention.type))
print(u'Salience: {}'.format(entity.salience))
print(u'Sentiment: {}\n'.format(entity.sentiment))


def entity_sentiment_file(gcs_uri):
Expand All @@ -167,15 +180,15 @@ def entity_sentiment_file(gcs_uri):
document, enums.EncodingType.UTF8)

for entity in result.entities:
print('Name: "{}"'.format(entity.name))
print(u'Name: "{}"'.format(entity.name))
for mention in entity.mentions:
print(' Begin Offset : {}'.format(mention.text.begin_offset))
print(' Content : {}'.format(mention.text.content))
print(' Magnitude : {}'.format(mention.sentiment.magnitude))
print(' Sentiment : {}'.format(mention.sentiment.score))
print(' Type : {}'.format(mention.type))
print('Salience: {}'.format(entity.salience))
print('Sentiment: {}\n'.format(entity.sentiment))
print(u' Begin Offset : {}'.format(mention.text.begin_offset))
print(u' Content : {}'.format(mention.text.content))
print(u' Magnitude : {}'.format(mention.sentiment.magnitude))
print(u' Sentiment : {}'.format(mention.sentiment.score))
print(u' Type : {}'.format(mention.type))
print(u'Salience: {}'.format(entity.salience))
print(u'Sentiment: {}\n'.format(entity.sentiment))


if __name__ == '__main__':
Expand Down
10 changes: 10 additions & 0 deletions language/cloud-client/v1beta2/snippets_test.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
# Copyright 2017 Google, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand Down Expand Up @@ -26,6 +27,15 @@ def test_sentiment_text(capsys):
assert 'Score: 0' in out


def test_sentiment_utf(capsys):
    # Exercise sentiment analysis with non-ASCII (accented French) input
    # to verify unicode handling; implicit literal concatenation yields
    # the same string the original built with '+'.
    sample = (
        u'1er site d\'information. Les articles du journal et toute l\''
        u'actualité en continu : International, France, Société, Economie, '
        u'Culture, Environnement')
    snippets.sentiment_text(sample)
    out, _ = capsys.readouterr()
    assert 'Score: 0' in out


def test_sentiment_file(capsys):
snippets.sentiment_file(TEST_FILE_URL)
out, _ = capsys.readouterr()
Expand Down