Skip to content

Feature/translation - add functionality to translate event data, etc. #167

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions app/database/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class User(Base):
email = Column(String, unique=True, nullable=False)
password = Column(String, nullable=False)
full_name = Column(String)
language = Column(String)
description = Column(String, default="Happy new user!")
avatar = Column(String, default="profile.png")
telegram_id = Column(String, unique=True)
Expand Down
82 changes: 82 additions & 0 deletions app/internal/translation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
from typing import Optional

from iso639 import languages
from textblob import TextBlob, download_corpora
from textblob.exceptions import NotTranslated

from app.database.database import SessionLocal
from loguru import logger

from app.routers.user import get_users

# Download the corpora TextBlob relies on.
# NOTE(review): this executes as a side effect of importing the module;
# consider deferring it until translation is first needed.
download_corpora.download_all()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe change it to a lazy call? (That way it runs the first time it's needed, rather than unconditionally at import time.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the same approach as the other NLTK usages in other pull requests.
Also, it only takes time on the first run;
after that it is fast.



def translate_text(text: str,
                   target_lang: str,
                   original_lang: Optional[str] = None
                   ) -> str:
    """
    Translate text to the target language,
    optionally given the original language.

    ``target_lang`` and ``original_lang`` are full language names
    (e.g. "english"); they are converted to two-letter codes before
    being handed to the translator. When ``original_lang`` is omitted,
    the source language is detected from the text itself.

    Returns:
        "" for missing or blank (empty / whitespace-only) text;
        the text unchanged when source and target languages are the
        same, or when the translator reports nothing was translated;
        otherwise the translated text.
    """
    # Treat a missing value like blank text instead of crashing on .strip().
    if text is None or not text.strip():
        return ""

    if original_lang is None:
        source_code = _detect_text_language(text)
    else:
        source_code = _lang_full_to_short(original_lang)

    # Convert the target once and reuse it for both the comparison
    # and the translation call.
    target_code = _lang_full_to_short(target_lang)

    if source_code == target_code:
        return text

    try:
        translated = TextBlob(text).translate(
            from_lang=source_code,
            to=target_code)
    except NotTranslated:
        # The translator produced no change; fall back to the input.
        return text
    return str(translated)


def _detect_text_language(text: str) -> str:
    """
    Detect the language the given text is written in.

    Delegates to TextBlob's language detection (which uses an
    external API) and returns the result as a plain string.
    """
    detected = TextBlob(text).detect_language()
    return str(detected)


def _get_user_language(user_id: int, session: SessionLocal) -> str:
    """
    Look up the language stored for the user with the given id.

    Queries the DB through ``get_users`` using the supplied session.

    Returns:
        The user's language, or "" when no user with that id exists
        (the miss is logged) or when the user has no language stored.
    """
    try:
        user = get_users(session, id=user_id)[0]
    except IndexError:
        logger.exception(
            "User was not found in the database."
        )
        return ""
    # The stored language may be unset (None); normalize it so the
    # function always returns a str, as its signature promises.
    return user.language or ""


def translate_text_for_user(text: str,
                            session: SessionLocal,
                            user_id: int) -> str:
    """
    Translate text into the language of the user with the given id.

    The user's language is looked up with ``_get_user_language``.
    If no language is found, the text is returned as-is; otherwise
    it is passed to ``translate_text`` with that language as target.
    """
    user_lang = _get_user_language(user_id, session)
    return translate_text(text, user_lang) if user_lang else text


def _lang_full_to_short(full_lang: str) -> str:
    """
    Convert a full language name (e.g. "english") to its
    two-letter code (e.g. "en").

    The name is normalized with ``str.capitalize()`` before it is
    looked up in the ``iso639`` language table.
    """
    language = languages.get(name=full_lang.capitalize())
    return language.alpha2
1 change: 0 additions & 1 deletion app/routers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import nltk


nltk.download('punkt')
8 changes: 5 additions & 3 deletions app/routers/profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ def get_placeholder_user():
email='[email protected]',
password='1a2s3d4f5g6',
full_name='My Name',
telegram_id=''
telegram_id='',
language='english',
)


Expand Down Expand Up @@ -110,6 +111,7 @@ async def upload_user_photo(
# Save to database
user.avatar = await process_image(pic, user)
session.commit()

finally:
url = router.url_path_for("profile")
return RedirectResponse(url=url, status_code=HTTP_302_FOUND)
Expand Down Expand Up @@ -145,6 +147,6 @@ async def process_image(image, user):
def get_image_crop_area(width, height):
if width > height:
delta = (width - height) // 2
return (delta, 0, width - delta, height)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The parentheses are marked as redundant; removing them has no effect on the code.

return delta, 0, width - delta, height
delta = (height - width) // 2
return (0, delta, width, width + delta)
return 0, delta, width, width + delta
7 changes: 6 additions & 1 deletion app/routers/user.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,18 @@
from app.internal.utils import save


def create_user(username, password, email, session: Session) -> User:
def create_user(username: str,
                password: str,
                email: str,
                language: str,
                session: Session) -> User:
    """
    Create a new user and save it using the given session.

    Builds a ``User`` from the supplied username, password, email and
    language, passes it to ``save`` and returns the same instance.
    """
    new_user = User(
        username=username,
        password=password,
        email=email,
        language=language,
    )
    save(new_user, session=session)
    return new_user
Expand Down
Binary file modified requirements.txt
Binary file not shown.
154 changes: 154 additions & 0 deletions tests/test_translation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
import pytest
from iso639 import languages
from textblob import TextBlob

from app.internal.translation import (
translate_text,
translate_text_for_user,
_get_user_language,
_lang_full_to_short,
_detect_text_language
)


@pytest.mark.parametrize(
    "text, target_lang, original_lang",
    [
        ("Привет мой друг", "english", "russian"),
        ("Hola mi amigo", "english", "spanish"),
        ("Bonjour, mon ami", "english", "french"),
        ("Hallo, mein Freund", "english", "german"),
    ],
)
def test_translate_text_with_original_lang(text, target_lang, original_lang):
    """Translation with an explicit source language gives the English text."""
    translated = translate_text(text, target_lang, original_lang)
    assert translated == "Hello my friend"

    # The input is detected as the declared source language.
    detected_source = TextBlob(text).detect_language()
    expected_source = languages.get(name=original_lang.capitalize()).alpha2
    assert detected_source == expected_source

    # The output is detected as the requested target language.
    detected_target = TextBlob(translated).detect_language()
    expected_target = languages.get(name=target_lang.capitalize()).alpha2
    assert detected_target == expected_target


@pytest.mark.parametrize(
    "text, target_lang",
    [
        ("Привет мой друг", "english"),
        ("Bonjour, mon ami", "english"),
        ("Hallo, mein Freund", "english"),
    ],
)
def test_translate_text_without_original_lang(text, target_lang):
    """Translation still works when the source language must be detected."""
    translated = translate_text(text, target_lang)
    assert translated == "Hello my friend"

    # The output is detected as the requested target language.
    detected_target = TextBlob(translated).detect_language()
    expected_target = languages.get(name=target_lang.capitalize()).alpha2
    assert detected_target == expected_target


@pytest.mark.parametrize(
    "text, target_lang, original_lang",
    [
        ("Привет мой друг", "russian", "russian"),
        ("Hola mi amigo", "spanish", "spanish"),
        ("Bonjour, mon ami", "french", "french"),
        ("Hallo, mein Freund", "german", "german"),
        ("Ciao amico", "italian", "italian"),
    ],
)
def test_translate_text_with_same_original_target_lang_with_original_lang(
        text,
        target_lang,
        original_lang):
    """Text is returned untouched when source and target languages match."""
    result = translate_text(text, target_lang, original_lang)
    assert result == text


@pytest.mark.parametrize(
    "text, target_lang",
    [
        ("Привет мой друг", "russian"),
        ("Hola mi amigo", "spanish"),
        ("Bonjour, mon ami", "french"),
        ("Hallo, mein Freund", "german"),
        ("Ciao amico", "italian"),
    ],
)
def test_translate_text_with_same_original_target_lang_without_original_lang(
        text,
        target_lang):
    """Text already in the target language is returned untouched."""
    result = translate_text(text, target_lang)
    assert result == text


def test_translate_text_without_text_with_original_target_lang():
    """Empty text with an explicit source language yields an empty string."""
    result = translate_text("", "english", "russian")
    assert result == ""


def test_translate_text_without_text_without_original_lang():
    """Empty text without a source language yields an empty string."""
    result = translate_text("", "english")
    assert result == ""


def test_lang_short_to_full():
    """The full name "english" maps to the two-letter code "en"."""
    short_code = _lang_full_to_short("english")
    assert short_code == "en"


def test_get_user_language(user, session):
    """The fixture user's stored language is reported as English."""
    fixture_id = user.id
    language = _get_user_language(fixture_id, session=session)
    assert fixture_id == 1
    assert language.lower() == "english"


@pytest.mark.parametrize(
    "text",
    [
        "Привет мой друг",
        "Bonjour, mon ami",
        "Hello my friend",
    ],
)
def test_translate_text_for_good_user(text, user, session):
    """Text is translated into the existing user's language."""
    fixture_id = user.id
    translated = translate_text_for_user(text, session, fixture_id)
    assert translated == "Hello my friend"


def test_translate_text_for_bed_user(user, session):
    """For a user id that does not exist, the text comes back unchanged."""
    missing_id = user.id + 1
    result = translate_text_for_user("Привет мой друг", session, missing_id)
    assert result == "Привет мой друг"


def test_detect_text_language():
    """English text is detected as "en"."""
    detected = _detect_text_language("Hello my friend")
    assert detected == "en"


@pytest.mark.parametrize(
    "text, target_lang, original_lang",
    [
        ("Hoghhflaff", "english", "spanish"),
        ("Bdonfdjourr", "english", "french"),
        ("Hafdllnnc", "english", "german"),
    ],
)
def test_translate_text_with_text_impossible_to_translate(
        text,
        target_lang,
        original_lang):
    """Gibberish that cannot be translated is returned as-is."""
    result = translate_text(text, target_lang, original_lang)
    assert result == text


@pytest.mark.parametrize(
    "text, target_lang, original_lang",
    [
        ("@Здравствуй#мой$друг!", "english", "russian"),
        ("@Hola#mi$amigo!", "english", "spanish"),
        ("@Bonjour#mon$ami!", "english", "french"),
        ("@Hallo#mein$Freund!", "english", "german"),
    ],
)
def test_translate_text_with_symbols(text, target_lang, original_lang):
    """Words separated by symbols are translated around those symbols."""
    translated = translate_text(text, target_lang, original_lang)
    assert translated == "@ Hello # my $ friend!"


@pytest.mark.parametrize(
    "text, target_lang, original_lang",
    [
        ("Привет мой друг", "italian", "spanish"),
        ("Hola mi amigo", "english", "russian"),
        ("Bonjour, mon ami", "russian", "german"),
        ("Ciao amico", "french", "german"),
    ],
)
def test_translate_text_with_with_incorrect_lang(
        text,
        target_lang,
        original_lang):
    """A declared source language that mismatches the text leaves it as-is."""
    result = translate_text(text, target_lang, original_lang)
    assert result == text


def test_get_user_language_for_bed_user(user, session):
    """Looking up a user id that does not exist yields a falsy language."""
    missing_id = user.id + 1
    language = _get_user_language(missing_id, session=session)
    assert not language
2 changes: 2 additions & 0 deletions tests/test_user.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@ def test_create_user(self, session):
username='new_test_username',
password='new_test_password',
email='[email protected]',
language='english'
)
assert user.username == 'new_test_username'
assert user.password == 'new_test_password'
assert user.email == '[email protected]'
assert user.language == 'english'
session.delete(user)
session.commit()

Expand Down
2 changes: 2 additions & 0 deletions tests/user_fixture.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ def user(session: Session) -> User:
username='test_username',
password='test_password',
email='[email protected]',
language='english'
)
yield test_user
delete_instance(session, test_user)
Expand All @@ -24,6 +25,7 @@ def sender(session: Session) -> User:
username='sender_username',
password='sender_password',
email='[email protected]',
language='english'
)
yield sender
delete_instance(session, sender)