diff --git a/python/.gitignore b/python/.gitignore deleted file mode 100644 index d2457101..00000000 --- a/python/.gitignore +++ /dev/null @@ -1,154 +0,0 @@ -# Python gitignore from: https://github.com/github/gitignore/blob/main/Python.gitignore - -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -cover/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.pybuilder/ -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -# For a library or package, you might want to ignore these files since the code is -# intended to run in multiple environments; otherwise, check them in: -# .python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# poetry -# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 
-# This is especially recommended for binary packages to ensure reproducibility, and is more -# commonly ignored for libraries. -# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control -#poetry.lock - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -# Cython debug symbols -cython_debug/ - -# PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
-#.idea/ diff --git a/python/text-to-speech/main.py b/python/text-to-speech/main.py index 887e2387..b593667d 100644 --- a/python/text-to-speech/main.py +++ b/python/text-to-speech/main.py @@ -1,3 +1,4 @@ +"""Synthesize text to speech using Google, Azure and AWS API.""" # Standard library import base64 import abc @@ -9,9 +10,6 @@ import azure.cognitiveservices.speech as speechsdk import boto3 -# Local imports -import secret - class TextToSpeech(): """Base class for Text to Speech.""" @@ -20,7 +18,7 @@ def __init__(self, req: requests) -> None: self.validate_request(req) @abc.abstractmethod - def validate_request(self, req: requests): + def validate_request(self, req: requests) -> None: """Abstract validate request method for providers.""" @abc.abstractmethod @@ -47,7 +45,7 @@ def validate_request(self, req: requests) -> None: self.api_key = req.variables.get("API_KEY") self.project_id = req.variables.get("PROJECT_ID") - def speech(self, text, language) -> bytes: + def speech(self, text: str, language: str) -> bytes: """ Converts the given text into speech with the Google text to speech API. @@ -59,25 +57,33 @@ def speech(self, text, language) -> bytes: bytes: The synthezied speech in bytes. """ # Instantiate a client. - client = texttospeech.TextToSpeechClient(client_options={"api_key": self.api_key, "quota_project_id": self.project_id}) + client = texttospeech.TextToSpeechClient(client_options={ + "api_key": self.api_key, + "quota_project_id": self.project_id, + }) # Set the text input to be synthesized. synthesis_input = texttospeech.SynthesisInput(text=text) - # Build the voice request, select the language code ("en-US") and the ssml voice gender is neutral. - voice = texttospeech.VoiceSelectionParams(language_code=language, ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL) + # Build the voice request, select the language code ("en-US") + # and the ssml voice gender is neutral. 
+ voice = texttospeech.VoiceSelectionParams( + language_code=language, + ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL + ) # Select the type of audio file you want returned. - audio_config = texttospeech.AudioConfig(audio_encoding=texttospeech.AudioEncoding.MP3) - # Perform the text-to-speech request on the text input with the selected voice parameters and audio file type. - response = client.synthesize_speech(input=synthesis_input, voice=voice, audio_config=audio_config) + audio_config = texttospeech.AudioConfig( + audio_encoding=texttospeech.AudioEncoding.MP3) + # Perform the text-to-speech request on the text input + # with the selected voice parameters and audio file type. + response = client.synthesize_speech( + input=synthesis_input, + voice=voice, + audio_config=audio_config + ) return response.audio_content class Azure(TextToSpeech): - """ - This class represents the implementation of Azure text to speech. - """ - api_key = None - region_key = None - + """This class represents the implementation of Azure text to speech.""" def validate_request(self, req: requests) -> None: """ This method validates the request data for Azure text to speech. @@ -88,13 +94,13 @@ def validate_request(self, req: requests) -> None: ValueError: If any required value is missing or invalid. """ if not req.variables.get("API_KEY"): - raise ValueError("Missing API_KEY") + raise ValueError("Missing API_KEY.") if not req.variables.get("REGION_KEY"): - raise ValueError("Missing region") + raise ValueError("Missing REGION_KEY.") self.api_key = req.variables.get("API_KEY") self.region_key = req.variables.get("REGION_KEY") - def speech(self, text, language) -> bytes: + def speech(self, text: str, language: str) -> bytes: """ Converts the given text into speech with the Google text to speech API. @@ -106,22 +112,25 @@ def speech(self, text, language) -> bytes: bytes: The synthezied speech in bytes. """ # Set the speech configuration to speech key and region key.
- speech_config = speechsdk.SpeechConfig(subscription=self.api_key, region=self.region_key) + speech_config = speechsdk.SpeechConfig( + subscription=self.api_key, + region=self.region_key + ) # The language of the voice that speaks. speech_config.speech_synthesis_language = language # Set the speech. - speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=None) + speech_synthesizer = speechsdk.SpeechSynthesizer( + speech_config=speech_config, + audio_config=None + ) # Response for the speech synthesizer. response = speech_synthesizer.speak_text_async(text).get().audio_data return response class AWS(TextToSpeech): - """ - This class represents the implementation of AWS text to speech. - """ - api_key = None - secret_api_key = None + """This class represents the implementation of AWS text to speech.""" + voice_id = "Joanna" def validate_request(self, req: requests) -> None: """ @@ -139,7 +148,7 @@ def validate_request(self, req: requests) -> None: self.api_key = req.payload.get("API_KEY") self.secret_api_key = req.payload.get("SECRET_API_KEY") - def speech(self, text, language) -> bytes: + def speech(self, text: str, language: str) -> bytes: """ Converts the given text into speech with the AWS text to speech API. @@ -150,12 +159,27 @@ def speech(self, text, language) -> bytes: Returns: bytes: The synthezied speech in bytes.
""" - polly_client = boto3.Session(aws_access_key_id=self.api_key, aws_secret_access_key=self.secret_api_key, region_name="us-west-2").client("polly") - response = polly_client.synthesize_speech(VoiceId="Joanna", OutputFormat="mp3", Text=text, LanguageCode=language) - return response["AudioStream"].read().decode() + # Call polly client using boto3.session + polly_client = boto3.Session( + aws_access_key_id=self.api_key, + aws_secret_access_key=self.secret_api_key, + region_name="us-west-2" + ).client("polly") + # Get response from polly client + response = polly_client.synthesize_speech( + VoiceId=AWS.voice_id, + OutputFormat="mp3", + Text=text, + LanguageCode=language + ) + return response["AudioStream"].read() + + +list_of_providers = ["google", "azure", "aws"] + +def validate_common(req: requests) -> tuple[str, str, str]: + -def validate_common(req: requests) -> tuple: """ This function validates the common fields in the request data that are independent of the text-to-speech provider. @@ -176,11 +200,15 @@ def validate_common(req: requests) -> tuple: # Check if variables is empty. if not req.variables: - raise ValueError("Missing variables.") + raise ValueError("Missing Variables.") # Check if provider is empty. if not req.payload.get("provider"): - raise ValueError("Missing provider") + raise ValueError("Missing Provider.") + + # Check if provider is in the list + if req.payload.get("provider").lower() not in list_of_providers: + raise ValueError("Invalid Provider.") # Check if text is empty. if not req.payload.get("text"): @@ -191,14 +219,8 @@ def validate_common(req: requests) -> tuple: raise ValueError("Missing Language.") # Return the text and langage.
- return (req.payload.get("text"), req.payload.get("language")) - - -IMPLEMENTATIONS = { - "google": Google, - "azure": Azure, - "aws": AWS, -} + return (req.payload.get("provider").lower(), + req.payload.get("text"), req.payload.get("language")) def main(req: requests, res: json) -> json: @@ -214,26 +236,28 @@ def main(req: requests, res: json) -> json: containing the synthesized audio in base64 encoded format. """ try: - text, language = validate_common(req) - provider_class = IMPLEMENTATIONS[req.payload.get("provider")](req) + provider, text, language = validate_common(req) + if provider == "google": + provider_class = Google(req) + elif provider == "azure": + provider_class = Azure(req) + else: + provider_class = AWS(req) except (ValueError) as value_error: return res.json({ "success": False, - "error": f"{value_error}", + "error": str(value_error), }) try: - audio_stream = provider_class.speech(text, language) + audio_bytes = provider_class.speech(text, language) except Exception as error: return res.json({ "success": False, "error": f"{type(error).__name__}: {error}", }) - # f = open("python/text-to-speech/results/azure.txt", "w") - # f.write(base64.b64encode(audio_stream).decode()) - return res.json({ "success": True, - "audio_stream": base64.b64encode(audio_stream).decode(), - }) \ No newline at end of file + "audio_bytes": base64.b64encode(audio_bytes).decode(), + }) diff --git a/python/text-to-speech/test_main.py b/python/text-to-speech/test_main.py index ee7cbeb7..62a7ab58 100644 --- a/python/text-to-speech/test_main.py +++ b/python/text-to-speech/test_main.py @@ -24,6 +24,7 @@ read_text(encoding="utf-8")) + def get_instance(provider, key, project_id): IMPLEMENTATIONS = { "google": main.Google,