
Commit e67b014

feat(speech): Add Speech-to-Text On-Prem sample (#4223)
* feat: adds speech-to-text onprem sample
* Remove client options and change to f-strings
* Move to onprem folder, add resource and README

Co-authored-by: Leah E. Cole <[email protected]>
1 parent be30f47 commit e67b014

File tree

3 files changed: +197, -0 lines changed

Binary file not shown (the audio resource added by this commit).
Lines changed: 111 additions & 0 deletions (README)
@@ -0,0 +1,111 @@
.. This file is automatically generated. Do not edit this file directly.

Google Cloud Speech-to-Text On-Prem Python Samples
===============================================================================


.. warning:: This product is only available to customers that have been granted access. Please `contact us`_ to request access to the Speech-to-Text On-Prem feature.

This directory contains samples for `Google Cloud Speech-to-Text On-Prem`_. Speech-to-Text On-Prem enables easy integration of Google speech recognition technologies into your on-prem solution.

.. _Google Cloud Speech-to-Text On-Prem: https://cloud.google.com/speech-to-text/on-prem/priv/docs

.. _contact us: https://cloud.google.com/contact

Setup
-------------------------------------------------------------------------------


Prepare and Deploy API
+++++++++++++++++++++++

This sample requires you to have a Kubernetes cluster with the Speech-to-Text On-Prem service deployed. Follow the quickstart steps listed below:

#. `Setup IAM, Kubernetes, Billing`_

#. `Deploy the API using the UI or command line`_

#. `Query the API to ensure it's working`_ (a quick connectivity check is sketched just after these links)


.. _Query the API to ensure it's working:
   https://cloud.google.com/speech-to-text/on-prem/priv/docs/query

.. _Deploy the API using the UI or command line:
   https://cloud.google.com/speech-to-text/on-prem/priv/docs/deploy

.. _Setup IAM, Kubernetes, Billing:
   https://cloud.google.com/speech-to-text/on-prem/priv/docs/before-you-begin

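As a sanity check before running the sample, you can verify that the deployed endpoint accepts gRPC connections. This is a minimal sketch, not part of the committed sample; it assumes ``grpcio`` is installed and uses an illustrative port-forwarded address (substitute your public or cluster IP and port).

.. code-block:: python

    # Quick reachability check for the On-Prem endpoint.
    # The address below is illustrative; use your own endpoint.
    import grpc

    channel = grpc.insecure_channel("0.0.0.0:10000")
    try:
        # Block until the channel is ready, or give up after 10 seconds.
        grpc.channel_ready_future(channel).result(timeout=10)
        print("Speech-to-Text On-Prem endpoint is reachable")
    except grpc.FutureTimeoutError:
        print("Could not connect to the endpoint within 10 seconds")
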
Install Dependencies
++++++++++++++++++++

#. Clone python-docs-samples and change directory to the sample directory you want to use.

    .. code-block:: bash

        $ git clone https://github.com/GoogleCloudPlatform/python-docs-samples.git
        $ cd python-docs-samples/speech/cloud-client

#. Install `pip`_ and `virtualenv`_ if you do not already have them. You may want to refer to the `Python Development Environment Setup Guide`_ for Google Cloud Platform for instructions.

   .. _Python Development Environment Setup Guide:
       https://cloud.google.com/python/setup

#. Create a virtualenv. This sample requires Python 3.6+ (it uses f-strings).

    .. code-block:: bash

        $ virtualenv env
        $ source env/bin/activate

#. Install the dependencies needed to run the samples.

    .. code-block:: bash

        $ pip install -r requirements.txt

.. _pip: https://pip.pypa.io/
.. _virtualenv: https://virtualenv.pypa.io/

Samples
-------------------------------------------------------------------------------

transcribe_onprem
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

You can run this sample in one of two ways: against a **public IP**:

.. code-block:: bash

    # Using a Public IP
    $ python transcribe_onprem.py --file_path="../resources/two_channel_16k.wav" --api_endpoint=${PUBLIC_IP}:443

or against a **cluster level IP** forwarded to your local machine:

.. code-block:: bash

    # Using a cluster level IP
    $ kubectl port-forward -n $NAMESPACE $POD 10000:443
    $ python transcribe_onprem.py --file_path="../resources/two_channel_16k.wav" --api_endpoint="0.0.0.0:10000"

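You can also call the sample's function directly from Python, for example from an interactive session. This is a minimal sketch, assuming you run it from the sample directory and reuse the illustrative port-forwarded endpoint and audio path shown above.

.. code-block:: python

    # Hypothetical direct call to the sample function; the endpoint and
    # file path are the illustrative values from the commands above.
    from transcribe_onprem import transcribe_onprem

    transcribe_onprem(
        local_file_path="../resources/two_channel_16k.wav",
        api_endpoint="0.0.0.0:10000",
    )
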
The client library
-------------------------------------------------------------------------------

This sample uses the `Google Cloud Client Library for Python`_.
You can read the documentation for more details on API usage and use GitHub
to `browse the source`_ and `report issues`_.

.. _Google Cloud Client Library for Python:
   https://googlecloudplatform.github.io/google-cloud-python/
.. _browse the source:
   https://github.com/GoogleCloudPlatform/google-cloud-python
.. _report issues:
   https://github.com/GoogleCloudPlatform/google-cloud-python/issues


.. _Google Cloud SDK: https://cloud.google.com/sdk/
Lines changed: 86 additions & 0 deletions (transcribe_onprem.py)
@@ -0,0 +1,86 @@
#!/usr/bin/env python

# Copyright 2020, Google LLC
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse


# [START speech_transcribe_onprem]
def transcribe_onprem(local_file_path, api_endpoint):
    """
    Transcribe a short audio file using synchronous speech recognition on-prem

    Args:
      local_file_path: The path to local audio file, e.g. /path/audio.wav
      api_endpoint: Endpoint to call for speech recognition, e.g. 0.0.0.0:10000
    """
    from google.cloud import speech_v1p1beta1
    from google.cloud.speech_v1p1beta1 import enums
    import grpc
    import io

    # api_endpoint = '0.0.0.0:10000'
    # local_file_path = '../resources/two_channel_16k.raw'

    # Create a gRPC channel to your server
    channel = grpc.insecure_channel(target=api_endpoint)

    client = speech_v1p1beta1.SpeechClient(channel=channel)

    # The language of the supplied audio
    language_code = "en-US"

    # Sample rate in Hertz of the audio data sent
    sample_rate_hertz = 16000

    # Encoding of audio data sent. This sample sets this explicitly.
    # This field is optional for FLAC and WAV audio formats.
    encoding = enums.RecognitionConfig.AudioEncoding.LINEAR16
    config = {
        "encoding": encoding,
        "language_code": language_code,
        "sample_rate_hertz": sample_rate_hertz,
    }
    with io.open(local_file_path, "rb") as f:
        content = f.read()
    audio = {"content": content}

    response = client.recognize(config, audio)
    for result in response.results:
        # First alternative is the most probable result
        alternative = result.alternatives[0]
        print(f"Transcript: {alternative.transcript}")
# [END speech_transcribe_onprem]


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument(
        "--file_path",
        required=True,
        help="Path to local audio file to be recognized, e.g. /path/audio.wav",
    )
    parser.add_argument(
        "--api_endpoint",
        required=True,
        help="Endpoint to call for speech recognition, e.g. 0.0.0.0:10000",
    )

    args = parser.parse_args()
    transcribe_onprem(
        local_file_path=args.file_path, api_endpoint=args.api_endpoint
    )
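
The sample passes ``config`` and ``audio`` as plain dicts, which the client accepts. With the same pre-2.0 ``google-cloud-speech`` library the sample targets (its ``enums`` import was removed in 2.0), the request can equivalently be built from explicit message types. The following is a minimal sketch with illustrative endpoint and file path; the ``types`` import is an assumption about that library version, not something shown in this commit.

.. code-block:: python

    # Sketch: the same request built with explicit message types instead of
    # dicts. Assumes google-cloud-speech < 2.0 (implied by the sample's
    # `enums` import); the endpoint and file path are illustrative.
    import io

    import grpc
    from google.cloud import speech_v1p1beta1
    from google.cloud.speech_v1p1beta1 import enums, types

    channel = grpc.insecure_channel("0.0.0.0:10000")
    client = speech_v1p1beta1.SpeechClient(channel=channel)

    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code="en-US",
    )
    with io.open("../resources/two_channel_16k.wav", "rb") as f:
        audio = types.RecognitionAudio(content=f.read())

    response = client.recognize(config, audio)
    for result in response.results:
        print(f"Transcript: {result.alternatives[0].transcript}")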
