Skip to content

Commit 955916b

Browse files
jerjoutelpirion
authored and committed
Speech sample using microphone [(#1013)](#1013)
* Add sample for transcribing from microphone. * Remove error handling that probably won't work * Add test. * Fix lint. * Increment copyright date
1 parent 940cce1 commit 955916b

File tree

3 files changed

+237
-0
lines changed

3 files changed

+237
-0
lines changed

speech/snippets/resources/quit.raw

160 KB
Binary file not shown.
Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright 2017 Google Inc. All Rights Reserved.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
"""Google Cloud Speech API sample application using the streaming API.
18+
19+
NOTE: This module requires the additional dependency `pyaudio`. To install
20+
using pip:
21+
22+
pip install pyaudio
23+
24+
Example usage:
25+
python transcribe_streaming_mic.py
26+
"""
27+
28+
# [START import_libraries]
29+
from __future__ import division
30+
31+
import re
32+
import sys
33+
34+
from google.cloud import speech
35+
import pyaudio
36+
from six.moves import queue
37+
# [END import_libraries]
38+
39+
# Audio recording parameters
RATE = 16000  # samples per second
CHUNK = RATE // 10  # number of frames in 100ms of audio
42+
43+
44+
class MicAsFile(object):
    """Opens a recording stream as a file-like object.

    Intended to be used as a context manager. Entering opens a PyAudio input
    stream whose callback pushes every captured buffer onto an internal
    queue; `read` drains that queue; exiting stops the stream and releases
    the PyAudio interface.
    """

    def __init__(self, rate, chunk):
        """Store the recording parameters; no audio device is opened yet.

        Args:
            rate: Sample rate handed to PyAudio as `rate`.
            chunk: Buffer size handed to PyAudio as `frames_per_buffer`.
        """
        self._rate = rate
        self._chunk = chunk

        # Create a thread-safe buffer of audio data
        self._buff = queue.Queue()
        self.closed = True

    def __enter__(self):
        """Open the audio input stream and mark this object as open.

        Returns:
            This instance.
        """
        self._audio_interface = pyaudio.PyAudio()
        try:
            self._audio_stream = self._audio_interface.open(
                format=pyaudio.paInt16,
                # The API currently only supports 1-channel (mono) audio
                # https://goo.gl/z757pE
                channels=1, rate=self._rate,
                input=True, frames_per_buffer=self._chunk,
                # Run the audio stream asynchronously to fill the buffer
                # object. This is necessary so that the input device's
                # buffer doesn't overflow while the calling thread makes
                # network requests, etc.
                stream_callback=self._fill_buffer,
            )
        except Exception:
            # __exit__ is not invoked when __enter__ fails, so release the
            # interface here rather than leaking it.
            self._audio_interface.terminate()
            raise

        self.closed = False

        return self

    def __exit__(self, type, value, traceback):
        """Stop recording and release the audio resources.

        Every cleanup step is attempted even if an earlier one raises, so a
        failing `stop_stream()`/`close()` can neither leak the PyAudio
        interface nor leave a reader blocked in `read` forever. The first
        exception raised by a cleanup step still propagates.
        """
        try:
            self._audio_stream.stop_stream()
        finally:
            try:
                self._audio_stream.close()
            finally:
                self.closed = True
                # Flush out the read, just in case: the sentinel wakes up a
                # reader that is blocked on an empty buffer.
                self._buff.put(None)
                self._audio_interface.terminate()

    def _fill_buffer(self, in_data, frame_count, time_info, status_flags):
        """Continuously collect data from the audio stream, into the buffer."""
        self._buff.put(in_data)
        return None, pyaudio.paContinue

    def read(self, chunk_size):
        """Return all audio buffered so far, joined into one bytes object.

        Blocks until at least one buffer is available. `chunk_size` is
        accepted so the object looks like a file, but it is not used: the
        call returns whatever has accumulated in the buffer.

        Returns:
            The buffered audio bytes, or None if the stream is closed before
            or during the call.
        """
        if self.closed:
            return

        # Use a blocking get() to ensure there's at least one chunk of data.
        data = [self._buff.get()]

        # Now consume whatever other data's still buffered.
        while True:
            try:
                data.append(self._buff.get(block=False))
            except queue.Empty:
                break

        # Closing queues a None sentinel, which must never reach join().
        if self.closed:
            return
        return b''.join(data)
102+
# [END audio_stream]
103+
104+
105+
def listen_print_loop(results_gen):
    """Print transcriptions as they arrive from the server.

    `results_gen` is iterated to exhaustion (or until a keyword is heard);
    each item is expected to expose `alternatives`, `transcript` and
    `is_final`. Items without alternatives are skipped.

    Interim results are written followed by a carriage return, so the next
    result overwrites them on the same line. Final results are printed with
    a trailing newline so the finalized transcription is preserved. The loop
    stops once a final transcript contains the word "exit" or "quit"
    (case-insensitive).
    """
    previous_width = 0
    for result in results_gen:
        if not result.alternatives:
            continue

        # Display the top transcription, padded with spaces so that a
        # shorter line fully covers a longer interim line printed before it.
        transcript = result.transcript
        line = transcript.ljust(previous_width)

        if result.is_final:
            print(line)

            # Exit recognition if any of the transcribed phrases could be
            # one of our keywords.
            if re.search(r'\b(exit|quit)\b', transcript, re.I):
                print('Exiting..')
                break

            previous_width = 0
            continue

        # Interim result: return to the start of the line without advancing.
        sys.stdout.write(line + '\r')
        sys.stdout.flush()
        previous_width = len(transcript)
147+
148+
149+
def main():
    """Stream microphone audio to the Speech API and print the transcripts."""
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag

    client = speech.Client()

    with MicAsFile(RATE, CHUNK) as mic:
        sample = client.sample(
            stream=mic,
            encoding=speech.encoding.Encoding.LINEAR16,
            sample_rate_hertz=RATE)
        responses = sample.streaming_recognize(
            language_code=language_code, interim_results=True)

        # Now, put the transcription responses to use.
        listen_print_loop(responses)
165+
166+
167+
if __name__ == '__main__':
168+
main()
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# Copyright 2017, Google, Inc.
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
14+
import os
15+
import re
16+
import threading
17+
import time
18+
19+
import mock
20+
21+
# Directory next to this test file holding its fixtures (e.g. quit.raw).
RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')
22+
23+
24+
class MockPyAudio(object):
    """Stand-in for both the `pyaudio.PyAudio` class and the stream it opens.

    Calling the instance returns the instance itself, so it can be patched in
    where the `PyAudio` class is expected. `open` starts a background thread
    that feeds the contents of an audio file to the stream callback at
    roughly real-time speed.
    """

    def __init__(self, audio_filename):
        """Remember which raw audio file to play back.

        Args:
            audio_filename: Path of the file whose bytes are fed to the
                stream callback.
        """
        self.audio_filename = audio_filename

    def __call__(self, *args):
        """Return this instance, mimicking construction of `PyAudio`."""
        return self

    def open(self, stream_callback, rate, *args, **kwargs):
        """Start feeding audio to `stream_callback` from a background thread.

        Args:
            stream_callback: Callable invoked as
                `callback(chunk, None, None, None)` for every chunk.
            rate: Sample rate, used to pace the playback.
            *args, **kwargs: Accepted and ignored.

        Returns:
            This instance, which also plays the role of the opened stream.
        """
        self.rate = rate
        self.closed = threading.Event()
        self.stream_thread = threading.Thread(
            target=self.stream_audio, args=(
                self.audio_filename, stream_callback, self.closed))
        # A daemon thread cannot keep the interpreter alive if a failing
        # test never gets around to calling close().
        self.stream_thread.daemon = True
        self.stream_thread.start()
        return self

    def close(self):
        """Stop the feeder thread and wait until it has finished."""
        self.closed.set()
        # Joining guarantees no callback fires after close() returns. Skip it
        # when close() is called from the feeder thread itself, since a
        # thread cannot join itself.
        if self.stream_thread is not threading.current_thread():
            self.stream_thread.join()

    def stop_stream(self):
        """No-op; present only to satisfy the stream interface."""
        pass

    def terminate(self):
        """No-op; present only to satisfy the PyAudio interface."""
        pass

    def stream_audio(self, audio_filename, callback, closed, num_frames=512):
        """Feed the file to `callback` chunk by chunk until `closed` is set.

        Once the file is exhausted, chunks of zero bytes are sent instead, so
        the callback keeps being invoked until the stream is closed.

        Args:
            audio_filename: Path of the audio file to read.
            callback: Callable invoked as `callback(chunk, None, None, None)`.
            closed: `threading.Event` that ends the loop once set.
            num_frames: Number of frames sent per callback invocation.
        """
        with open(audio_filename, 'rb') as audio_file:
            while not closed.is_set():
                # Approximate realtime by sleeping for the appropriate time for
                # the requested number of frames
                time.sleep(num_frames / float(self.rate))
                # audio is 16-bit samples, whereas python byte is 8-bit
                num_bytes = 2 * num_frames
                chunk = audio_file.read(num_bytes) or b'\0' * num_bytes
                callback(chunk, None, None, None)
59+
60+
61+
@mock.patch.dict('sys.modules', pyaudio=mock.MagicMock(
    PyAudio=MockPyAudio(os.path.join(RESOURCES, 'quit.raw'))))
def test_main(capsys):
    """Run the sample against recorded audio and check the printed output.

    `pyaudio` is replaced in `sys.modules` before the sample is imported, so
    the sample reads from the `quit.raw` fixture instead of a microphone.
    """
    # Import inside the test so the patched `pyaudio` module is picked up.
    import transcribe_streaming_mic

    transcribe_streaming_mic.main()
    captured_out, _ = capsys.readouterr()

    assert re.search(r'quit', captured_out, re.DOTALL | re.I)

0 commit comments

Comments
 (0)