Skip to content

Commit 97f39e7

Browse files
busunkim96 and dandhlee
authored and committed
feat!: migrate to microgenerator (#61)
1 parent f28c853 commit 97f39e7

28 files changed

+510
-444
lines changed

speech/microphone/transcribe_streaming_infinite.py

Lines changed: 45 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,9 @@
4141
SAMPLE_RATE = 16000
4242
CHUNK_SIZE = int(SAMPLE_RATE / 10) # 100ms
4343

44-
RED = '\033[0;31m'
45-
GREEN = '\033[0;32m'
46-
YELLOW = '\033[0;33m'
44+
RED = "\033[0;31m"
45+
GREEN = "\033[0;32m"
46+
YELLOW = "\033[0;33m"
4747

4848

4949
def get_current_time():
@@ -123,12 +123,14 @@ def generator(self):
123123
if self.bridging_offset > self.final_request_end_time:
124124
self.bridging_offset = self.final_request_end_time
125125

126-
chunks_from_ms = round((self.final_request_end_time -
127-
self.bridging_offset) / chunk_time)
126+
chunks_from_ms = round(
127+
(self.final_request_end_time - self.bridging_offset)
128+
/ chunk_time
129+
)
128130

129-
self.bridging_offset = (round((
130-
len(self.last_audio_input) - chunks_from_ms)
131-
* chunk_time))
131+
self.bridging_offset = round(
132+
(len(self.last_audio_input) - chunks_from_ms) * chunk_time
133+
)
132134

133135
for i in range(chunks_from_ms, len(self.last_audio_input)):
134136
data.append(self.last_audio_input[i])
@@ -157,7 +159,7 @@ def generator(self):
157159
except queue.Empty:
158160
break
159161

160-
yield b''.join(data)
162+
yield b"".join(data)
161163

162164

163165
def listen_print_loop(responses, stream):
@@ -201,35 +203,37 @@ def listen_print_loop(responses, stream):
201203
if result.result_end_time.nanos:
202204
result_nanos = result.result_end_time.nanos
203205

204-
stream.result_end_time = int((result_seconds * 1000)
205-
+ (result_nanos / 1000000))
206+
stream.result_end_time = int((result_seconds * 1000) + (result_nanos / 1000000))
206207

207-
corrected_time = (stream.result_end_time - stream.bridging_offset
208-
+ (STREAMING_LIMIT * stream.restart_counter))
208+
corrected_time = (
209+
stream.result_end_time
210+
- stream.bridging_offset
211+
+ (STREAMING_LIMIT * stream.restart_counter)
212+
)
209213
# Display interim results, but with a carriage return at the end of the
210214
# line, so subsequent lines will overwrite them.
211215

212216
if result.is_final:
213217

214218
sys.stdout.write(GREEN)
215-
sys.stdout.write('\033[K')
216-
sys.stdout.write(str(corrected_time) + ': ' + transcript + '\n')
219+
sys.stdout.write("\033[K")
220+
sys.stdout.write(str(corrected_time) + ": " + transcript + "\n")
217221

218222
stream.is_final_end_time = stream.result_end_time
219223
stream.last_transcript_was_final = True
220224

221225
# Exit recognition if any of the transcribed phrases could be
222226
# one of our keywords.
223-
if re.search(r'\b(exit|quit)\b', transcript, re.I):
227+
if re.search(r"\b(exit|quit)\b", transcript, re.I):
224228
sys.stdout.write(YELLOW)
225-
sys.stdout.write('Exiting...\n')
229+
sys.stdout.write("Exiting...\n")
226230
stream.closed = True
227231
break
228232

229233
else:
230234
sys.stdout.write(RED)
231-
sys.stdout.write('\033[K')
232-
sys.stdout.write(str(corrected_time) + ': ' + transcript + '\r')
235+
sys.stdout.write("\033[K")
236+
sys.stdout.write(str(corrected_time) + ": " + transcript + "\r")
233237

234238
stream.last_transcript_was_final = False
235239

@@ -238,37 +242,42 @@ def main():
238242
"""start bidirectional streaming from microphone input to speech API"""
239243

240244
client = speech.SpeechClient()
241-
config = speech.types.RecognitionConfig(
242-
encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
245+
config = speech.RecognitionConfig(
246+
encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
243247
sample_rate_hertz=SAMPLE_RATE,
244-
language_code='en-US',
245-
max_alternatives=1)
246-
streaming_config = speech.types.StreamingRecognitionConfig(
247-
config=config,
248-
interim_results=True)
248+
language_code="en-US",
249+
max_alternatives=1,
250+
)
251+
streaming_config = speech.StreamingRecognitionConfig(
252+
config=config, interim_results=True
253+
)
249254

250255
mic_manager = ResumableMicrophoneStream(SAMPLE_RATE, CHUNK_SIZE)
251256
print(mic_manager.chunk_size)
252257
sys.stdout.write(YELLOW)
253258
sys.stdout.write('\nListening, say "Quit" or "Exit" to stop.\n\n')
254-
sys.stdout.write('End (ms) Transcript Results/Status\n')
255-
sys.stdout.write('=====================================================\n')
259+
sys.stdout.write("End (ms) Transcript Results/Status\n")
260+
sys.stdout.write("=====================================================\n")
256261

257262
with mic_manager as stream:
258263

259264
while not stream.closed:
260265
sys.stdout.write(YELLOW)
261-
sys.stdout.write('\n' + str(
262-
STREAMING_LIMIT * stream.restart_counter) + ': NEW REQUEST\n')
266+
sys.stdout.write(
267+
"\n" + str(STREAMING_LIMIT * stream.restart_counter) + ": NEW REQUEST\n"
268+
)
263269

264270
stream.audio_input = []
265271
audio_generator = stream.generator()
266272

267-
requests = (speech.types.StreamingRecognizeRequest(
268-
audio_content=content)for content in audio_generator)
273+
requests = (
274+
speech.StreamingRecognizeRequest(audio_content=content)
275+
for content in audio_generator
276+
)
269277

270-
responses = client.streaming_recognize(streaming_config,
271-
requests)
278+
responses = client.streaming_recognize(
279+
requests=requests, config=streaming_config
280+
)
272281

273282
# Now, put the transcription responses to use.
274283
listen_print_loop(responses, stream)
@@ -282,11 +291,11 @@ def main():
282291
stream.restart_counter = stream.restart_counter + 1
283292

284293
if not stream.last_transcript_was_final:
285-
sys.stdout.write('\n')
294+
sys.stdout.write("\n")
286295
stream.new_stream = True
287296

288297

289-
if __name__ == '__main__':
298+
if __name__ == "__main__":
290299

291300
main()
292301

speech/microphone/transcribe_streaming_mic.py

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,6 @@
3232
import sys
3333

3434
from google.cloud import speech
35-
from google.cloud.speech import enums
36-
from google.cloud.speech import types
3735
import pyaudio
3836
from six.moves import queue
3937

@@ -44,6 +42,7 @@
4442

4543
class MicrophoneStream(object):
4644
"""Opens a recording stream as a generator yielding the audio chunks."""
45+
4746
def __init__(self, rate, chunk):
4847
self._rate = rate
4948
self._chunk = chunk
@@ -58,8 +57,10 @@ def __enter__(self):
5857
format=pyaudio.paInt16,
5958
# The API currently only supports 1-channel (mono) audio
6059
# https://goo.gl/z757pE
61-
channels=1, rate=self._rate,
62-
input=True, frames_per_buffer=self._chunk,
60+
channels=1,
61+
rate=self._rate,
62+
input=True,
63+
frames_per_buffer=self._chunk,
6364
# Run the audio stream asynchronously to fill the buffer object.
6465
# This is necessary so that the input device's buffer doesn't
6566
# overflow while the calling thread makes network requests, etc.
@@ -104,7 +105,7 @@ def generator(self):
104105
except queue.Empty:
105106
break
106107

107-
yield b''.join(data)
108+
yield b"".join(data)
108109

109110

110111
def listen_print_loop(responses):
@@ -142,10 +143,10 @@ def listen_print_loop(responses):
142143
#
143144
# If the previous result was longer than this one, we need to print
144145
# some extra spaces to overwrite the previous result
145-
overwrite_chars = ' ' * (num_chars_printed - len(transcript))
146+
overwrite_chars = " " * (num_chars_printed - len(transcript))
146147

147148
if not result.is_final:
148-
sys.stdout.write(transcript + overwrite_chars + '\r')
149+
sys.stdout.write(transcript + overwrite_chars + "\r")
149150
sys.stdout.flush()
150151

151152
num_chars_printed = len(transcript)
@@ -155,8 +156,8 @@ def listen_print_loop(responses):
155156

156157
# Exit recognition if any of the transcribed phrases could be
157158
# one of our keywords.
158-
if re.search(r'\b(exit|quit)\b', transcript, re.I):
159-
print('Exiting..')
159+
if re.search(r"\b(exit|quit)\b", transcript, re.I):
160+
print("Exiting..")
160161
break
161162

162163
num_chars_printed = 0
@@ -165,28 +166,33 @@ def listen_print_loop(responses):
165166
def main():
166167
# See http://g.co/cloud/speech/docs/languages
167168
# for a list of supported languages.
168-
language_code = 'en-US' # a BCP-47 language tag
169+
language_code = "en-US" # a BCP-47 language tag
169170

170171
client = speech.SpeechClient()
171-
config = types.RecognitionConfig(
172-
encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
172+
config = speech.RecognitionConfig(
173+
encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
173174
sample_rate_hertz=RATE,
174-
language_code=language_code)
175-
streaming_config = types.StreamingRecognitionConfig(
176-
config=config,
177-
interim_results=True)
175+
language_code=language_code,
176+
)
177+
streaming_config = speech.StreamingRecognitionConfig(
178+
config=config, interim_results=True
179+
)
178180

179181
with MicrophoneStream(RATE, CHUNK) as stream:
180182
audio_generator = stream.generator()
181-
requests = (types.StreamingRecognizeRequest(audio_content=content)
182-
for content in audio_generator)
183+
requests = (
184+
speech.StreamingRecognizeRequest(audio_content=content)
185+
for content in audio_generator
186+
)
183187

184-
responses = client.streaming_recognize(streaming_config, requests)
188+
responses = client.streaming_recognize(
189+
requests=requests, config=streaming_config
190+
)
185191

186192
# Now, put the transcription responses to use.
187193
listen_print_loop(responses)
188194

189195

190-
if __name__ == '__main__':
196+
if __name__ == "__main__":
191197
main()
192198
# [END speech_transcribe_streaming_mic]

speech/microphone/transcribe_streaming_mic_test.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
import mock
2020

21-
RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')
21+
RESOURCES = os.path.join(os.path.dirname(__file__), "resources")
2222

2323

2424
class MockPyAudio(object):
@@ -32,8 +32,9 @@ def open(self, stream_callback, rate, *args, **kwargs):
3232
self.rate = rate
3333
self.closed = threading.Event()
3434
self.stream_thread = threading.Thread(
35-
target=self.stream_audio, args=(
36-
self.audio_filename, stream_callback, self.closed))
35+
target=self.stream_audio,
36+
args=(self.audio_filename, stream_callback, self.closed),
37+
)
3738
self.stream_thread.start()
3839
return self
3940

@@ -47,23 +48,25 @@ def terminate(self):
4748
pass
4849

4950
def stream_audio(self, audio_filename, callback, closed, num_frames=512):
50-
with open(audio_filename, 'rb') as audio_file:
51+
with open(audio_filename, "rb") as audio_file:
5152
while not closed.is_set():
5253
# Approximate realtime by sleeping for the appropriate time for
5354
# the requested number of frames
5455
time.sleep(num_frames / float(self.rate))
5556
# audio is 16-bit samples, whereas python byte is 8-bit
5657
num_bytes = 2 * num_frames
57-
chunk = audio_file.read(num_bytes) or b'\0' * num_bytes
58+
chunk = audio_file.read(num_bytes) or b"\0" * num_bytes
5859
callback(chunk, None, None, None)
5960

6061

61-
@mock.patch.dict('sys.modules', pyaudio=mock.MagicMock(
62-
PyAudio=MockPyAudio(os.path.join(RESOURCES, 'quit.raw'))))
62+
@mock.patch.dict(
63+
"sys.modules",
64+
pyaudio=mock.MagicMock(PyAudio=MockPyAudio(os.path.join(RESOURCES, "quit.raw"))),
65+
)
6366
def test_main(capsys):
6467
import transcribe_streaming_mic
6568

6669
transcribe_streaming_mic.main()
6770
out, err = capsys.readouterr()
6871

69-
assert re.search(r'quit', out, re.DOTALL | re.I)
72+
assert re.search(r"quit", out, re.DOTALL | re.I)

0 commit comments

Comments
 (0)