Skip to content

Commit 144a75f

Browse files
authored
Merge pull request zhayujie#385 from wanggang1987/google_voice
Voice support
2 parents c03706d + e87c29b commit 144a75f

File tree

12 files changed

+254
-13
lines changed

12 files changed

+254
-13
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@ venv*
66
config.json
77
QR.png
88
nohup.out
9+
tmp

README.md

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,14 @@ cd chatgpt-on-wechat/
7171
```bash
7272
pip3 install itchat-uos==1.5.0.dev0
7373
pip3 install --upgrade openai
74+
75+
如果使用百度的语音识别,需要安装百度的pythonSDK
76+
pip3 install baidu-aip chardet
77+
如果使用google的语音识别,需要安装speech_recognition和依赖的ffmpeg和espeak
78+
pip3 install SpeechRecognition pyttsx3
79+
--在MacOS中安装ffmpeg,brew install ffmpeg espeak
80+
--在Windows中安装ffmpeg,下载ffmpeg.exe
81+
--在Linux中安装ffmpeg,apt-get install ffmpeg espeak
7482
```
7583
注:`itchat-uos`使用指定版本1.5.0.dev0,`openai`使用最新版本,需高于0.27.0。
7684

@@ -112,7 +120,11 @@ cp config-template.json config.json
112120
+ 默认只要被人 @ 就会触发机器人自动回复;另外群聊天中只要检测到以 "@bot" 开头的内容,同样会自动回复(方便自己触发),这对应配置项 `group_chat_prefix`
113121
+ 可选配置: `group_name_keyword_white_list`配置项支持模糊匹配群名称,`group_chat_keyword`配置项则支持模糊匹配群消息内容,用法与上述两个配置项相同。(Contributed by [evolay](https://github.com/evolay))
114122

115-
**3.其他配置**
123+
**3.语音识别**
124+
+ 配置`speech_recognition=true`开启语音识别,默认使用openai的whisper模型
125+
+ 配置`voice_reply_voice=true`语音回复语音,但是需要配置对应语音合成平台的key,由于itchat协议的限制,只能发送语音mp3文件。使用wechaty则回复的是微信语音。
126+
127+
**4.其他配置**
116128

117129
+ `proxy`:由于目前 `openai` 接口国内无法访问,需配置代理客户端的地址,详情参考 [#351](https://github.com/zhayujie/chatgpt-on-wechat/issues/351)
118130
+ 对于图像生成,在满足个人或群组触发条件外,还需要额外的关键词前缀来触发,对应配置 `image_create_prefix `

bridge/bridge.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from bot import bot_factory
2+
from voice import voice_factory
23

34

45
class Bridge(object):
@@ -7,3 +8,9 @@ def __init__(self):
78

89
def fetch_reply_content(self, query, context):
    """Return the "chatGPT" bot's reply to ``query`` for the given ``context``."""
    chat_bot = bot_factory.create_bot("chatGPT")
    return chat_bot.reply(query, context)
11+
12+
def fetch_voice_to_text(self, voiceFile):
    """Transcribe the audio file at ``voiceFile`` using the "openai" voice backend."""
    recognizer = voice_factory.create_voice("openai")
    return recognizer.voiceToText(voiceFile)
14+
15+
def fetch_text_to_voice(self, text):
    """Synthesize ``text`` with the "baidu" voice backend and return its result."""
    synthesizer = voice_factory.create_voice("baidu")
    return synthesizer.textToVoice(text)

channel/channel.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ def startup(self):
1111
"""
1212
raise NotImplementedError
1313

14-
def handle(self, msg):
14+
def handle_text(self, msg):
1515
"""
1616
process received msg
1717
:param msg: message object
@@ -29,3 +29,9 @@ def send(self, msg, receiver):
2929

3030
def build_reply_content(self, query, context=None):
    """Delegate to a fresh ``Bridge`` to obtain the reply for ``query``."""
    bridge = Bridge()
    return bridge.fetch_reply_content(query, context)
32+
33+
def build_voice_to_text(self, voice_file):
    """Delegate to a fresh ``Bridge`` to transcribe ``voice_file``."""
    bridge = Bridge()
    return bridge.fetch_voice_to_text(voice_file)
35+
36+
def build_text_to_voice(self, text):
    """Delegate to a fresh ``Bridge`` to turn ``text`` into speech."""
    bridge = Bridge()
    return bridge.fetch_text_to_voice(text)

channel/wechat/wechat_channel.py

Lines changed: 51 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,14 @@
33
"""
44
wechat channel
55
"""
6+
67
import itchat
78
import json
89
from itchat.content import *
910
from channel.channel import Channel
1011
from concurrent.futures import ThreadPoolExecutor
1112
from common.log import logger
13+
from common.tmp_dir import TmpDir
1214
from config import conf
1315
import requests
1416
import io
@@ -18,7 +20,7 @@
1820

1921
@itchat.msg_register(TEXT)
2022
def handler_single_msg(msg):
21-
WechatChannel().handle(msg)
23+
WechatChannel().handle_text(msg)
2224
return None
2325

2426

@@ -28,6 +30,12 @@ def handler_group_msg(msg):
2830
return None
2931

3032

33+
@itchat.msg_register(VOICE)
def handler_single_voice(msg):
    """itchat callback registered for VOICE messages; hands them to WechatChannel."""
    channel = WechatChannel()
    channel.handle_voice(msg)
    return None
37+
38+
3139
class WechatChannel(Channel):
3240
def __init__(self):
3341
pass
@@ -39,12 +47,27 @@ def startup(self):
3947
# start message listener
4048
itchat.run()
4149

42-
def handle(self, msg):
43-
logger.debug("[WX]receive msg: " + json.dumps(msg, ensure_ascii=False))
50+
def handle_voice(self, msg):
    """Queue a received voice message for processing.

    Does nothing unless the ``speech_recognition`` config value equals True.
    The actual work is submitted to the thread pool.
    """
    recognition_enabled = conf().get('speech_recognition') == True  # noqa: E712
    if recognition_enabled:
        logger.debug("[WX]receive voice msg: " + msg['FileName'])
        thread_pool.submit(self._do_handle_voice, msg)
55+
56+
def _do_handle_voice(self, msg):
    """Download a voice message, transcribe it and handle it like a text message.

    :param msg: voice message object; must provide ``msg['FileName']`` and
        ``msg.download(path)``.

    This method is submitted to the thread pool, so an uncaught exception
    would be stored in the discarded Future and never surface. Failures are
    therefore logged here, matching the other ``_do_send_*`` workers.
    """
    try:
        fileName = TmpDir().path() + msg['FileName']
        msg.download(fileName)
        content = super().build_voice_to_text(fileName)
        if not content:
            # A backend may return None or an empty string; the text handler
            # performs string operations on the content and would fail.
            logger.error('[WX] voice recognition returned no text, file={}'.format(fileName))
            return
        self._handle_single_msg(msg, content, conf().get('voice_reply_voice'))
    except Exception as e:
        logger.exception(e)
61+
62+
def handle_text(self, msg):
    """Log a received text message and process its ``Text`` field.

    :param msg: message object; its ``Text`` entry is used as the query and
        the reply is always sent as text (``reply_voice`` is False).
    """
    serialized = json.dumps(msg, ensure_ascii=False)
    logger.debug("[WX]receive text msg: {}".format(serialized))
    self._handle_single_msg(msg, msg['Text'], False)
66+
67+
def _handle_single_msg(self, msg, content, reply_voice=False):
4468
from_user_id = msg['FromUserName']
4569
to_user_id = msg['ToUserName'] # 接收人id
4670
other_user_id = msg['User']['UserName'] # 对手方id
47-
content = msg['Text']
4871
match_prefix = self.check_prefix(content, conf().get('single_chat_prefix'))
4972
if "」\n- - - - - - - - - - - - - - -" in content:
5073
logger.debug("[WX]reference query skipped")
@@ -60,9 +83,10 @@ def handle(self, msg):
6083
if img_match_prefix:
6184
content = content.split(img_match_prefix, 1)[1].strip()
6285
thread_pool.submit(self._do_send_img, content, from_user_id)
63-
else:
64-
thread_pool.submit(self._do_send, content, from_user_id)
65-
86+
elif reply_voice:
87+
thread_pool.submit(self._do_send_voice, content, from_user_id)
88+
else :
89+
thread_pool.submit(self._do_send_text, content, from_user_id)
6690
elif to_user_id == other_user_id and match_prefix:
6791
# 自己给好友发送消息
6892
str_list = content.split(match_prefix, 1)
@@ -72,8 +96,10 @@ def handle(self, msg):
7296
if img_match_prefix:
7397
content = content.split(img_match_prefix, 1)[1].strip()
7498
thread_pool.submit(self._do_send_img, content, to_user_id)
99+
elif reply_voice:
100+
thread_pool.submit(self._do_send_voice, content, to_user_id)
75101
else:
76-
thread_pool.submit(self._do_send, content, to_user_id)
102+
thread_pool.submit(self._do_send_text, content, to_user_id)
77103

78104

79105
def handle_group(self, msg):
@@ -105,10 +131,24 @@ def handle_group(self, msg):
105131
thread_pool.submit(self._do_send_group, content, msg)
106132

107133
def send(self, msg, receiver):
108-
logger.info('[WX] sendMsg={}, receiver={}'.format(msg, receiver))
109134
itchat.send(msg, toUserName=receiver)
135+
logger.info('[WX] sendMsg={}, receiver={}'.format(msg, receiver))
110136

111-
def _do_send(self, query, reply_user_id):
137+
def _do_send_voice(self, query, reply_user_id):
    """Answer ``query`` and send the answer to ``reply_user_id`` as an audio file.

    :param query: text to answer; an empty value is ignored.
    :param reply_user_id: user name the reply is sent to.

    If speech synthesis produces no file (the voice backend may return
    None on error), the reply is sent as plain text instead, so the user
    still gets an answer. All exceptions are logged rather than raised.
    """
    if not query:
        return
    try:
        context = {'from_user_id': reply_user_id}
        reply_text = super().build_reply_content(query, context)
        if not reply_text:
            return
        replyFile = super().build_text_to_voice(reply_text)
        if not replyFile:
            logger.error('[WX] text to voice failed, fall back to text reply, receiver={}'.format(reply_user_id))
            self.send(reply_text, reply_user_id)
            return
        itchat.send_file(replyFile, toUserName=reply_user_id)
        logger.info('[WX] sendFile={}, receiver={}'.format(replyFile, reply_user_id))
    except Exception as e:
        logger.exception(e)
150+
151+
def _do_send_text(self, query, reply_user_id):
112152
try:
113153
if not query:
114154
return
@@ -138,8 +178,8 @@ def _do_send_img(self, query, reply_user_id):
138178
image_storage.seek(0)
139179

140180
# 图片发送
141-
logger.info('[WX] sendImage, receiver={}'.format(reply_user_id))
142181
itchat.send_image(image_storage, reply_user_id)
182+
logger.info('[WX] sendImage, receiver={}'.format(reply_user_id))
143183
except Exception as e:
144184
logger.exception(e)
145185

common/tmp_dir.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
2+
import os
3+
import pathlib
4+
from config import conf
5+
6+
7+
class TmpDir(object):
    """Scratch directory (``./tmp/``) for voice files.

    Instantiating the class creates the directory when the
    ``speech_recognition`` config value equals True. The directory is never
    removed by this class.
    """

    tmpFilePath = pathlib.Path('./tmp/')

    def __init__(self):
        # Equality (not truthiness) is kept so only a true/1 config value
        # enables directory creation.
        if conf().get('speech_recognition') == True:  # noqa: E712
            # exist_ok avoids the exists()-then-makedirs() race when several
            # worker threads construct TmpDir at the same time.
            os.makedirs(self.tmpFilePath, exist_ok=True)

    def path(self):
        """Return the directory path as a string ending with '/'."""
        return str(self.tmpFilePath) + '/'
20+

config-template.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@
77
"group_name_white_list": ["ChatGPT测试群", "ChatGPT测试群2"],
88
"image_create_prefix": ["", "", ""],
99
"conversation_max_tokens": 1000,
10+
"speech_recognition": false,
11+
"voice_reply_voice": false,
12+
"baidu_app_id": "YOUR BAIDU APP ID",
13+
"baidu_api_key": "YOUR BAIDU API KEY",
14+
"baidu_secret_key": "YOUR BAIDU SERVICE KEY",
1015
"character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题,并且可以使用多种语言与人交流。",
1116
"expires_in_seconds": 3600
1217
}

voice/baidu/baidu_voice.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
2+
"""
3+
baidu voice service
4+
"""
5+
import time
6+
from aip import AipSpeech
7+
from common.log import logger
8+
from common.tmp_dir import TmpDir
9+
from voice.voice import Voice
10+
from config import conf
11+
12+
class BaiduVoice(Voice):
    """Voice backend that synthesizes speech through Baidu's ``AipSpeech`` client.

    The credentials are read from the config keys ``baidu_app_id``,
    ``baidu_api_key`` and ``baidu_secret_key`` when the class body executes,
    and one client is shared by all instances.
    """

    APP_ID = conf().get('baidu_app_id')
    API_KEY = conf().get('baidu_api_key')
    SECRET_KEY = conf().get('baidu_secret_key')
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    def __init__(self):
        pass

    def voiceToText(self, voice_file):
        """Speech recognition is not implemented for this backend; returns None."""
        pass

    def textToVoice(self, text):
        """Synthesize ``text`` and write the audio to an mp3 file in the tmp dir.

        :param text: text to synthesize.
        :return: path of the written file, or None when the client returns
            a dict (treated as an error response and logged).
        """
        import uuid  # local import: only needed for unique file names

        result = self.client.synthesis(text, 'zh', 1, {
            'spd': 5, 'pit': 5, 'vol': 5, 'per': 111
        })
        if isinstance(result, dict):
            logger.error('[Baidu] textToVoice error={}'.format(result))
            return None

        # A second-resolution timestamp alone collides when two replies are
        # generated within the same second on different worker threads, so a
        # random suffix is appended.
        fileName = '{}语音回复_{}_{}.mp3'.format(
            TmpDir().path(), int(time.time()), uuid.uuid4().hex[:8])
        with open(fileName, 'wb') as f:
            f.write(result)
        logger.info('[Baidu] textToVoice text={} voice file name={}'.format(text, fileName))
        return fileName

voice/google/google_voice.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
2+
"""
3+
google voice service
4+
"""
5+
6+
import pathlib
7+
import subprocess
8+
import time
9+
import speech_recognition
10+
import pyttsx3
11+
from common.log import logger
12+
from common.tmp_dir import TmpDir
13+
from voice.voice import Voice
14+
15+
16+
class GoogleVoice(Voice):
    """Voice backend using ``speech_recognition`` (Google) and ``pyttsx3``.

    The recognizer and the speech engine are created once when the class
    body executes and are shared by all instances.
    """

    recognizer = speech_recognition.Recognizer()
    engine = pyttsx3.init()

    def __init__(self):
        # Speech rate
        self.engine.setProperty('rate', 125)
        # Volume
        self.engine.setProperty('volume', 1.0)
        # Original note: index 0 is a male voice, index 1 a female voice.
        voices = self.engine.getProperty('voices')
        if voices and len(voices) > 1:
            self.engine.setProperty('voice', voices[1].id)
        # With fewer than two installed voices the engine default is kept
        # instead of raising IndexError.

    def voiceToText(self, voice_file):
        """Convert ``voice_file`` to wav with ffmpeg and transcribe it (zh-CN).

        :param voice_file: path of the audio file to transcribe.
        :return: recognized text, or an apology message when the audio is
            not understood or the recognition request fails.
        """
        # Swap only the final suffix; str.replace would also touch '.mp3'
        # occurring elsewhere in the path.
        new_file = str(pathlib.Path(voice_file).with_suffix('.wav'))
        if new_file != voice_file:
            # Argument list without a shell: the file name originates from
            # the received message, so it must not be interpreted by a shell
            # (spaces, quotes, injection). '-y' overwrites a stale output
            # file instead of blocking on ffmpeg's interactive prompt.
            command = ['ffmpeg', '-y', '-i', voice_file,
                       '-acodec', 'pcm_s16le', '-ac', '1', '-ar', '16000',
                       new_file]
            return_code = subprocess.call(command)
            if return_code != 0:
                logger.error(
                    '[Google] ffmpeg exited with code {} for file {}'.format(return_code, voice_file))
        with speech_recognition.AudioFile(new_file) as source:
            audio = self.recognizer.record(source)
        try:
            text = self.recognizer.recognize_google(audio, language='zh-CN')
            logger.info(
                '[Google] voiceToText text={} voice file name={}'.format(text, voice_file))
            return text
        except speech_recognition.UnknownValueError:
            return "抱歉,我听不懂。"
        except speech_recognition.RequestError as e:
            return "抱歉,无法连接到 Google 语音识别服务;{0}".format(e)

    def textToVoice(self, text):
        """Synthesize ``text`` with the shared engine into a file in the tmp dir.

        :param text: text to synthesize.
        :return: path of the file the engine was asked to write.
        """
        import uuid  # local import: only needed for unique file names

        # The random suffix prevents two replies generated within the same
        # second from overwriting each other.
        textFile = '{}语音回复_{}_{}.mp3'.format(
            TmpDir().path(), int(time.time()), uuid.uuid4().hex[:8])
        self.engine.save_to_file(text, textFile)
        self.engine.runAndWait()
        logger.info(
            '[Google] textToVoice text={} voice file name={}'.format(text, textFile))
        return textFile

voice/openai/openai_voice.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
2+
"""
3+
openai voice service
4+
"""
5+
import json
6+
import openai
7+
from config import conf
8+
from common.log import logger
9+
from voice.voice import Voice
10+
11+
12+
class OpenaiVoice(Voice):
    """Voice backend that transcribes audio with OpenAI's ``whisper-1`` model."""

    def __init__(self):
        # The API key is set on the openai module itself, from the
        # 'open_ai_api_key' config value.
        openai.api_key = conf().get('open_ai_api_key')

    def voiceToText(self, voice_file):
        """Transcribe the audio file at ``voice_file`` and return the text.

        :param voice_file: path of the audio file to upload.
        :return: the ``"text"`` field of the transcription reply.
        """
        logger.debug(
            '[Openai] voice file name={}'.format(voice_file))
        # Context manager closes the handle even if the API call raises;
        # the original left the file open.
        with open(voice_file, "rb") as audio_file:
            reply = openai.Audio.transcribe("whisper-1", audio_file)
        text = reply["text"]
        logger.info(
            '[Openai] voiceToText text={} voice file name={}'.format(text, voice_file))
        return text

    def textToVoice(self, text):
        """Speech synthesis is not implemented for this backend; returns None."""
        pass

0 commit comments

Comments
 (0)