AI语音聊天机器人APP(使用webrtc、语音识别、TTS、langchain、大语言模型、uniapp)
AI发展越来越成熟,像chatgpt可以语音聊天,还可以带眼睛的功能,所以本博文是参照chatgpt功能实现的,已实现功能:(1)语音聊天,(2)打开摄像头(视频数据已传入后台,未实现视频数据识别,后面再加)
说明:本例子APP端是使用uniapp写的一个h5页面(可以打包成APP),来模仿APP端,将APP端的语音数据、视频数据通过webrtc推流技术,推送到python后端,后端收到语音数据之后,进行语音识别转文字(使用阿里的sensevoice,本地部署),文字传给大模型(使用通义千问,本地部署),大模型推理之后的结果数据再转为语音(文字转语音使用微软的edge-tts技术,也可以使用其它的),然后将语音使用webrtc推流给APP端播放,整个流程结束
具体使用的技术如下:
uniapp:写APP端
webrtc:前后端音视频推流
fastapi:WEB框架
langchain: 集成大语言模型通义千问qwen
sensevoice:语音识别
ollama:部署qwen
qwen大模型
edge-tts:文字转语音
redis:保存用户上下文信息,用户信息记忆功能
一、先看演示效果
https://www.bilibili.com/video/BV1JUXyYYEp7?t=6.3
https://www.bilibili.com/video/BV1zUXyYYEim?t=12.3
https://www.bilibili.com/video/BV1JUXyYYEA6?t=25.8
二、环境准备
1、准备senseVoice语音识别(我是虚拟机centos7)
senseVoice对应的docker镜像在后面下载压缩包中
docker load -i sensevoice.tar docker run -d --name sensevoice --gpus all -p 7860:7860 sensevoice:1 # 有nvidia显卡 docker run -d --name sensevoice -p 7860:7860 sensevoice:1 # 没有nvidia显卡使用cpu运算
2、安装ollama环境(我是windows安装)
https://ollama.com/download/windows
直接安装后,使用cmd命令,进入windows命令行,执行下载通义千问模型
ollama pull qwen2.5:0.5b
3、准备redis环境(我是虚拟机centos7)
docker pull redis:5.0.10
配置文件redis.conf
requirepass 123456 appendonly yes
docker启动(相关路径,修改为你自己的)
docker run -d --restart always --name redis \ -p 6379:6379 \ -v /home/soft/redis/redis.conf:/etc/redis/redis.conf \ -v /home/soft/redis/data:/data \ redis:5.0.10 redis-server /etc/redis/redis.conf
三、代码运行
压缩包在后面可下载,压缩包中有APP端代码(ai-voice-app),服务端代码(ai-voice-server)
1、运行APP端代码
使用HBuilder X软件打开,然后访问 http://localhost:8090/ ,然后谷歌浏览器调成手机模式
2、运行后端代码
(1)、使用anaconda创建python环境
conda create -n aiapp python=3.10 conda activate aiapp cd 后端路径/ai-voice-server pip install -r requirements.txt
然后pycharm配置conda环境
(2)、修改配置文件settings.py
# Project configuration constants for the ai-voice-server backend.

# Current project directory (the folder containing this settings file).
CURRENT_DIR = os.path.dirname(__file__)

# Directory where recorded audio files are stored.
STORAGE_DIR = "D:/temp/"

# Redis connection string (empty user, password auth, database 3).
# NOTE(review): 127.127.0.1 is an unusual loopback address — confirm it is
# not a typo for 127.0.0.1.
REDIS_URL = "redis://'':123456@127.127.0.1:6379/3"

# Name of the large language model served through ollama.
LLM_MODEL = "qwen2.5:0.5b"

# Endpoint of the senseVoice speech-recognition service.
# NOTE(review): same unusual 127.127.0.1 address as above — verify.
SENSE_VOICE = "http://127.127.0.1:7860/"
(3)运行main.py
四、相关源代码解释
1、前端代码
// Set up the WebRTC peer connection:
//  1. capture local audio (and video when this.openVidoStream is true) and send it,
//  2. render remote video/audio tracks into dynamically created elements,
//  3. open a 'chat' data channel that sends a heartbeat every second.
// Fixes over the original: plain await instead of mixing await with .then(),
// strict equality for the track-kind check, const instead of var/let.
this.pc = new RTCPeerConnection();
const constraints = { audio: true, video: this.openVidoStream };
try {
  const stream = await navigator.mediaDevices.getUserMedia(constraints);
  console.log('发送多媒体流');
  this.localStream = stream;
  stream.getTracks().forEach((track) => {
    this.pc.addTrack(track, stream); // push each local track to the server
  });
} catch (err) {
  // Preserve the original best-effort behavior: warn the user and continue.
  alert('Could not acquire media: ' + err);
}

// Render remote tracks pushed back by the server.
this.pc.addEventListener('track', (evt) => {
  if (evt.track.kind === 'video') {
    if (this.videoObj == null) {
      const video = document.createElement('video');
      video.autoplay = true;
      video.playsInline = true; // important on iOS: keep playback inline in the page
      document.getElementById('videoContainer').appendChild(video);
      this.videoObj = video;
    }
    this.videoObj.srcObject = evt.streams[0];
  } else {
    console.log('进入到audio中***********************');
    if (this.audioObj == null) {
      const audio = document.createElement('audio');
      audio.autoplay = true;
      audio.playsInline = true; // important on iOS: keep playback inline in the page
      document.getElementById('audioContainer').appendChild(audio);
      this.audioObj = audio;
      const audioStream = evt.streams[0];
      const audioTrack = audioStream.getAudioTracks()[0];
      console.log('音频轨道信息', audioTrack); // inspect the audio track
    }
    this.audioObj.srcObject = evt.streams[0];
  }
});

// Data channel opened from the server side.
this.pc.ondatachannel = (event) => {
  const receiver = event.channel;
  receiver.onmessage = (event) => {
    console.log('', event.data);
  };
  receiver.onopen = () => {
    console.log('Data channel is open');
  };
  receiver.onclose = () => {
    console.log('Data channel is closed');
  };
};

// Client-created 'chat' channel: sends a heartbeat every second while open.
const parameters = { ordered: true };
const dc = this.pc.createDataChannel('chat', parameters);
dc.addEventListener('close', () => {
  clearInterval(this.dcInterval);
});
dc.addEventListener('open', () => {
  this.dcInterval = setInterval(() => {
    const data = 'ping ' + this.currentStamp();
    const obj = { header: { accountId: 123456 }, target: 'TS', method: 'heartBeat', data: data };
    const objStr = JSON.stringify(obj);
    dc.send(objStr);
  }, 1000);
});
dc.addEventListener('message', (evt) => {
  // Mostly heartbeat echoes — intentionally ignored.
});
this.dc = dc;
2、后端代码:
offer = RTCSessionDescription(sdp=data.sdp, type=data.type) pc = RTCPeerConnection() pcs.add(pc) # 发送文本消息 data_channel = pc.createDataChannel("chat") # prepare local media # player = MediaPlayer(os.path.join(ROOT, "demo-instruct.wav")) video_relay = MediaRelay() dir_name = public_utils.mk_user_dir_file(request) file_name = dir_name+'.mp4' video_recorder = MediaRecorder(file=file_name) # 录视频 video_track = VideoTransformTrack() # 视频拍照 audio_track = AudioTransformTrack() # await audio_track.init_audio("test.mp3") await audio_track.init_silense() @pc.on("datachannel") def on_datachannel(channel): @channel.on("message") async def on_message(res): # print('进入了message事件********',res) await deal_data(channel, res) @pc.on("connectionstatechange") async def on_connectionstatechange(): print('connectionstatechange事件:', pc.connectionState) if pc.connectionState == "failed": await pc.close() pcs.discard(pc) elif pc.connectionState == "connected": print("连接成功") elif pc.connectionState == "closed": print("连接关闭") await video_recorder.stop() @pc.on("track") def on_track(track): if track.kind == "audio": print('进入了audio事件********', track) # pc.addTrack(player.audio) audio_track.set_frame(track) pc.addTrack(audio_track) elif track.kind == "video": print('进入了video事件********', track) video_track.set_frame(video_relay.subscribe(track)) pc.addTrack(video_track) video_recorder.addTrack(video_relay.subscribe(track)) @track.on("ended") async def on_ended(): print('进入了ended事件********') pcs.discard(pc) await pc.close() await video_recorder.stop()
五、代码下载,因文件过大,所以放在网盘
百度网盘 请输入提取码