voice recognition kinda works??

This commit is contained in:
YasinOnm08 2024-09-27 13:59:27 +02:00
parent 233a173697
commit 8090ce969e
3 changed files with 30 additions and 31 deletions

View file

@ -167,16 +167,14 @@ const InputOutputBackend: React.FC = () => {
mediaRecorder.onstop = async () => { mediaRecorder.onstop = async () => {
const audioBlob = new Blob(audioChunks.current, { type: "audio/ogg" }) const audioBlob = new Blob(audioChunks.current, { type: "audio/ogg" })
console.log(audioBlob);
const url = URL.createObjectURL(audioBlob)
const audio = new Audio(url);
audio.play().catch(error => console.error("Error playing audio:", error));
console.log(url);
setAudioURL(url)
audioChunks.current = [] audioChunks.current = []
const wavBlob = await convertOggToWav(audioBlob) // console.log(audioBlob);
// const url = URL.createObjectURL(audioBlob)
// const audio = new Audio(url);
// audio.play().catch(error => console.error("Error playing audio:", error));
const remote = new VoiceSend() const remote = new VoiceSend()
remote.sendToVoiceRecognition(wavBlob) remote.sendToVoiceRecognition(audioBlob)
} }
mediaRecorder.start() mediaRecorder.start()
@ -276,14 +274,9 @@ const InputOutputBackend: React.FC = () => {
onMicClick={handleMicClick} onMicClick={handleMicClick}
inputDisabled={inputDisabled} inputDisabled={inputDisabled}
isRecording={isRecording} isRecording={isRecording}
/> />
</div> </div>
) )
} }
export default InputOutputBackend export default InputOutputBackend

View file

@ -1,4 +1,4 @@
from flask i mport Flask, request, jsonify from flask import Flask, request, jsonify
from flask_cors import CORS from flask_cors import CORS
import secrets import secrets
import threading import threading

View file

@ -1,26 +1,32 @@
import io
import numpy as np
from faster_whisper import WhisperModel
from pydub import AudioSegment


class VoiceRecognition:
    # Shared transcription model, created lazily on first use.
    # Loading WhisperModel is expensive; the previous version rebuilt it on
    # every recognition() call.
    _model = None

    @staticmethod
    def _get_model():
        """Return the process-wide WhisperModel, creating it on first use."""
        if VoiceRecognition._model is None:
            VoiceRecognition._model = WhisperModel(
                "base", device="cpu", compute_type="int8"
            )
        return VoiceRecognition._model

    @staticmethod
    def recognition(audio):
        """Transcribe an OGG audio upload to text.

        audio: file-like object (e.g. a Flask upload) whose read() yields
               OGG-encoded bytes.
        returns: the transcribed text as a stripped string.
        raises: ValueError if the audio cannot be decoded/converted to WAV.
        """
        audio_buffer = io.BytesIO(audio.read())
        try:
            # pydub needs ffmpeg available to decode OGG.
            audio_segment = AudioSegment.from_file(audio_buffer, format="ogg")
            wav_io = io.BytesIO()
            audio_segment.export(wav_io, format="wav")
            wav_io.seek(0)
        except Exception as exc:
            # The previous bare `except:` printed a message and fell through,
            # then crashed on the unbound `wav_io` with a misleading
            # NameError. Fail loudly with the real cause instead.
            print("audio to wav failed")
            raise ValueError("audio-to-wav conversion failed") from exc

        segments, _ = VoiceRecognition._get_model().transcribe(wav_io)
        transcription = ""
        for segment in segments:
            transcription += segment.text + " "
        result = transcription.strip()
        print(result)
        return result

# npm install @ffmpeg/ffmpeg @ffmpeg/util @ffmpeg/types @ffmpeg/core-mt