Merge pull request 'Changes' (#22) from React-Group/interstellar_ai:main into main

Reviewed-on: https://interstellardevelopment.org/code/code/sageTheDm/interstellar_ai/pulls/22
This commit is contained in:
sageTheDm 2024-09-26 13:42:44 +02:00
commit 2cac162ad6
4 changed files with 18 additions and 7 deletions

View file

@ -151,7 +151,7 @@ const InputOutputBackend: React.FC = () => {
}
mediaRecorder.onstop = () => {
const audioBlob = new Blob(audioChunks.current, { type: "audio/wav" })
const audioBlob = new Blob(audioChunks.current, { type: "audio/ogg" })
const url = URL.createObjectURL(audioBlob)
console.log(url);
setAudioURL(url)

View file

@ -4,13 +4,9 @@ import axios from "axios";
class VoiceSend {
sendToVoiceRecognition(audio_data: Blob) {
console.log("sending recording...");
console.log(typeof (audio_data));
console.log(audio_data instanceof Blob);
const formdata = new FormData()
formdata.append("audio", audio_data)
formdata.append("option", "offline")
formdata.append("type", "basic")
const dataSend = { option:"offline", type:"basic",audio:audio_data }
axios.post("http://localhost:5000/interstellar_ai/api/voice_recognition", formdata)

View file

@ -15,3 +15,4 @@ pyttsx3
pip-licenses
openai-whisper
pydub
ffmpeg

View file

@ -1,10 +1,24 @@
import io
import numpy as np
import whisper
from pydub import AudioSegment
class VoiceRecognition:
    """Transcribe an uploaded Ogg recording to text with a local Whisper model."""

    # Whisper interprets an in-memory sample array as audio sampled at 16 kHz,
    # so decoded audio is resampled to this rate before transcription.
    WHISPER_SAMPLE_RATE = 16000

    @staticmethod
    def recognition(audio):
        """Decode an Ogg upload and return the text Whisper recognises in it.

        Args:
            audio: File-like object whose ``read()`` returns the bytes of an
                Ogg-encoded recording.

        Returns:
            The ``"text"`` entry of the Whisper transcription result.
        """
        audio_data = audio.read()
        with io.BytesIO(audio_data) as audio_buffer:
            audio_segment = AudioSegment.from_ogg(audio_buffer)

        # Resample first: feeding samples at the recording's native rate would
        # make Whisper hear the audio at the wrong speed.
        audio_segment = audio_segment.set_frame_rate(VoiceRecognition.WHISPER_SAMPLE_RATE)

        raw_data = np.array(audio_segment.get_array_of_samples())
        if audio_segment.channels > 1:
            # Samples are interleaved per frame; keep only the first channel.
            raw_data = raw_data.reshape((-1, audio_segment.channels))[:, 0]

        # Scale signed integer PCM to float32 in [-1.0, 1.0).
        full_scale = 2 ** (audio_segment.sample_width * 8 - 1)
        samples = raw_data.astype(np.float32) / full_scale

        # NOTE(review): the model is loaded on every call; consider caching it
        # once the threading model of the caller is confirmed.
        model = whisper.load_model("base")
        # Transcribe the decoded samples only; the raw upload object is not an
        # input type Whisper can consume.
        result = model.transcribe(samples)
        print(result["text"])
        return result["text"]