Merge pull request 'main' (#54) from YasinOnm08/interstellar_ai:main into main
Reviewed-on: https://interstellardevelopment.org/code/code/React-Group/interstellar_ai/pulls/54
This commit is contained in:
commit
fcc12ccb3e
4 changed files with 18 additions and 7 deletions
|
@ -151,7 +151,7 @@ const InputOutputBackend: React.FC = () => {
|
||||||
}
|
}
|
||||||
|
|
||||||
mediaRecorder.onstop = () => {
|
mediaRecorder.onstop = () => {
|
||||||
const audioBlob = new Blob(audioChunks.current, { type: "audio/wav" })
|
const audioBlob = new Blob(audioChunks.current, { type: "audio/ogg" })
|
||||||
const url = URL.createObjectURL(audioBlob)
|
const url = URL.createObjectURL(audioBlob)
|
||||||
console.log(url);
|
console.log(url);
|
||||||
setAudioURL(url)
|
setAudioURL(url)
|
||||||
|
|
|
@ -4,13 +4,9 @@ import axios from "axios";
|
||||||
class VoiceSend {
|
class VoiceSend {
|
||||||
sendToVoiceRecognition(audio_data: Blob) {
|
sendToVoiceRecognition(audio_data: Blob) {
|
||||||
console.log("sending recording...");
|
console.log("sending recording...");
|
||||||
console.log(typeof (audio_data));
|
|
||||||
console.log(audio_data instanceof Blob);
|
|
||||||
|
|
||||||
const formdata = new FormData()
|
const formdata = new FormData()
|
||||||
formdata.append("audio", audio_data)
|
formdata.append("audio", audio_data)
|
||||||
formdata.append("option", "offline")
|
|
||||||
formdata.append("type", "basic")
|
|
||||||
|
|
||||||
const dataSend = { option:"offline", type:"basic",audio:audio_data }
|
const dataSend = { option:"offline", type:"basic",audio:audio_data }
|
||||||
axios.post("http://localhost:5000/interstellar_ai/api/voice_recognition", formdata)
|
axios.post("http://localhost:5000/interstellar_ai/api/voice_recognition", formdata)
|
||||||
|
|
|
@ -14,4 +14,5 @@ pycouchdb
|
||||||
pyttsx3
|
pyttsx3
|
||||||
pip-licenses
|
pip-licenses
|
||||||
openai-whisper
|
openai-whisper
|
||||||
pydub
|
pydub
|
||||||
|
ffmpeg
|
16
py/voice.py
16
py/voice.py
|
@ -1,10 +1,24 @@
|
||||||
|
import io
|
||||||
|
import numpy as np
|
||||||
import whisper
|
import whisper
|
||||||
|
from pydub import AudioSegment
|
||||||
|
|
||||||
|
|
||||||
class VoiceRecognition:
    """Speech-to-text for uploaded OGG audio using OpenAI Whisper."""

    # Cached Whisper model: load_model() reads the checkpoint from disk,
    # so reloading it on every request would dominate request latency.
    _model = None

    @classmethod
    def _get_model(cls):
        """Load the "base" Whisper model once and reuse it afterwards."""
        if cls._model is None:
            cls._model = whisper.load_model("base")
        return cls._model

    @staticmethod
    def recognition(audio):
        """Transcribe OGG audio and return the recognized text.

        `audio` is a readable binary file-like object (e.g. a request
        upload) containing OGG data.  It is decoded with pydub, reduced
        to a single channel, resampled to 16 kHz, and normalized to
        float32 in [-1.0, 1.0) before being handed to Whisper.
        """
        audio_bytes = audio.read()
        with io.BytesIO(audio_bytes) as audio_buffer:
            audio_segment = AudioSegment.from_ogg(audio_buffer)

            # Whisper's transcribe() expects mono 16 kHz float32 samples
            # when given a raw array instead of a file path; without this
            # resample, 44.1/48 kHz browser recordings transcribe poorly.
            audio_segment = audio_segment.set_frame_rate(16000)

            raw_data = np.array(audio_segment.get_array_of_samples())
            if audio_segment.channels > 1:
                # Interleaved multi-channel PCM: keep only the first
                # channel (matches the original behavior).
                raw_data = raw_data.reshape((-1, audio_segment.channels))[:, 0]

            # Scale signed integer PCM to float32 in [-1.0, 1.0).
            samples = raw_data.astype(np.float32) / (
                2 ** (audio_segment.sample_width * 8 - 1)
            )

        result = VoiceRecognition._get_model().transcribe(samples)
        print(result["text"])
        return result["text"]
|
||||||
|
|
Loading…
Reference in a new issue