From 16177ccaccbcc964119dc37c038851788b86f78a Mon Sep 17 00:00:00 2001
From: Patrick_Pluto
Date: Wed, 25 Sep 2024 12:33:52 +0200
Subject: [PATCH 01/11] voice recorder (WIP)

---
 app/backend/InputOutputHandler.tsx | 48 ++++++++++++++++++++++++++++--
 app/backend/voice_backend.ts       | 28 +++++++++++++++++
 2 files changed, 74 insertions(+), 2 deletions(-)
 create mode 100644 app/backend/voice_backend.ts

diff --git a/app/backend/InputOutputHandler.tsx b/app/backend/InputOutputHandler.tsx
index 02a227b..3b00707 100644
--- a/app/backend/InputOutputHandler.tsx
+++ b/app/backend/InputOutputHandler.tsx
@@ -2,9 +2,11 @@
 import React, { useEffect, useRef, useState } from "react";
 import ConversationFrontend from "../components/ConversationFrontend";
 import InputFrontend from "../components/InputFrontend";
+import VoiceSend from "./voice_backend"
 import axios from "axios";
 import { skip } from "node:test";

+
 const InputOutputBackend: React.FC = () => {
     type Message = {
         role: string
@@ -19,6 +21,10 @@ const InputOutputBackend: React.FC = () => {
     const [inputMessage, setInputMessage] = useState("")
     const [inputDisabled, setInputDisabled] = useState(false)
     const [lastMessage, setLastMessage] = useState({ role: "user", content: "Not supposed to happen." })
+    const [isRecording, setIsRecording] = useState(false);
+    const mediaRecorderRef = useRef(null);
+    const audioChunksRef = useRef([]);
+

     console.log(messages);

@@ -136,9 +142,47 @@ const InputOutputBackend: React.FC = () => {
         }
     }

+    const startRecording = async () => {
+        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+        mediaRecorderRef.current = new MediaRecorder(stream);
+
+        mediaRecorderRef.current.ondataavailable = (event) => {
+            audioChunksRef.current.push(event.data);
+        };
+
+        mediaRecorderRef.current.onstop = () => {
+            const audioBlob = new Blob(audioChunksRef.current, { type: 'audio/wav' });
+            audioChunksRef.current = []; // Clear the chunks for the next recording
+            // Call your existing function to send the audioBlob
+            // Example: sendAudioToApi(audioBlob);
+        };
+
+        mediaRecorderRef.current.start();
+        setIsRecording(true);
+
+        // Automatically stop recording after 10 seconds
+        setTimeout(() => {
+            stopRecording();
+        }, 10000);
+    };
+
+    const stopRecording = () => {
+        if (mediaRecorderRef.current) {
+            mediaRecorderRef.current.stop();
+            setIsRecording(false);
+            var remote = new VoiceSend()
+            remote.sendToVoiceRecognition(new Blob(audioChunksRef.current, { type: 'audio/wav' }), remote.voiceDataTemplate);
+        }
+    };
+
+
     const handleMicClick = () => {
-        // do stuff
-    }
+        if (!isRecording) {
+            startRecording();
+        } else {
+            stopRecording();
+        }
+    };

     const handleResendClick = () => {
         var temporary_message = messages[messages.length - 2]['content']

diff --git a/app/backend/voice_backend.ts b/app/backend/voice_backend.ts
new file mode 100644
index 0000000..650afeb
--- /dev/null
+++ b/app/backend/voice_backend.ts
@@ -0,0 +1,28 @@
+import axios from "axios";
+
+class VoiceSend {
+
+    voiceDataTemplate = {
+        type: "basic",
+        audio_data: null,
+        option: "offline"
+    }
+
+    sendToVoiceRecognition(audio_data: Blob, data: any) {
+        var dataSend = data
+        dataSend['audio_data'] = audio_data
+        axios.post("http://localhost:5000/interstellar_ai/api/voice_recognition", dataSend)
+            .then((response: any) => {
+                console.log(response['response'])
+                return response['response']
+            })
+            .catch(error => {
+                console.log("Error calling API:", error)
+                postMessage({ status: 500 })
+            })
+    }
+
+}
+
+
+export default VoiceSend;
\ No newline at end of file
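Two details in this first cut are worth flagging. stopRecording() builds its Blob from audioChunksRef immediately after calling MediaRecorder.stop(), but the final dataavailable event only arrives asynchronously after stop(), so the Blob sent here can easily be empty; patch 04 below moves the send into the onstop handler, which is the reliable place for it. Also, { type: 'audio/wav' } only labels the Blob's MIME type — MediaRecorder does not transcode, and most browsers actually emit WebM or Ogg Opus data, which matters later when the backend tries to parse the upload as WAV.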
From 42b12f73b4aec70fe0df5e8a97299dede262ce6e Mon Sep 17 00:00:00 2001
From: Patrick_Pluto
Date: Wed, 25 Sep 2024 16:34:02 +0200
Subject: [PATCH 02/11] TTS

---
 py/api.py           |  7 +++++++
 py/requirements.txt |  4 +++-
 py/tts.py           | 10 ++++++++++
 3 files changed, 20 insertions(+), 1 deletion(-)
 create mode 100644 py/tts.py

diff --git a/py/api.py b/py/api.py
index 01e99b1..5fba195 100644
--- a/py/api.py
+++ b/py/api.py
@@ -6,6 +6,7 @@ from ai import AI
 from db import DB
 from weather import Weather
 from voice import VoiceRecognition
+from tts import TTS


 class API:
@@ -17,6 +18,7 @@ class API:
         self.db = DB()
         self.weather = Weather()
         self.voice = VoiceRecognition()
+        self.tts = TTS()
         self.db.load_database()
         self.ai_response_lock = threading.Lock()
         CORS(self.app)
@@ -113,6 +115,11 @@ class API:

         self.app.run(debug=True, host='0.0.0.0', port=5000)

+        @self.app.route('/interstellar_ai/api/tts', methods=['POST'])
+        def tts():
+            text = request.args.get('text')
+            return jsonify({'status': 200, 'response': self.tts.gen_tts(text)})
+

 api = API()
 api.run()

diff --git a/py/requirements.txt b/py/requirements.txt
index 6cd3616..47523c5 100644
--- a/py/requirements.txt
+++ b/py/requirements.txt
@@ -10,4 +10,6 @@ PocketSphinx
 google-cloud-speech
 google-generativeai
 python-weather
-pycouchdb
\ No newline at end of file
+pycouchdb
+pyttsx3
+pip-licenses
\ No newline at end of file

diff --git a/py/tts.py b/py/tts.py
new file mode 100644
index 0000000..93f7fa4
--- /dev/null
+++ b/py/tts.py
@@ -0,0 +1,10 @@
+import pyttsx3
+
+
+class TTS:
+    @staticmethod
+    def gen_tts(text):
+        engine = pyttsx3.init()
+        engine.setProperty('rate', 70)
+        engine.say(text)
+        engine.runAndWait()
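Three quirks in this TTS patch are worth noting: gen_tts() speaks through the server's own audio device and implicitly returns None, so the endpoint's 'response' field is always null; the handler reads text from request.args, i.e. the query string, even though the route only accepts POST; and because the route decorator runs after the blocking self.app.run() call, it is never registered while the server is up. A minimal sketch of a route that instead returns the synthesized audio to the caller could look like the following — the save_to_file approach and the out.wav name are assumptions, not part of the patch:

    import pyttsx3
    from flask import Flask, request, send_file

    app = Flask(__name__)

    @app.route('/interstellar_ai/api/tts', methods=['POST'])
    def tts():
        text = request.form.get('text', '')   # read the POST body, not the query string
        engine = pyttsx3.init()
        engine.setProperty('rate', 70)
        engine.save_to_file(text, 'out.wav')  # synthesize to a file instead of the speakers
        engine.runAndWait()                   # blocks until synthesis has finished
        return send_file('out.wav', mimetype='audio/wav')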
From 6d03e1fafcd0feff0bc1f0e2123eaca9f27410a7 Mon Sep 17 00:00:00 2001
From: Patrick_Pluto
Date: Thu, 26 Sep 2024 08:33:11 +0200
Subject: [PATCH 03/11] fixed stuff?

---
 py/api.py   | 2 +-
 py/voice.py | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/py/api.py b/py/api.py
index 5fba195..1c4e089 100644
--- a/py/api.py
+++ b/py/api.py
@@ -100,7 +100,7 @@ class API:
         @self.app.route('/interstellar_ai/api/voice_recognition', methods=['POST'])
         def voice_recognition():
             recognition_type = request.args.get('type')
-            audio = request.args.get('audio_data')
+            audio = request.files['audio']
             option = request.args.get('option')
             if recognition_type == "basic":
                 return jsonify({'status': 200, 'response': self.voice.basic_recognition(audio, option)})

diff --git a/py/voice.py b/py/voice.py
index 7ead0a5..d589eab 100644
--- a/py/voice.py
+++ b/py/voice.py
@@ -6,8 +6,10 @@ class VoiceRecognition:
     def basic_recognition(audio, option):
         r = sr.Recognizer()
         if option == "online":
-            return r.recognize_google_cloud(audio)
+            with audio as source:
+                return r.recognize_google_cloud(source)
         elif option == "offline":
-            return r.recognize_sphinx(audio)
+            with audio as source:
+                return r.recognize_sphinx(source)

         return False
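The intent here is right — hand the recognizer a file — but speech_recognition's API works in two steps that this hunk skips: `with ... as source` requires an AudioSource such as sr.AudioFile (a plain string or an uploaded file object is not a context manager), and the recognize_* methods consume AudioData produced by Recognizer.record() or listen(), not the source itself. A minimal sketch of the canonical flow, with example.wav as a placeholder path:

    import speech_recognition as sr

    r = sr.Recognizer()
    with sr.AudioFile('example.wav') as source:  # accepts WAV/AIFF/FLAC paths or file-like objects
        audio_data = r.record(source)            # read the whole stream into AudioData
    print(r.recognize_sphinx(audio_data))        # recognizers take AudioData, not the source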
From 0d84454a1716d71368c4e790d0a72f683b718717 Mon Sep 17 00:00:00 2001
From: YasinOnm08
Date: Thu, 26 Sep 2024 08:57:28 +0200
Subject: [PATCH 04/11] voice recognition trial 1

---
 app/backend/AudioRecorder(not yet).tsx | 39 ---------------
 app/backend/AudioRecorder.ts           | 34 +++++++++++++
 app/backend/InputOutputHandler.tsx     | 68 ++++++++++++--------------
 app/backend/voice_backend.ts           | 14 ++----
 app/components/InputFrontend.tsx       |  7 +--
 5 files changed, 74 insertions(+), 88 deletions(-)
 delete mode 100644 app/backend/AudioRecorder(not yet).tsx
 create mode 100644 app/backend/AudioRecorder.ts

diff --git a/app/backend/AudioRecorder(not yet).tsx b/app/backend/AudioRecorder(not yet).tsx
deleted file mode 100644
index af6a941..0000000
--- a/app/backend/AudioRecorder(not yet).tsx
+++ /dev/null
@@ -1,39 +0,0 @@
-// import React, { useState, useRef } from 'react'
-
-// const AudioRecorder: React.FC = () => {
-//     const [isRecording, setIsRecording] = useState(false)
-//     const [audioURL, setAudioURL] = useState(null)
-//     const medaRecorderRef = useRef(null)
-//     const audioChunks = useRef([])
-
-//     const startRecording = async () => {
-//         const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
-//         const mediaRecorder = new MediaRecorder(stream)
-//         medaRecorderRef.current = mediaRecorder
-
-//         mediaRecorder.ondataavailable = (event) => {
-//             audioChunks.current.push(event.data)
-//         }
-
-//         mediaRecorder.onstop = () => {
-//             const audioBlob = new Blob(audioChunks.current, { type: "audio/wav" })
-//             const url = URL.createObjectURL(audioBlob)
-//             setAudioURL(url)
-//             audioChunks.current = []
-//         }
-
-//         mediaRecorder.start()
-//         setIsRecording(true)
-
-//         const stopRecording = () => {
-//             medaRecorderRef.current?.stop()
-//             setIsRecording(false)
-//         }
-
-//         return (
-//
-//         )
-//     }
-// }
-
-// export default AudioRecorder
\ No newline at end of file

diff --git a/app/backend/AudioRecorder.ts b/app/backend/AudioRecorder.ts
new file mode 100644
index 0000000..459674e
--- /dev/null
+++ b/app/backend/AudioRecorder.ts
@@ -0,0 +1,34 @@
+import React, { useState, useRef } from 'react'
+
+export const AudioRecorder= () => {
+    const [isRecording, setIsRecording] = useState(false)
+    const [audioURL, setAudioURL] = useState(null)
+    const mediaRecorderRef = useRef(null)
+    const audioChunks = useRef([])
+
+    const startRecording = async () => {
+        const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
+        const mediaRecorder = new MediaRecorder(stream)
+        mediaRecorderRef.current = mediaRecorder
+
+        mediaRecorder.ondataavailable = (event) => {
+            audioChunks.current.push(event.data)
+        }
+
+        mediaRecorder.onstop = () => {
+            const audioBlob = new Blob(audioChunks.current, { type: "audio/wav" })
+            const url = URL.createObjectURL(audioBlob)
+            setAudioURL(url)
+            audioChunks.current = []
+        }
+
+        mediaRecorder.start()
+        setIsRecording(true)
+
+    }
+
+    const stopRecording = () => {
+        mediaRecorderRef.current?.stop()
+        setIsRecording(false)
+    }
+}
\ No newline at end of file

diff --git a/app/backend/InputOutputHandler.tsx b/app/backend/InputOutputHandler.tsx
index 3b00707..36b3fac 100644
--- a/app/backend/InputOutputHandler.tsx
+++ b/app/backend/InputOutputHandler.tsx
@@ -3,8 +3,8 @@
 import React, { useEffect, useRef, useState } from "react";
 import ConversationFrontend from "../components/ConversationFrontend";
 import InputFrontend from "../components/InputFrontend";
 import VoiceSend from "./voice_backend"
+import { AudioRecorder } from "./AudioRecorder";
 import axios from "axios";
-import { skip } from "node:test";


 const InputOutputBackend: React.FC = () => {
@@ -20,10 +20,10 @@ const InputOutputBackend: React.FC = () => {
     const [liveMessage, setLiveMessage] = useState("")
     const [inputMessage, setInputMessage] = useState("")
     const [inputDisabled, setInputDisabled] = useState(false)
     const [lastMessage, setLastMessage] = useState({ role: "user", content: "Not supposed to happen." })
-    const [isRecording, setIsRecording] = useState(false);
-    const mediaRecorderRef = useRef(null);
-    const audioChunksRef = useRef([]);
+    const [isRecording, setIsRecording] = useState(false)
+    const [audioURL, setAudioURL] = useState(null)
+    const mediaRecorderRef = useRef(null)
+    const audioChunks = useRef([])

     console.log(messages);
@@ -129,7 +129,6 @@ const InputOutputBackend: React.FC = () => {
         }
     }
     const handleSendClick = (inputValue: string, override: boolean) => {
         if (inputValue != "") {
-            console.log(inputDisabled)
             if (!inputDisabled || override) {
                 setInputDisabled(true)
                 if (postWorkerRef.current) {
@@ -143,37 +142,33 @@ const InputOutputBackend: React.FC = () => {
         }
     }

     const startRecording = async () => {
-        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-        mediaRecorderRef.current = new MediaRecorder(stream);
-
-        mediaRecorderRef.current.ondataavailable = (event) => {
-            audioChunksRef.current.push(event.data);
-        };
-
-        mediaRecorderRef.current.onstop = () => {
-            const audioBlob = new Blob(audioChunksRef.current, { type: 'audio/wav' });
-            audioChunksRef.current = []; // Clear the chunks for the next recording
-            // Call your existing function to send the audioBlob
-            // Example: sendAudioToApi(audioBlob);
-        };
-
-        mediaRecorderRef.current.start();
-        setIsRecording(true);
-
-        // Automatically stop recording after 10 seconds
-        setTimeout(() => {
-            stopRecording();
-        }, 10000);
-    };
-
-    const stopRecording = () => {
-        if (mediaRecorderRef.current) {
-            mediaRecorderRef.current.stop();
-            setIsRecording(false);
-            var remote = new VoiceSend()
-            remote.sendToVoiceRecognition(new Blob(audioChunksRef.current, { type: 'audio/wav' }), remote.voiceDataTemplate);
+        const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
+        const mediaRecorder = new MediaRecorder(stream)
+        mediaRecorderRef.current = mediaRecorder
+
+        mediaRecorder.ondataavailable = (event) => {
+            audioChunks.current.push(event.data)
+        }
+
+        mediaRecorder.onstop = () => {
+            const audioBlob = new Blob(audioChunks.current, { type: "audio/wav" })
+            const url = URL.createObjectURL(audioBlob)
+            console.log(url);
+            setAudioURL(url)
+            audioChunks.current = []
+            const remote = new VoiceSend()
+            remote.sendToVoiceRecognition(audioBlob,)
+        }
+
+        mediaRecorder.start()
+        setIsRecording(true)
+
+    }
+
+    const stopRecording = () => {
+        mediaRecorderRef.current?.stop()
+        setIsRecording(false)
         }
-    };

     const handleMicClick = () => {
@@ -224,6 +219,7 @@ const InputOutputBackend: React.FC = () => {
                 onSendClick={handleSendClick}
                 onMicClick={handleMicClick}
                 inputDisabled={inputDisabled}
+                isRecording={isRecording}
             />
         )

diff --git a/app/backend/voice_backend.ts b/app/backend/voice_backend.ts
index 650afeb..f809a5c 100644
--- a/app/backend/voice_backend.ts
+++ b/app/backend/voice_backend.ts
@@ -1,16 +1,10 @@
 import axios from "axios";

 class VoiceSend {
-
-    voiceDataTemplate = {
-        type: "basic",
-        audio_data: null,
-        option: "offline"
-    }
-
-    sendToVoiceRecognition(audio_data: Blob, data: any) {
-        var dataSend = data
-        dataSend['audio_data'] = audio_data
+    sendToVoiceRecognition(audio_data: Blob) {
+        console.log("sending recording...");
+
+        const dataSend = { audio_data }
         axios.post("http://localhost:5000/interstellar_ai/api/voice_recognition", dataSend)
             .then((response: any) => {
                 console.log(response['response'])

diff --git a/app/components/InputFrontend.tsx b/app/components/InputFrontend.tsx
index e50e916..ce8ac49 100644
--- a/app/components/InputFrontend.tsx
+++ b/app/components/InputFrontend.tsx
@@ -4,11 +4,12 @@
 interface InputProps {
     message: string;
     onSendClick: (message: string, override: boolean) => void;
     onMicClick: () => void;
-    inputDisabled: boolean
+    inputDisabled: boolean;
+    isRecording:boolean
 }

 const InputFrontend = React.forwardRef(
-    ({ message, onSendClick, onMicClick, inputDisabled }, ref: ForwardedRef) => {
+    ({ message, onSendClick, onMicClick, inputDisabled, isRecording}, ref: ForwardedRef) => {
         const [inputValue, setInputValue] = useState('');

         useEffect(() => {
@@ -42,7 +43,7 @@ const InputFrontend = React.forwardRef(
-
+
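The new AudioRecorder.ts calls useState and useRef from a plain function that is never rendered as a component (and isn't named use-something), which breaks React's rules of hooks — and nothing consumes it, since the same logic now lives directly in InputOutputHandler.tsx. The bigger obstacle is in voice_backend.ts: axios serializes a plain object body as JSON, and a Blob has no JSON representation, so { audio_data } reaches the server as an empty field. Patch 10 below resolves this with FormData. (The mic-button markup changed in the final InputFrontend hunk did not survive extraction; only the diff markers remain.)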
From 128cc37cf5aa2026366dc3e2a11adccf855a038e Mon Sep 17 00:00:00 2001
From: Patrick_Pluto
Date: Thu, 26 Sep 2024 09:27:52 +0200
Subject: [PATCH 05/11] fixed stuff?

---
 py/api.py           | 2 +-
 py/requirements.txt | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/py/api.py b/py/api.py
index 1c4e089..404f41d 100644
--- a/py/api.py
+++ b/py/api.py
@@ -100,7 +100,7 @@ class API:
         @self.app.route('/interstellar_ai/api/voice_recognition', methods=['POST'])
         def voice_recognition():
             recognition_type = request.args.get('type')
-            audio = request.files['audio']
+            audio = request.args.get('audio')
             option = request.args.get('option')
             if recognition_type == "basic":
                 return jsonify({'status': 200, 'response': self.voice.basic_recognition(audio, option)})

diff --git a/py/requirements.txt b/py/requirements.txt
index 47523c5..8c6a016 100644
--- a/py/requirements.txt
+++ b/py/requirements.txt
@@ -12,4 +12,5 @@ google-generativeai
 python-weather
 pycouchdb
 pyttsx3
-pip-licenses
\ No newline at end of file
+pip-licenses
+openai-whisper
\ No newline at end of file
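This revert trades patch 03's request.files lookup for request.args again, which cannot work for a file upload: the query string never carries the POST body. Note also that openai-whisper joins requirements.txt here, although none of the patches in this series import it yet.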
From e23da7a57992293f94ddfb44aec6ce6eded523ef Mon Sep 17 00:00:00 2001
From: YasinOnm08
Date: Thu, 26 Sep 2024 09:28:18 +0200
Subject: [PATCH 06/11] voice recognition trial 2

---
 app/backend/voice_backend.ts     |  2 +-
 app/components/InputFrontend.tsx |  7 ++++++-
 app/styles/input.css             | 16 ++++++++++++++++
 app/styles/variables.css         |  1 +
 4 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/app/backend/voice_backend.ts b/app/backend/voice_backend.ts
index f809a5c..f5cc97f 100644
--- a/app/backend/voice_backend.ts
+++ b/app/backend/voice_backend.ts
@@ -4,7 +4,7 @@ class VoiceSend {
     sendToVoiceRecognition(audio_data: Blob) {
         console.log("sending recording...");

-        const dataSend = { audio_data }
+        const dataSend = { audio:audio_data, option:"offline", recognition_type: "basic" }
         axios.post("http://localhost:5000/interstellar_ai/api/voice_recognition", dataSend)
             .then((response: any) => {
                 console.log(response['response'])

diff --git a/app/components/InputFrontend.tsx b/app/components/InputFrontend.tsx
index ce8ac49..c84124b 100644
--- a/app/components/InputFrontend.tsx
+++ b/app/components/InputFrontend.tsx
@@ -1,4 +1,5 @@
 import React, { useState, ForwardedRef, useEffect } from 'react';
+import "../styles/variables.css"

 interface InputProps {
     message: string;
@@ -30,6 +31,10 @@ const InputFrontend = React.forwardRef(
         }
     };

+    const styles = {
+
+    }
+
     return (
-
+
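Even with the styling in place, the payload still cannot arrive: the client posts JSON, so nothing lands in request.args on the Flask side, and the field name recognition_type does not match the 'type' key the endpoint reads. Patches 08 through 10 below close both gaps.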
diff --git a/app/styles/input.css b/app/styles/input.css
index ceeb0b3..ffbffeb 100644
--- a/app/styles/input.css
+++ b/app/styles/input.css
@@ -59,3 +59,19 @@
     background-color: var(--input-button-hover-color);
     box-shadow: 0 6px 15px rgba(0, 0, 0, 0.2);
 }
+
+.microphone-button.red{
+    background-color: var(--close-button-color);
+}
+
+.microphone-button.green{
+    background-color: var(--button-background-color);
+}
+
+.microphone-button.red:hover{
+    background-color: var(--close-button-hover-color);
+}
+
+.microphone-button.green:hover{
+    background-color: var(--input-button-hover-color);
+}
\ No newline at end of file

diff --git a/app/styles/variables.css b/app/styles/variables.css
index 72c464a..795b4fc 100644
--- a/app/styles/variables.css
+++ b/app/styles/variables.css
@@ -19,6 +19,7 @@
     --conversation-background-color: #79832e; /* Background color for conversation container */
     --doc-background-color: #ffffff; /* Background color for documents */
     --close-button-color: red;
+    --close-button-hover-color: #9e0101; /*NEW*/
     --burger-menu-background-color: #79832e; /*NEW*/
     --overlay-text-color:white; /*NEW*/

From 92d6688d64f6d1f61c4eda0091aac74f239f9435 Mon Sep 17 00:00:00 2001
From: Patrick_Pluto
Date: Thu, 26 Sep 2024 09:32:14 +0200
Subject: [PATCH 07/11] super cool fix

---
 py/api.py   |  3 ++-
 py/voice.py | 12 ++++++++++--
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/py/api.py b/py/api.py
index 404f41d..52fe922 100644
--- a/py/api.py
+++ b/py/api.py
@@ -103,7 +103,8 @@ class API:
             audio = request.args.get('audio')
             option = request.args.get('option')
             if recognition_type == "basic":
-                return jsonify({'status': 200, 'response': self.voice.basic_recognition(audio, option)})
+                text = self.voice.basic_recognition(audio, option)
+                return jsonify({'status': 200, 'response': text})

             return jsonify({'status': 401, 'response': "Invalid type"})

diff --git a/py/voice.py b/py/voice.py
index d589eab..ddf2a6d 100644
--- a/py/voice.py
+++ b/py/voice.py
@@ -4,12 +4,20 @@ import speech_recognition as sr

 class VoiceRecognition:
     @staticmethod
     def basic_recognition(audio, option):
+        print("preparing")
         r = sr.Recognizer()
         if option == "online":
+            print("online")
             with audio as source:
-                return r.recognize_google_cloud(source)
+                text = r.recognize_google_cloud(source)
+                print("recognized as: " + text)
+                return text
         elif option == "offline":
+            print("offline")
             with audio as source:
-                return r.recognize_sphinx(source)
+                text = r.recognize_sphinx(source)
+                print("recognized as: " + text)
+                return text

+        print("nothing")
         return False

From 787c158be1ceaddbac0716b8dae308f71f721378 Mon Sep 17 00:00:00 2001
From: Patrick_Pluto
Date: Thu, 26 Sep 2024 09:36:28 +0200
Subject: [PATCH 08/11] super cool fix 2

---
 py/api.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/py/api.py b/py/api.py
index 52fe922..e152cdc 100644
--- a/py/api.py
+++ b/py/api.py
@@ -99,14 +99,14 @@ class API:

         @self.app.route('/interstellar_ai/api/voice_recognition', methods=['POST'])
         def voice_recognition():
-            recognition_type = request.args.get('type')
+            type = request.args.get('type')
             audio = request.args.get('audio')
             option = request.args.get('option')
-            if recognition_type == "basic":
+            if type == "basic":
                 text = self.voice.basic_recognition(audio, option)
                 return jsonify({'status': 200, 'response': text})
-
-            return jsonify({'status': 401, 'response': "Invalid type"})
+            else:
+                return jsonify({'status': 401, 'response': "Invalid type"})

         @self.app.route('/interstellar_ai/api/weather', methods=['POST'])
         def get_weather():
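Beyond `type` shadowing the Python built-in (renamed to recog_type two patches later), the handler still looks in request.args, which only ever contains the query string. A quick reference sketch of where Flask surfaces each channel of a request may help; the /echo route below is made up purely for illustration:

    from flask import Flask, request, jsonify

    app = Flask(__name__)

    @app.route('/echo', methods=['POST'])
    def echo():
        return jsonify({
            'query': request.args.get('type'),         # query string: /echo?type=basic
            'form': request.form.get('type'),          # urlencoded or multipart form fields
            'file': bool(request.files.get('audio')),  # file parts of multipart/form-data
        })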
From 2517307ffcf738720bfd7aa2da09f0a87b0695ae Mon Sep 17 00:00:00 2001
From: YasinOnm08
Date: Thu, 26 Sep 2024 09:37:33 +0200
Subject: [PATCH 09/11] voice recognition trial 3

---
 app/backend/voice_backend.ts | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/app/backend/voice_backend.ts b/app/backend/voice_backend.ts
index f5cc97f..d2466c2 100644
--- a/app/backend/voice_backend.ts
+++ b/app/backend/voice_backend.ts
@@ -1,14 +1,19 @@
 import axios from "axios";

+
 class VoiceSend {
     sendToVoiceRecognition(audio_data: Blob) {
         console.log("sending recording...");
+        console.log(typeof (audio_data));
+        console.log(audio_data instanceof Blob);

-        const dataSend = { audio:audio_data, option:"offline", recognition_type: "basic" }
+
+
+        const dataSend = { option:"offline", type: "basic",audio:audio_data }
         axios.post("http://localhost:5000/interstellar_ai/api/voice_recognition", dataSend)
-            .then((response: any) => {
-                console.log(response['response'])
-                return response['response']
+            .then((response) => {
+                console.log(response.data)
+                return response.data.response
             })
             .catch(error => {
                 console.log("Error calling API:", error)

From ea3fd09ea2107c2636c3f1f87d685c8586ab5a84 Mon Sep 17 00:00:00 2001
From: YasinOnm08
Date: Thu, 26 Sep 2024 09:46:32 +0200
Subject: [PATCH 10/11] voice recognition trial 4

---
 app/backend/voice_backend.ts | 13 ++++++++-----
 py/api.py                    | 10 ++++++----
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/app/backend/voice_backend.ts b/app/backend/voice_backend.ts
index d2466c2..a93fd89 100644
--- a/app/backend/voice_backend.ts
+++ b/app/backend/voice_backend.ts
@@ -6,11 +6,14 @@ class VoiceSend {
         console.log("sending recording...");
         console.log(typeof (audio_data));
         console.log(audio_data instanceof Blob);
-
-
-
-        const dataSend = { option:"offline", type: "basic",audio:audio_data }
-        axios.post("http://localhost:5000/interstellar_ai/api/voice_recognition", dataSend)
+
+        const formdata = new FormData()
+        formdata.append("audio", audio_data)
+        formdata.append("option", "offline")
+        formdata.append("type", "basic")
+
+        const dataSend = { option:"offline", type:"basic",audio:audio_data }
+        axios.post("http://localhost:5000/interstellar_ai/api/voice_recognition", formdata)
             .then((response) => {
                 console.log(response.data)
                 return response.data.response

diff --git a/py/api.py b/py/api.py
index e152cdc..0951717 100644
--- a/py/api.py
+++ b/py/api.py
@@ -99,10 +99,12 @@ class API:

         @self.app.route('/interstellar_ai/api/voice_recognition', methods=['POST'])
         def voice_recognition():
-            type = request.args.get('type')
-            audio = request.args.get('audio')
-            option = request.args.get('option')
+            print(request.args)
+            recog_type = request.form.get('type')
+            print(recog_type)
+            audio = request.files.get('audio')
+            option = request.form.get('option')
             if recog_type == "basic":
                 text = self.voice.basic_recognition(audio, option)
                 return jsonify({'status': 200, 'response': text})
             else:
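Patch 10 is the decisive one on the wire: posting a FormData instance makes the browser (via axios) set the multipart/form-data content type and boundary automatically, so request.form and request.files on the Flask side finally see the fields. The dataSend object it still builds is now dead code and could be dropped, and request.files.get('audio') yields a werkzeug FileStorage — a file-like object, which is what the final patch feeds to the recognizer.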
== "online": - print("online") - with audio as source: + with sr.AudioFile(audio) as source: + print("online") text = r.recognize_google_cloud(source) print("recognized as: " + text) return text elif option == "offline": - print("offline") - with audio as source: + with sr.AudioFile(audio) as source: + print("offline") text = r.recognize_sphinx(source) print("recognized as: " + text) return text