diff --git a/app/backend/InputOutputHandler.tsx b/app/backend/InputOutputHandler.tsx
index 92fa6df..d4614fd 100644
--- a/app/backend/InputOutputHandler.tsx
+++ b/app/backend/InputOutputHandler.tsx
@@ -2,7 +2,7 @@
 import React, { use, useEffect, useRef, useState } from "react";
 import ConversationFrontend from "../components/ConversationFrontend";
 import InputFrontend from "../components/InputFrontend";
-import { sendToVoiceRecognition } from "./voice_backend"
+import VoiceSend from "./voice_backend"
 import { AudioRecorder } from "./AudioRecorder";
 import axios from "axios";
 import { resolve } from "path";
@@ -17,53 +17,41 @@ const InputOutputBackend: React.FC = () => {
     content: string
   }
 
-  const [preferredCurrency, setPreferredCurrency] = useState(null);
-  const [preferredLanguage, setPreferredLanguage] = useState(null);
-  const [timeFormat, setTimeFormat] = useState(null);
-  const [preferredMeasurement, setPreferredMeasurement] = useState(null);
-  const [timeZone, setTimeZone] = useState(null);
-  const [dateFormat, setDateFormat] = useState(null);
-  const [messages, setMessages] = useState([]);
+  /* Variables for System-prompt */
+  const [preferredCurrency, setPreferredCurrency] = useState("")
+  const [preferredLanguage, setPreferredLanguage] = useState("")
+  const [timeFormat, setTimeFormat] = useState("")
+  const [preferredMeasurement, setPreferredMeasurement] = useState("")
+  const [timeZone, setTimeZone] = useState("")
+  const [dateFormat, setDateFormat] = useState("")
 
   useEffect(() => {
-    setPreferredCurrency(localStorage.getItem("preferredCurrency"));
-    setPreferredLanguage(localStorage.getItem("preferredLanguage"));
-    setTimeFormat(localStorage.getItem("timeFormat"));
-    setPreferredMeasurement(localStorage.getItem("preferredMeasurement"));
-    setTimeZone(localStorage.getItem("timeZone"));
-    setDateFormat(localStorage.getItem("dateFormat"));
-  }, []);
-
-  useEffect(() => {
-    if (preferredCurrency && preferredLanguage && timeFormat && dateFormat && preferredMeasurement && timeZone) {
-      setMessages([
-        {
-          role: "system",
-          content: `You are in the timezone: ${timeZone}.
-          You use the time format ${timeFormat}.
-          You use the date format ${dateFormat} for all references of dates.
-          You use the ${preferredMeasurement} system.
-          You use the currency ${preferredCurrency}.
-          You will only answer in the language (you will receive the country code) ${preferredLanguage}.
-          But in the case the user specifically states to answer in another language, do that. Speaking in
-          another language is not stating you should answer in that language.
-          Additionally, under no circumstances translate your answer into multiple languages.`,
-        },
-        { role: "assistant", content: "Hello! How can I help you?" },
-      ]);
-    }
-  }, [preferredCurrency, preferredLanguage, timeFormat, dateFormat, preferredMeasurement, timeZone]);
-
-
+    setPreferredCurrency(localStorage.getItem("preferredCurrency") ?? "")
+    setPreferredLanguage(localStorage.getItem("preferredLanguage") ?? "")
+    setTimeFormat(localStorage.getItem("timeFormat") ?? "")
+    setPreferredMeasurement(localStorage.getItem("preferredMeasurement") ?? "")
+    setTimeZone(localStorage.getItem("timeZone") ?? "")
+    setDateFormat(localStorage.getItem("dateFormat") ?? "")
+  }, []) // run once on mount; these values only ever come from localStorage
+
   const [copyClicked, setCopyClicked] = useState(false)
   const [accessToken, setAccessToken] = useState("")
   const postWorkerRef = useRef(null)
   const getWorkerRef = useRef(null)
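+  // NOTE: this state initializer runs once, on the first render, before the
+  // effect above has loaded the preferences, so the template placeholders
+  // below are interpolated while every preference value is still ""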
+  const [messages, setMessages] = useState([{ role: "system",
+    content: `You are in the timezone: ${timeZone}.
+    You use the time format ${timeFormat}.
+    You use the date format ${dateFormat} for all references of dates.
+    You use the ${preferredMeasurement} system. You use the currency ${preferredCurrency}.
+    You will only answer in the language (you will receive the country code) ${preferredLanguage}.
+    But in the case the user specifically states to answer in another language, do that; speaking in
+    another language is not stating you should answer in that language. Additionally, do not translate your answer into multiple languages.`
+  }, { role: "assistant", content: "Hello! How can I help you?" }])
   const [liveMessage, setLiveMessage] = useState("")
   const [inputMessage, setInputMessage] = useState("")
   const [inputDisabled, setInputDisabled] = useState(false)
   const [isRecording, setIsRecording] = useState(false)
-  const mediaRecorderRef = useRef(null)
+  const mediaRecorderRef = useRef(null)
   const audioChunks = useRef([])
@@ -181,46 +169,40 @@ const InputOutputBackend: React.FC = () => {
     }
   }
 
-  const startRecording = async (): Promise<string> => {
-    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-    const mediaRecorder = new MediaRecorder(stream);
-    mediaRecorderRef.current = mediaRecorder;
+  const startRecording = async () => {
+    const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
+    const mediaRecorder = new MediaRecorder(stream)
+    mediaRecorderRef.current = mediaRecorder
+
+    mediaRecorder.ondataavailable = (event) => {
+      audioChunks.current.push(event.data)
+    }
+
+    mediaRecorder.onstop = async () => {
+      const audioBlob = new Blob(audioChunks.current, { type: "audio/ogg" })
+      audioChunks.current = []
+      // console.log(audioBlob);
+      // const url = URL.createObjectURL(audioBlob)
+      // const audio = new Audio(url);
+      // audio.play().catch(error => console.error("Error playing audio:", error));
 
-    audioChunks.current = []; // Initialize audio chunks
-
-    // Create a promise that resolves when the onstop event is done
-    const stopRecordingPromise = new Promise<string>((resolve) => {
-      mediaRecorder.ondataavailable = (event) => {
-        audioChunks.current.push(event.data);
-      };
-
-      mediaRecorder.onstop = async () => {
-        const audioBlob = new Blob(audioChunks.current, { type: "audio/ogg" });
-        audioChunks.current = [];
-
-        const text_voice = await sendToVoiceRecognition(audioBlob);
-        console.log(text_voice);
-        resolve(text_voice); // Resolve the promise with the recognized text
-      };
-    });
-
-    mediaRecorder.start();
-    setIsRecording(true);
-
-    // Wait for the recording to stop and get the recognized text
-    return stopRecordingPromise;
-  };
+      const remote = new VoiceSend()
+      remote.sendToVoiceRecognition(audioBlob)
+    }
+
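+    // NOTE: the VoiceSend call above is fire-and-forget; the transcription is
+    // logged in voice_backend.ts but never written back into the input field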
+    mediaRecorder.start()
+    setIsRecording(true)
+  }
+
   const stopRecording = () => {
-    mediaRecorderRef.current?.stop();
-    setIsRecording(false);
-  };
+    mediaRecorderRef.current?.stop()
+    setIsRecording(false)
+  }
 
-  const handleMicClick = async () => {
+
+  const handleMicClick = () => {
     if (!isRecording) {
-      const recognizedText = await startRecording();
-      setInputMessage(recognizedText); // Set the recognized text after recording
-      console.log("Set!")
+      startRecording();
     } else {
       stopRecording();
     }
@@ -264,7 +246,7 @@ const InputOutputBackend: React.FC = () => {
     await wait(1000)
     setCopyClicked(false)
   }
-
+
   return (
     <>
@@ -281,7 +263,7 @@ const InputOutputBackend: React.FC = () => {
         onMicClick={handleMicClick}
         inputDisabled={inputDisabled}
         isRecording={isRecording}
-      />
+      />
     </>
   )
 }
diff --git a/app/backend/voice_backend.ts b/app/backend/voice_backend.ts
index ca8a998..3c4193b 100644
--- a/app/backend/voice_backend.ts
+++ b/app/backend/voice_backend.ts
@@ -1,20 +1,26 @@
 import axios from "axios";
 
-export const sendToVoiceRecognition = (audio_data: Blob): Promise<string> => {
-  console.log("sending recording...");
-  const formdata = new FormData()
-  formdata.append("audio", audio_data)
+class VoiceSend {
+  sendToVoiceRecognition(audio_data: Blob): Promise<string> {
+    console.log("sending recording...");
 
-  const dataSend = { option: "offline", type: "basic", audio: audio_data }
-  return axios.post("http://localhost:5000/interstellar_ai/api/voice_recognition", formdata)
-    .then((response) => {
-      console.log(response.data)
-      return response.data.response
-    })
-    .catch(error => {
-      console.log("Error calling API:", error)
-      postMessage({ status: 500 })
-      return "Error"
-    })
-}
\ No newline at end of file
+    const formdata = new FormData()
+    formdata.append("audio", audio_data)
+
+    const dataSend = { option: "offline", type: "basic", audio: audio_data }
+    return axios.post("http://localhost:5000/interstellar_ai/api/voice_recognition", formdata)
+      .then((response) => {
+        console.log(response.data)
+        return response.data.response
+      })
+      .catch(error => {
+        console.log("Error calling API:", error)
+        postMessage({ status: 500 })
+        return "Error"
+      })
+  }
+
+}
+
+
+export default VoiceSend;
\ No newline at end of file
diff --git a/app/components/InputFrontend.tsx b/app/components/InputFrontend.tsx
index 5d51834..f02442b 100644
--- a/app/components/InputFrontend.tsx
+++ b/app/components/InputFrontend.tsx
@@ -6,17 +6,13 @@ interface InputProps {
     onSendClick: (message: string, override: boolean) => void;
     onMicClick: () => void;
     inputDisabled: boolean;
-    isRecording: boolean
+    isRecording:boolean
 }
 
 const InputFrontend = React.forwardRef(
-    ({ message, onSendClick, onMicClick, inputDisabled, isRecording }, ref: ForwardedRef) => {
+    ({ message, onSendClick, onMicClick, inputDisabled, isRecording}, ref: ForwardedRef) => {
        const [inputValue, setInputValue] = useState('');
 
-       useEffect(() => {
-           setInputValue(message);
-       }, [message]);
-
        const handleInputChange = (e: React.ChangeEvent) => {
            setInputValue(e.target.value);
        };
@@ -44,7 +40,7 @@ const InputFrontend = React.forwardRef(
diff --git a/py/voice.py b/py/voice.py
index 8aeb3e0..24d12ac 100644
--- a/py/voice.py
+++ b/py/voice.py
@@ -7,19 +7,25 @@ class VoiceRecognition:
     @staticmethod
     def recognition(audio):
         audio_buffer = io.BytesIO(audio.read())
-
-        audio_segment = AudioSegment.from_file(audio_buffer, format="ogg")
-
-        wav_io = io.BytesIO()
-        audio_segment.export(wav_io, format="wav")
-        wav_io.seek(0)
-
+
+        try:
+            audio_segment = AudioSegment.from_file(audio_buffer, format="ogg")
+
+            wav_io = io.BytesIO()
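+            # render the decoded Ogg audio into an in-memory WAV for Whisper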
+            audio_segment.export(wav_io, format="wav")
+            wav_io.seek(0)
+        except Exception as e:
+            # bail out early: wav_io would be unbound below if decoding failed
+            print("audio to wav failed:", e)
+            return ""
+
         model_size = "base"
-        model = WhisperModel(model_size, device="cpu", compute_type="int8")
-
+        model = WhisperModel(model_size, device="cpu", compute_type="int8")
+
         segments, _ = model.transcribe(wav_io)
         transcription = ""
         for segment in segments:
             transcription += segment.text + " "
         result = transcription.strip()
+        print(result)
         return result
+
+# npm install @ffmpeg/ffmpeg @ffmpeg/util @ffmpeg/types @ffmpeg/core-mt
\ No newline at end of file