diff --git a/app/backend/InputOutputHandler.tsx b/app/backend/InputOutputHandler.tsx
index 83c998a..1316901 100644
--- a/app/backend/InputOutputHandler.tsx
+++ b/app/backend/InputOutputHandler.tsx
@@ -1,13 +1,11 @@
 "use client"
-import React, { use, useEffect, useRef, useState } from "react";
+import React, { useEffect, useRef, useState } from "react";
 import ConversationFrontend from "../components/ConversationFrontend";
 import InputFrontend from "../components/InputFrontend";
 import VoiceSend from "./voice_backend"
 import { AudioRecorder } from "./AudioRecorder";
 import axios from "axios";
 import { resolve } from "path";
-import { FFmpeg } from "@ffmpeg/ffmpeg";
-import { fetchFile, toBlobURL } from "@ffmpeg/util"
 
 
 const InputOutputBackend: React.FC = () => {
@@ -127,17 +125,6 @@ const InputOutputBackend: React.FC = () => {
     });
   };
 
-  /* Variables for System-prompt */
-  const [preferredCurrency, setPreferredCurrency] = useState(localStorage.getItem("preferredCurrency") || "")
-  const [preferredLanguage, setPreferredLanguage] = useState(localStorage.getItem("preferredLanguage") || "")
-  const [timeFormat, setTimeFormat] = useState(localStorage.getItem("timeFormat") || "")
-  const [preferredMeasurement, setPreferredMeasurement] = useState(localStorage.getItem("preferredMeasurement") || "")
-  const [timeZone, setTimeZone] = useState(localStorage.getItem("timeZone") || "")
-  const [dateFormat, setDateFormat] = useState(localStorage.getItem("dateFormat") || "")
-
-  useEffect(() => {
-
-  }, [preferredCurrency, preferredLanguage, timeFormat, preferredMeasurement, timeZone, dateFormat])
 
   const addMessage = (role: string, content: string) => {
     setMessages(previous => [...previous, { role, content }])
@@ -165,50 +152,25 @@ const InputOutputBackend: React.FC = () => {
       audioChunks.current.push(event.data)
     }
 
-    mediaRecorder.onstop = async () => {
+    mediaRecorder.onstop = () => {
       const audioBlob = new Blob(audioChunks.current, { type: "audio/ogg" })
+      const url = URL.createObjectURL(audioBlob)
+      console.log(url);
+      setAudioURL(url)
       audioChunks.current = []
-      // console.log(audioBlob);
-      // const url = URL.createObjectURL(audioBlob)
-      // const audio = new Audio(url);
-      // audio.play().catch(error => console.error("Error playing audio:", error));
-
       const remote = new VoiceSend()
-      remote.sendToVoiceRecognition(audioBlob)
+      remote.sendToVoiceRecognition(audioBlob)
     }
 
     mediaRecorder.start()
     setIsRecording(true)
-  }
-
-  const ffmpegRef = useRef<FFmpeg | null>(null)
-  const audioRef = useRef("")
-
-  const loadFFmpeg = async () => {
-    if (!ffmpegRef.current) {
-      ffmpegRef.current = new FFmpeg()
-      await ffmpegRef.current.load()
+  }
-    }
-  }
-
-  const convertOggToWav = async (oggFile: File | Blob) => {
-    await loadFFmpeg()
-
-    const ffmpeg = ffmpegRef.current!
-
-    await ffmpeg.writeFile("input.ogg", await fetchFile(oggFile))
-    await ffmpeg.exec(["-i", "input.ogg", "output.wav"])
-    const wavData = await ffmpeg.readFile("output.wav")
-    console.log(wavData);
-    const wavBlob = new Blob([wavData], { type: "audio/wav" })
-    audioRef.current = URL.createObjectURL(wavBlob)
-    return wavBlob
-  }
-  const stopRecording = () => {
-    mediaRecorderRef.current?.stop()
-    setIsRecording(false)
-  }
+  const stopRecording = () => {
+    mediaRecorderRef.current?.stop()
+    setIsRecording(false)
+  }
 
 
   const handleMicClick = () => {
@@ -274,9 +236,14 @@ const InputOutputBackend: React.FC = () => {
           onMicClick={handleMicClick}
           inputDisabled={inputDisabled}
           isRecording={isRecording}
-        />
+        />
       )
 }
-export default InputOutputBackend
\ No newline at end of file
+export default InputOutputBackend
+
+
+
+
+
diff --git a/app/components/ConversationFrontend.tsx b/app/components/ConversationFrontend.tsx
index dc3114c..e6bfe39 100644
--- a/app/components/ConversationFrontend.tsx
+++ b/app/components/ConversationFrontend.tsx
@@ -58,7 +58,7 @@ const ConversationFrontend = React.forwardRef
-        <div id="copiedText">
-          <p>Copied!</p>
-        </div>
+        <div>
+          <p>Copied!</p>
+        </div>
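A note on the InputOutputHandler.tsx hunks above: with the FFmpeg OGG-to-WAV conversion and its @ffmpeg/* imports gone, the recording path reduces to the standard MediaRecorder pattern. A minimal self-contained sketch of that flow follows; the helper name and callback are illustrative, and only the blob type and the sendToVoiceRecognition hand-off mirror the diff:

// Sketch: record microphone audio and hand the finished blob to a callback.
// Standard Web APIs only; recordAndSend/onBlob are hypothetical names.
async function recordAndSend(onBlob: (blob: Blob) => void): Promise<MediaRecorder> {
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
  const recorder = new MediaRecorder(stream)
  const chunks: Blob[] = []

  // Collect encoded chunks as they become available.
  recorder.ondataavailable = (event) => chunks.push(event.data)

  recorder.onstop = () => {
    // Assemble one blob; the diff tags it audio/ogg (browsers may emit webm).
    const audioBlob = new Blob(chunks, { type: "audio/ogg" })
    // An object URL lets the UI replay the take, as setAudioURL does above.
    console.log(URL.createObjectURL(audioBlob))
    onBlob(audioBlob) // e.g. new VoiceSend().sendToVoiceRecognition(audioBlob)
  }

  recorder.start()
  return recorder // caller ends the take with recorder.stop()
}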
diff --git a/app/components/InputFrontend.tsx b/app/components/InputFrontend.tsx
index 74539e6..c84124b 100644
--- a/app/components/InputFrontend.tsx
+++ b/app/components/InputFrontend.tsx
@@ -13,6 +13,10 @@ const InputFrontend = React.forwardRef(
   ({ message, onSendClick, onMicClick, inputDisabled, isRecording}, ref: ForwardedRef<HTMLInputElement>) => {
     const [inputValue, setInputValue] = useState('');
 
+    useEffect(() => {
+      setInputValue(message);
+    }, [message]);
+
     const handleInputChange = (e: React.ChangeEvent<HTMLInputElement>) => {
       setInputValue(e.target.value);
     };
@@ -27,6 +31,10 @@ const InputFrontend = React.forwardRef(
       }
     };
 
+    const styles = {
+
+    }
+
     return (
       <div>
diff --git a/app/layout.tsx b/app/layout.tsx
--- a/app/layout.tsx
+++ b/app/layout.tsx
@@ ... @@
         <title>{metadata.title}</title>
         {/* Tried adding the favicon here */}
-        <link ... />
+        <link ... />
       </head>
       <body>
         {children}
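The useEffect added to InputFrontend.tsx above is the usual pattern for a controlled input that follows a prop, presumably so a voice transcription pushed down as message lands in the text field. A self-contained sketch, with illustrative component and prop names:

import React, { useEffect, useState } from "react";

// Controlled input that follows an externally supplied `message` prop,
// while still letting the user type freely between updates.
const SyncedInput: React.FC<{ message: string }> = ({ message }) => {
  const [inputValue, setInputValue] = useState("");

  // Whenever the parent pushes a new message (e.g. a transcription result),
  // overwrite the local value; manual edits still flow through onChange.
  useEffect(() => {
    setInputValue(message);
  }, [message]);

  return (
    <input
      value={inputValue}
      onChange={(e) => setInputValue(e.target.value)}
    />
  );
};

export default SyncedInput;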
diff --git a/app/styles/output.css b/app/styles/output.css
index 15eadae..2abde1d 100644
--- a/app/styles/output.css
+++ b/app/styles/output.css
@@ -69,7 +69,3 @@
 .button-container img {
   height: 1.5em;
 }
-
-#copiedText{
-  margin-top: 1em;
-}
diff --git a/py/api.py b/py/api.py
index f3e13ea..c43bf1f 100644
--- a/py/api.py
+++ b/py/api.py
@@ -1,4 +1,4 @@
-from flask import Flask, request, jsonify
+from flask import Flask, request, jsonify
 from flask_cors import CORS
 import secrets
 import threading
diff --git a/py/voice.py b/py/voice.py
index dc0d28b..461da21 100644
--- a/py/voice.py
+++ b/py/voice.py
@@ -1,32 +1,26 @@
 import io
 import numpy as np
-from faster_whisper import WhisperModel
+import whisper
 from pydub import AudioSegment
 
 
 class VoiceRecognition:
     @staticmethod
     def recognition(audio):
-        audio_buffer = io.BytesIO(audio.read())
-
-        try:
-            audio_segment = AudioSegment.from_file(audio_buffer, format="ogg")
+        audio_data = audio.read()
+        with io.BytesIO(audio_data) as audio_buffer:
+            audio_segment = AudioSegment.from_ogg(audio_buffer)
 
-            wav_io = io.BytesIO()
-            audio_segment.export(wav_io, format="wav")
-            wav_io.seek(0)
-        except:
-            print("audio to wav failed")
+        raw_data = np.array(audio_segment.get_array_of_samples())
+
+        if audio_segment.channels > 1:
+            raw_data = raw_data.reshape((-1, audio_segment.channels))[:, 0]
+
+        audio_data = raw_data.astype(np.float32) / (2 ** (audio_segment.sample_width * 8 - 1))
 
-        model_size = "base"
-        model = WhisperModel(model_size, device="cpu", compute_type="int8")
-
-        segments, _ = model.transcribe(wav_io)
-        transcription = ""
-        for segment in segments:
-            transcription += segment.text + " "
-        result = transcription.strip()
-        print(result)
-        return result
+        model = whisper.load_model("base")
+        result = model.transcribe(audio_data)
+        print(result["text"])
+        return result["text"]
 
 # npm install @ffmpeg/ffmpeg @ffmpeg/util @ffmpeg/types @ffmpeg/core-mt
\ No newline at end of file
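The rewritten py/voice.py decodes the OGG upload with pydub, keeps only the first channel, and scales the integer samples to float32 in [-1, 1] by dividing by 2^(8 * sample_width - 1) (32768 for 16-bit audio), which is the array form whisper's transcribe() accepts; note that whisper assumes 16 kHz input, so a set_frame_rate(16000) on the segment may still be needed. The browser half of that round trip presumably looks like the sketch below; the endpoint URL, form field name, and response shape are assumptions, and the real values live in VoiceSend in app/backend/voice_backend:

import axios from "axios";

// Hypothetical client-side counterpart to py/voice.py's recognition():
// posts the recorded blob as multipart form data and resolves to the text.
export async function sendToVoiceRecognition(audioBlob: Blob): Promise<string> {
  const formData = new FormData();
  formData.append("audio", audioBlob, "recording.ogg");

  // Let the browser set the multipart boundary header itself.
  const response = await axios.post("http://localhost:5000/voice_recognition", formData);
  return response.data.response; // response shape is an assumption
}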