From 9663f49dee7b233e4327950145163f4785886b23 Mon Sep 17 00:00:00 2001
From: YasinOnm08 <onmazyasin4@gmail.com>
Date: Fri, 27 Sep 2024 08:03:12 +0200
Subject: [PATCH 1/4] Add top margin to "Copied!" feedback; remove input message sync

---
 app/components/ConversationFrontend.tsx | 2 +-
 app/components/InputFrontend.tsx        | 8 --------
 app/styles/output.css                   | 4 ++++
 3 files changed, 5 insertions(+), 9 deletions(-)
diff --git a/app/components/ConversationFrontend.tsx b/app/components/ConversationFrontend.tsx
index e6bfe39..dc3114c 100644
--- a/app/components/ConversationFrontend.tsx
+++ b/app/components/ConversationFrontend.tsx
@@ -58,7 +58,7 @@ const ConversationFrontend = React.forwardRef<HTMLDivElement, ConversationProps>
             <button type="button" onClick={onCopyClick}>
               <img src="/img/copy.svg" alt="copy" />
             </button>
-            <p style={{opacity:isClicked?"1":"0", transition:"all 0.3s ease-in-out"}}>Copied!</p>
+            <p id="copiedText" style={{opacity:isClicked?"1":"0", transition:"all 0.3s ease-in-out"}}>Copied!</p>
           </div>
         </div>
       </div>
diff --git a/app/components/InputFrontend.tsx b/app/components/InputFrontend.tsx
index c84124b..74539e6 100644
--- a/app/components/InputFrontend.tsx
+++ b/app/components/InputFrontend.tsx
@@ -13,10 +13,6 @@ const InputFrontend = React.forwardRef<HTMLDivElement, InputProps>(
   ({ message, onSendClick, onMicClick, inputDisabled, isRecording}, ref: ForwardedRef<HTMLDivElement>) => {
     const [inputValue, setInputValue] = useState('');
 
-    useEffect(() => {
-      setInputValue(message);
-    }, [message]);
-
     const handleInputChange = (e: React.ChangeEvent<HTMLInputElement>) => {
       setInputValue(e.target.value);
     };
@@ -31,10 +27,6 @@ const InputFrontend = React.forwardRef<HTMLDivElement, InputProps>(
       }
     };
 
-    const styles = {
-      
-    }
-
     return (
       <div className="input" id="inputForm" ref={ref}>
         <input
diff --git a/app/styles/output.css b/app/styles/output.css
index 2abde1d..15eadae 100644
--- a/app/styles/output.css
+++ b/app/styles/output.css
@@ -69,3 +69,7 @@
 .button-container img {
     height: 1.5em;
 }
+
+#copiedText{
+    margin-top: 1em;
+}

From e1f77c5b17bb119967cc66ff60bc32ec5a1e03f3 Mon Sep 17 00:00:00 2001
From: YasinOnm08 <onmazyasin4@gmail.com>
Date: Fri, 27 Sep 2024 10:57:18 +0200
Subject: [PATCH 2/4] Voice recognition WIP: convert recordings to WAV in the browser with ffmpeg.wasm

---
 app/backend/InputOutputHandler.tsx | 56 +++++++++++++++++++++++++-----
 app/layout.tsx                     |  2 +-
 2 files changed, 49 insertions(+), 9 deletions(-)

diff --git a/app/backend/InputOutputHandler.tsx b/app/backend/InputOutputHandler.tsx
index 1316901..fa8dcf6 100644
--- a/app/backend/InputOutputHandler.tsx
+++ b/app/backend/InputOutputHandler.tsx
@@ -1,11 +1,13 @@
 "use client"
-import React, { useEffect, useRef, useState } from "react";
+import React, { use, useEffect, useRef, useState } from "react";
 import ConversationFrontend from "../components/ConversationFrontend";
 import InputFrontend from "../components/InputFrontend";
 import VoiceSend from "./voice_backend"
 import { AudioRecorder } from "./AudioRecorder";
 import axios from "axios";
 import { resolve } from "path";
+import { FFmpeg } from "@ffmpeg/ffmpeg";
+import { fetchFile, toBlobURL } from "@ffmpeg/util"
 
 
 const InputOutputBackend: React.FC = () => {
@@ -125,6 +127,17 @@ const InputOutputBackend: React.FC = () => {
     });
   };
 
+  /* Variables for System-prompt. Read lazily and guarded: Next.js also renders
+     client components on the server, where localStorage is not defined. */
+  const readPref = (key: string) => (typeof window === "undefined" ? "" : localStorage.getItem(key) ?? "")
+  const [preferredCurrency, setPreferredCurrency] = useState(() => readPref("preferredCurrency"))
+  const [preferredLanguage, setPreferredLanguage] = useState(() => readPref("preferredLanguage"))
+  const [timeFormat, setTimeFormat] = useState(() => readPref("timeFormat"))
+  const [preferredMeasurement, setPreferredMeasurement] = useState(() => readPref("preferredMeasurement"))
+  const [timeZone, setTimeZone] = useState(() => readPref("timeZone"))
+  const [dateFormat, setDateFormat] = useState(() => readPref("dateFormat"))
+  useEffect(() => {
+  },[preferredCurrency, preferredLanguage, timeFormat, preferredMeasurement, timeZone, dateFormat])
 
   const addMessage = (role: string, content: string) => {
     setMessages(previous => [...previous, { role, content }])
@@ -152,25 +165,52 @@ const InputOutputBackend: React.FC = () => {
       audioChunks.current.push(event.data)
     }
         
-    mediaRecorder.onstop = () => {
+    mediaRecorder.onstop = async () => {
       const audioBlob = new Blob(audioChunks.current, { type: "audio/ogg" })
+      console.log(audioBlob);
       const url = URL.createObjectURL(audioBlob)
+      const audio = new Audio(url);
+      audio.play().catch(error => console.error("Error playing audio:", error));
       console.log(url);
       setAudioURL(url)
       audioChunks.current = []
+      const wavBlob = await convertOggToWav(audioBlob)
       const remote = new VoiceSend()
-      remote.sendToVoiceRecognition(audioBlob,)
+      remote.sendToVoiceRecognition(wavBlob)
     }
 
     mediaRecorder.start()
     setIsRecording(true)
-        
+  }
+
+  const ffmpegRef = useRef<FFmpeg | null>(null)
+  const audioRef = useRef("")
+
+  const loadFFmpeg = async () => {
+    if (!ffmpegRef.current) {
+      ffmpegRef.current = new FFmpeg()
+      await ffmpegRef.current.load()
     }
+  }
+
+  /* Converts a recording to WAV with ffmpeg.wasm, stores an object URL for it in audioRef and returns the WAV blob. */
+  const convertOggToWav = async (oggFile: File | Blob) => {
+    await loadFFmpeg()
+    const ffmpeg = ffmpegRef.current!
+    await ffmpeg.writeFile("input.ogg", await fetchFile(oggFile))
+    // "-y": output.wav from an earlier call is never deleted, so let ffmpeg overwrite it.
+    const exitCode = await ffmpeg.exec(["-y", "-i", "input.ogg", "output.wav"])
+    if (exitCode !== 0) throw new Error(`ffmpeg exited with code ${exitCode}`)
+    const wavBlob = new Blob([await ffmpeg.readFile("output.wav")], { type: "audio/wav" })
+    if (audioRef.current) URL.revokeObjectURL(audioRef.current) // release the previous conversion's URL
+    audioRef.current = URL.createObjectURL(wavBlob)
+    return wavBlob
+  }
     
-    const stopRecording = () => {
-      mediaRecorderRef.current?.stop()
-      setIsRecording(false)
-    }
+  const stopRecording = () => {
+    mediaRecorderRef.current?.stop()
+    setIsRecording(false)
+  }
 
 
   const handleMicClick = () => {
diff --git a/app/layout.tsx b/app/layout.tsx
index 8070a08..0896d49 100644
--- a/app/layout.tsx
+++ b/app/layout.tsx
@@ -12,7 +12,7 @@ export default function RootLayout({ children }: { children: ReactNode }) {
         <title>{metadata.title}</title>
         <meta name="description" content={metadata.description} />
         {/* Tried adding the favicon here */}
-        <link rel="icon" href="./public/favicon.ico" type="image/x-icon" />
+        <link rel="icon" href="../public/favicon.ico" type="image/x-icon" />
       </head>
       <body>
         <main>{children}</main>

From 8090ce969e00d3769cadcc77d58f5fac0328d7b8 Mon Sep 17 00:00:00 2001
From: YasinOnm08 <onmazyasin4@gmail.com>
Date: Fri, 27 Sep 2024 13:59:27 +0200
Subject: [PATCH 3/4] Voice recognition: send OGG to the backend and transcribe with faster-whisper

---
 app/backend/InputOutputHandler.tsx | 23 +++++++------------
 py/api.py                          |  2 +-
 py/voice.py                        | 36 +++++++++++++++++-------------
 3 files changed, 30 insertions(+), 31 deletions(-)

diff --git a/app/backend/InputOutputHandler.tsx b/app/backend/InputOutputHandler.tsx
index fa8dcf6..83c998a 100644
--- a/app/backend/InputOutputHandler.tsx
+++ b/app/backend/InputOutputHandler.tsx
@@ -167,16 +167,14 @@ const InputOutputBackend: React.FC = () => {
         
     mediaRecorder.onstop = async () => {
       const audioBlob = new Blob(audioChunks.current, { type: "audio/ogg" })
-      console.log(audioBlob);
-      const url = URL.createObjectURL(audioBlob)
-      const audio = new Audio(url);
-      audio.play().catch(error => console.error("Error playing audio:", error));
-      console.log(url);
-      setAudioURL(url)
       audioChunks.current = []
-      const wavBlob = await convertOggToWav(audioBlob)
+      // console.log(audioBlob);
+      // const url = URL.createObjectURL(audioBlob)
+      // const audio = new Audio(url);
+      // audio.play().catch(error => console.error("Error playing audio:", error));
+
       const remote = new VoiceSend()
-      remote.sendToVoiceRecognition(wavBlob)
+      remote.sendToVoiceRecognition(audioBlob)
     }
 
     mediaRecorder.start()
@@ -276,14 +274,9 @@ const InputOutputBackend: React.FC = () => {
         onMicClick={handleMicClick}
         inputDisabled={inputDisabled}
         isRecording={isRecording}
-      />
+      /> 
     </div>
   )
 }
 
-export default InputOutputBackend
-
-
-
-
-
+export default InputOutputBackend
\ No newline at end of file
diff --git a/py/api.py b/py/api.py
index c43bf1f..f3e13ea 100644
--- a/py/api.py
+++ b/py/api.py
@@ -1,4 +1,4 @@
-from flask i                mport Flask, request, jsonify
+from flask import Flask, request, jsonify
 from flask_cors import CORS
 import secrets
 import threading
diff --git a/py/voice.py b/py/voice.py
index 461da21..dc0d28b 100644
--- a/py/voice.py
+++ b/py/voice.py
@@ -1,26 +1,32 @@
 import io
 import numpy as np
-import whisper
+from faster_whisper import WhisperModel
 from pydub import AudioSegment
 
 
 class VoiceRecognition:
     @staticmethod
     def recognition(audio):
-        audio_data = audio.read()
-        with io.BytesIO(audio_data) as audio_buffer:
-            audio_segment = AudioSegment.from_ogg(audio_buffer)
-            
-            raw_data = np.array(audio_segment.get_array_of_samples())
-            
-            if audio_segment.channels > 1:
-                raw_data = raw_data.reshape((-1, audio_segment.channels))[:, 0]
-            
-            audio_data = raw_data.astype(np.float32) / (2 ** (audio_segment.sample_width * 8 - 1))
+        audio_buffer = io.BytesIO(audio.read())
         
-        model = whisper.load_model("base")
-        result = model.transcribe(audio_data)
-        print(result["text"])
-        return result["text"]
+        try:
+            audio_segment = AudioSegment.from_file(audio_buffer, format="ogg")
+            wav_io = io.BytesIO()
+            audio_segment.export(wav_io, format="wav")
+            wav_io.seek(0)
+        except Exception as exc:
+            # Without wav_io there is nothing to transcribe: raise here instead
+            # of falling through to the transcribe call with an unbound name.
+            print("audio to wav failed")
+            raise RuntimeError("audio to wav failed") from exc
+
+        model_size = "base"
+        model = WhisperModel(model_size, device="cpu", compute_type="int8")
+
+        segments, _ = model.transcribe(wav_io)
+        # Concatenate the text of every segment, separated by single spaces.
+        result = " ".join(segment.text for segment in segments).strip()
+        print(result)
+        return result
 
 # npm install @ffmpeg/ffmpeg @ffmpeg/util @ffmpeg/types @ffmpeg/core-mt
\ No newline at end of file

From 015e9a1064170661392a5b8875042085b0abeba9 Mon Sep 17 00:00:00 2001
From: YasinOnm08 <onmazyasin4@gmail.com>
Date: Fri, 27 Sep 2024 14:03:43 +0200
Subject: [PATCH 4/4] Fix favicon href in root layout

---
 app/layout.tsx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/layout.tsx b/app/layout.tsx
index 0896d49..0425420 100644
--- a/app/layout.tsx
+++ b/app/layout.tsx
@@ -12,7 +12,7 @@ export default function RootLayout({ children }: { children: ReactNode }) {
         <title>{metadata.title}</title>
         <meta name="description" content={metadata.description} />
         {/* Tried adding the favicon here */}
-        <link rel="icon" href="../public/favicon.ico" type="image/x-icon" />
+        <link rel="icon" href="/favicon.ico" type="image/x-icon" />
       </head>
       <body>
         <main>{children}</main>