forked from React-Group/interstellar_ai

Compare commits

4 commits: 8fcd868236 ... 8090ce969e

| SHA1 |
|---|
| 8090ce969e |
| 233a173697 |
| e1f77c5b17 |
| 9663f49dee |

7 changed files with 79 additions and 44 deletions
```diff
@@ -1,11 +1,13 @@
 "use client"
-import React, { useEffect, useRef, useState } from "react";
+import React, { use, useEffect, useRef, useState } from "react";
 import ConversationFrontend from "../components/ConversationFrontend";
 import InputFrontend from "../components/InputFrontend";
 import VoiceSend from "./voice_backend"
 import { AudioRecorder } from "./AudioRecorder";
 import axios from "axios";
 import { resolve } from "path";
+import { FFmpeg } from "@ffmpeg/ffmpeg";
+import { fetchFile, toBlobURL } from "@ffmpeg/util"
 
 
 const InputOutputBackend: React.FC = () => {
```
```diff
@@ -125,6 +127,17 @@ const InputOutputBackend: React.FC = () => {
     });
   };
 
+  /* Variables for System-prompt */
+  const [preferredCurrency, setPreferredCurrency] = useState(localStorage.getItem("preferredCurrency") || "")
+  const [preferredLanguage, setPreferredLanguage] = useState(localStorage.getItem("preferredLanguage") || "")
+  const [timeFormat, setTimeFormat] = useState(localStorage.getItem("timeFormat") || "")
+  const [preferredMeasurement, setPreferredMeasurement] = useState(localStorage.getItem("preferredMeasurement") || "")
+  const [timeZone, setTimeZone] = useState(localStorage.getItem("timeZone") || "")
+  const [dateFormat, setDateFormat] = useState(localStorage.getItem("dateFormat") || "")
+
+  useEffect(() => {
+
+  },[preferredCurrency, preferredLanguage, timeFormat, preferredMeasurement, timeZone, dateFormat])
 
   const addMessage = (role: string, content: string) => {
     setMessages(previous => [...previous, { role, content }])
```
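Two notes on this hunk. The trailing `useEffect` has an empty body, so for now it only fires when one of the six settings changes; presumably the system-prompt assembly will be filled in later. More importantly, calling `localStorage.getItem` directly inside a `useState` initializer can throw in Next.js, because even a `"use client"` component is pre-rendered on the server, where `localStorage` does not exist. A minimal sketch of a guard, assuming nothing beyond what the diff shows (the hook name `useStoredSetting` is invented here):

```ts
import { useState } from "react";

// Hypothetical helper, not part of the commit: read a persisted setting only
// when a real browser environment exists, so server-side prerendering of this
// "use client" component cannot crash on a missing localStorage.
function useStoredSetting(key: string) {
  return useState<string>(() =>
    typeof window === "undefined" ? "" : localStorage.getItem(key) ?? ""
  );
}

// e.g. const [preferredCurrency, setPreferredCurrency] = useStoredSetting("preferredCurrency")
```

The lazy initializer also means the storage read happens once per mount instead of on every render.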
```diff
@@ -152,25 +165,50 @@ const InputOutputBackend: React.FC = () => {
       audioChunks.current.push(event.data)
     }
 
-    mediaRecorder.onstop = () => {
+    mediaRecorder.onstop = async () => {
       const audioBlob = new Blob(audioChunks.current, { type: "audio/ogg" })
+      const url = URL.createObjectURL(audioBlob)
+      console.log(url);
+      setAudioURL(url)
       audioChunks.current = []
       // console.log(audioBlob);
       // const url = URL.createObjectURL(audioBlob)
       // const audio = new Audio(url);
       // audio.play().catch(error => console.error("Error playing audio:", error));
 
       const remote = new VoiceSend()
-      remote.sendToVoiceRecognition(audioBlob,)
+      remote.sendToVoiceRecognition(audioBlob)
     }
 
     mediaRecorder.start()
     setIsRecording(true)
   }
 
+  const ffmpegRef = useRef<FFmpeg | null>(null)
+  const audioRef = useRef("")
+
+  const loadFFmpeg = async () => {
+    if (!ffmpegRef.current) {
+      ffmpegRef.current = new FFmpeg()
+      await ffmpegRef.current.load()
+    }
+  }
+
+  const convertOggToWav = async (oggFile: File | Blob) => {
+    await loadFFmpeg()
+
+    const ffmpeg = ffmpegRef.current!
+
+    await ffmpeg.writeFile("input.ogg", await fetchFile(oggFile))
+    await ffmpeg.exec(["-i", "input.ogg", "output.wav"])
+    const wavData = await ffmpeg.readFile("output.wav")
+    console.log(wavData);
+    const wavBlob = new Blob([wavData], { type: "audio/wav" })
+    audioRef.current = URL.createObjectURL(wavBlob)
+    return wavBlob
+  }
-
-    const stopRecording = () => {
-      mediaRecorderRef.current?.stop()
-      setIsRecording(false)
-    }
+  const stopRecording = () => {
+    mediaRecorderRef.current?.stop()
+    setIsRecording(false)
+  }
 
 
   const handleMicClick = () => {
```
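As committed, `onstop` still uploads the raw ogg blob: `convertOggToWav` is defined but never called in the hunks shown (and the `toBlobURL` import appears unused so far). If the intent is to transcode before upload, the glue could look like the sketch below; it assumes only names from this diff (`convertOggToWav` above, `VoiceSend` from `./voice_backend`), while `handleRecordingStop` itself is invented here.

```ts
import VoiceSend from "./voice_backend";

// Declared, not defined: stands in for the convertOggToWav helper added above.
declare function convertOggToWav(oggFile: File | Blob): Promise<Blob>;

// Hypothetical glue: transcode the finished recording to wav with ffmpeg.wasm,
// then hand the wav (instead of the raw ogg) to the voice backend.
async function handleRecordingStop(chunks: Blob[]): Promise<void> {
  const oggBlob = new Blob(chunks, { type: "audio/ogg" });
  const wavBlob = await convertOggToWav(oggBlob);
  new VoiceSend().sendToVoiceRecognition(wavBlob);
}
```

That would also explain why `onstop` became `async` in this commit.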
```diff
@@ -236,14 +274,9 @@ const InputOutputBackend: React.FC = () => {
         onMicClick={handleMicClick}
         inputDisabled={inputDisabled}
         isRecording={isRecording}
-      />
+      /> 
     </div>
   )
 }
 
-export default InputOutputBackend
-
-
-
-
-
+export default InputOutputBackend
```
```diff
@@ -58,7 +58,7 @@ const ConversationFrontend = React.forwardRef<HTMLDivElement, ConversationProps>
             <button type="button" onClick={onCopyClick}>
               <img src="/img/copy.svg" alt="copy" />
             </button>
-            <p style={{opacity:isClicked?"1":"0", transition:"all 0.3s ease-in-out"}}>Copied!</p>
+            <p id="copiedText" style={{opacity:isClicked?"1":"0", transition:"all 0.3s ease-in-out"}}>Copied!</p>
           </div>
         </div>
       </div>
```
```diff
@@ -13,10 +13,6 @@ const InputFrontend = React.forwardRef<HTMLDivElement, InputProps>(
   ({ message, onSendClick, onMicClick, inputDisabled, isRecording}, ref: ForwardedRef<HTMLDivElement>) => {
     const [inputValue, setInputValue] = useState('');
 
-    useEffect(() => {
-      setInputValue(message);
-    }, [message]);
-
     const handleInputChange = (e: React.ChangeEvent<HTMLInputElement>) => {
       setInputValue(e.target.value);
     };
@@ -31,10 +27,6 @@ const InputFrontend = React.forwardRef<HTMLDivElement, InputProps>(
       }
     };
 
-    const styles = {
-
-    }
-
     return (
      <div className="input" id="inputForm" ref={ref}>
        <input
```
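One behavioral consequence of this cleanup: with the effect gone, `inputValue` starts from `useState('')` and never follows the `message` prop again, so the field is purely local state. If one-way prop-to-state sync is still wanted somewhere, the deleted effect is the standard pattern; a generic sketch (the hook name `useSyncedValue` is invented here):

```ts
import { useEffect, useState } from "react";

// Hypothetical hook, not part of the commit: editable local state that snaps
// back to `value` whenever the prop changes, mirroring the removed effect.
function useSyncedValue(value: string) {
  const [inputValue, setInputValue] = useState(value);
  useEffect(() => {
    setInputValue(value);
  }, [value]);
  return [inputValue, setInputValue] as const;
}
```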
```diff
@@ -12,7 +12,7 @@ export default function RootLayout({ children }: { children: ReactNode }) {
         <title>{metadata.title}</title>
         <meta name="description" content={metadata.description} />
         {/* Tried adding the favicon here */}
-        <link rel="icon" href="./public/favicon.ico" type="image/x-icon" />
+        <link rel="icon" href="../public/favicon.ico" type="image/x-icon" />
       </head>
       <body>
         <main>{children}</main>
```
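Neither `./public/favicon.ico` nor `../public/favicon.ico` will resolve in a deployed Next.js app: everything in `public/` is served from the site root, and the folder name never appears in the URL. A sketch of the conventional form (the wrapper component is illustrative only):

```tsx
import React from "react";

// Files in public/ are served from the root, so the href omits the folder.
export const FaviconLink = () => (
  <link rel="icon" href="/favicon.ico" type="image/x-icon" />
);
```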
```diff
@@ -69,3 +69,7 @@
 .button-container img {
     height: 1.5em;
 }
+
+#copiedText{
+    margin-top: 1em;
+}
```
```diff
@@ -1,4 +1,4 @@
-from flask i                mport Flask, request, jsonify
+from flask import Flask, request, jsonify
 from flask_cors import CORS
 import secrets
 import threading
```

py/voice.py: 36 changed lines
```diff
@@ -1,26 +1,32 @@
 import io
 import numpy as np
-import whisper
+from faster_whisper import WhisperModel
 from pydub import AudioSegment
 
 
 class VoiceRecognition:
     @staticmethod
     def recognition(audio):
-        audio_data = audio.read()
-        with io.BytesIO(audio_data) as audio_buffer:
-            audio_segment = AudioSegment.from_ogg(audio_buffer)
-
-            raw_data = np.array(audio_segment.get_array_of_samples())
-
-            if audio_segment.channels > 1:
-                raw_data = raw_data.reshape((-1, audio_segment.channels))[:, 0]
-
-            audio_data = raw_data.astype(np.float32) / (2 ** (audio_segment.sample_width * 8 - 1))
+        audio_buffer = io.BytesIO(audio.read())
 
-        model = whisper.load_model("base")
-        result = model.transcribe(audio_data)
-        print(result["text"])
-        return result["text"]
+        try:
+            audio_segment = AudioSegment.from_file(audio_buffer, format="ogg")
+
+            wav_io = io.BytesIO()
+            audio_segment.export(wav_io, format="wav")
+            wav_io.seek(0)
+        except:
+            print("audio to wav failed")
+
+        model_size = "base"
+        model = WhisperModel(model_size, device="cpu", compute_type="int8")
+
+        segments, _ = model.transcribe(wav_io)
+        transcription = ""
+        for segment in segments:
+            transcription += segment.text + " "
+        result = transcription.strip()
+        print(result)
+        return result
 
+# npm install @ffmpeg/ffmpeg @ffmpeg/util @ffmpeg/types @ffmpeg/core-mt
```
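Two caveats on the rewritten `recognition`, offered as observations rather than as part of the commit. The bare `except:` swallows the conversion error and lets execution continue, so the later `model.transcribe(wav_io)` would raise a `NameError` because `wav_io` was never bound; re-raising or returning early would fail more clearly. On the other hand, faster-whisper's `transcribe` does accept a file-like object, so passing the in-memory `wav_io` works without a temp file. Constructing `WhisperModel` inside the method reloads the model on every request; hoisting it to module level would amortize that cost. The `numpy` import is now unused as well.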