Merge pull request 'main' (#60) from YasinOnm08/interstellar_ai:main into main
Reviewed-on: https://interstellardevelopment.org/code/code/React-Group/interstellar_ai/pulls/60
This commit is contained in:
		
						commit
						25eb2f5a30
					
				
					 6 changed files with 78 additions and 43 deletions
				
			
		|  | @ -1,11 +1,13 @@ | ||||||
| "use client" | "use client" | ||||||
| import React, { useEffect, useRef, useState } from "react"; | import React, { use, useEffect, useRef, useState } from "react"; | ||||||
| import ConversationFrontend from "../components/ConversationFrontend"; | import ConversationFrontend from "../components/ConversationFrontend"; | ||||||
| import InputFrontend from "../components/InputFrontend"; | import InputFrontend from "../components/InputFrontend"; | ||||||
| import VoiceSend from "./voice_backend" | import VoiceSend from "./voice_backend" | ||||||
| import { AudioRecorder } from "./AudioRecorder"; | import { AudioRecorder } from "./AudioRecorder"; | ||||||
| import axios from "axios"; | import axios from "axios"; | ||||||
| import { resolve } from "path"; | import { resolve } from "path"; | ||||||
|  | import { FFmpeg } from "@ffmpeg/ffmpeg"; | ||||||
|  | import { fetchFile, toBlobURL } from "@ffmpeg/util" | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| const InputOutputBackend: React.FC = () => { | const InputOutputBackend: React.FC = () => { | ||||||
|  | @ -125,6 +127,17 @@ const InputOutputBackend: React.FC = () => { | ||||||
|     }); |     }); | ||||||
|   }; |   }; | ||||||
| 
 | 
 | ||||||
|  |   /* Variables for System-prompt */ | ||||||
|  |   const [preferredCurrency, setPreferredCurrency] = useState(localStorage.getItem("preferredCurrency") || "") | ||||||
|  |   const [preferredLanguage, setPreferredLanguage] = useState(localStorage.getItem("preferredLanguage") || "") | ||||||
|  |   const [timeFormat, setTimeFormat] = useState(localStorage.getItem("timeFormat") || "") | ||||||
|  |   const [preferredMeasurement, setPreferredMeasurement] = useState(localStorage.getItem("preferredMeasurement") || "") | ||||||
|  |   const [timeZone, setTimeZone] = useState(localStorage.getItem("timeZone") || "") | ||||||
|  |   const [dateFormat, setDateFormat] = useState(localStorage.getItem("dateFormat") || "") | ||||||
|  | 
 | ||||||
|  |   useEffect(() => { | ||||||
|  |      | ||||||
|  |   },[preferredCurrency, preferredLanguage, timeFormat, preferredMeasurement, timeZone, dateFormat]) | ||||||
| 
 | 
 | ||||||
|   const addMessage = (role: string, content: string) => { |   const addMessage = (role: string, content: string) => { | ||||||
|     setMessages(previous => [...previous, { role, content }]) |     setMessages(previous => [...previous, { role, content }]) | ||||||
|  | @ -152,19 +165,44 @@ const InputOutputBackend: React.FC = () => { | ||||||
|       audioChunks.current.push(event.data) |       audioChunks.current.push(event.data) | ||||||
|     } |     } | ||||||
|          |          | ||||||
|     mediaRecorder.onstop = () => { |     mediaRecorder.onstop = async () => { | ||||||
|       const audioBlob = new Blob(audioChunks.current, { type: "audio/ogg" }) |       const audioBlob = new Blob(audioChunks.current, { type: "audio/ogg" }) | ||||||
|       const url = URL.createObjectURL(audioBlob) |  | ||||||
|       console.log(url); |  | ||||||
|       setAudioURL(url) |  | ||||||
|       audioChunks.current = [] |       audioChunks.current = [] | ||||||
|  |       // console.log(audioBlob);
 | ||||||
|  |       // const url = URL.createObjectURL(audioBlob)
 | ||||||
|  |       // const audio = new Audio(url);
 | ||||||
|  |       // audio.play().catch(error => console.error("Error playing audio:", error));
 | ||||||
|  | 
 | ||||||
|       const remote = new VoiceSend() |       const remote = new VoiceSend() | ||||||
|       remote.sendToVoiceRecognition(audioBlob,) |       remote.sendToVoiceRecognition(audioBlob) | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     mediaRecorder.start() |     mediaRecorder.start() | ||||||
|     setIsRecording(true) |     setIsRecording(true) | ||||||
|  |   } | ||||||
| 
 | 
 | ||||||
|  |   const ffmpegRef = useRef<FFmpeg | null>(null) | ||||||
|  |   const audioRef = useRef("") | ||||||
|  | 
 | ||||||
|  |   const loadFFmpeg = async () => { | ||||||
|  |     if (!ffmpegRef.current) { | ||||||
|  |       ffmpegRef.current = new FFmpeg() | ||||||
|  |       await ffmpegRef.current.load() | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | 
 | ||||||
|  |   const convertOggToWav = async (oggFile: File | Blob) => { | ||||||
|  |     await loadFFmpeg() | ||||||
|  | 
 | ||||||
|  |     const ffmpeg = ffmpegRef.current! | ||||||
|  | 
 | ||||||
|  |     await ffmpeg.writeFile("input.ogg", await fetchFile(oggFile)) | ||||||
|  |     await ffmpeg.exec(["-i", "input.ogg", "output.wav"]) | ||||||
|  |     const wavData = await ffmpeg.readFile("output.wav") | ||||||
|  |     console.log(wavData); | ||||||
|  |     const wavBlob = new Blob([wavData], { type: "audio/wav" }) | ||||||
|  |     audioRef.current = URL.createObjectURL(wavBlob) | ||||||
|  |     return wavBlob | ||||||
|   } |   } | ||||||
|      |      | ||||||
|   const stopRecording = () => { |   const stopRecording = () => { | ||||||
|  | @ -242,8 +280,3 @@ const InputOutputBackend: React.FC = () => { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| export default InputOutputBackend | export default InputOutputBackend | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
|  |  | ||||||
|  | @ -58,7 +58,7 @@ const ConversationFrontend = React.forwardRef<HTMLDivElement, ConversationProps> | ||||||
|             <button type="button" onClick={onCopyClick}> |             <button type="button" onClick={onCopyClick}> | ||||||
|               <img src="/img/copy.svg" alt="copy" /> |               <img src="/img/copy.svg" alt="copy" /> | ||||||
|             </button> |             </button> | ||||||
|             <p style={{opacity:isClicked?"1":"0", transition:"all 0.3s ease-in-out"}}>Copied!</p> |             <p id="copiedText" style={{opacity:isClicked?"1":"0", transition:"all 0.3s ease-in-out"}}>Copied!</p> | ||||||
|           </div> |           </div> | ||||||
|         </div> |         </div> | ||||||
|       </div> |       </div> | ||||||
|  |  | ||||||
|  | @ -13,10 +13,6 @@ const InputFrontend = React.forwardRef<HTMLDivElement, InputProps>( | ||||||
|   ({ message, onSendClick, onMicClick, inputDisabled, isRecording}, ref: ForwardedRef<HTMLDivElement>) => { |   ({ message, onSendClick, onMicClick, inputDisabled, isRecording}, ref: ForwardedRef<HTMLDivElement>) => { | ||||||
|     const [inputValue, setInputValue] = useState(''); |     const [inputValue, setInputValue] = useState(''); | ||||||
| 
 | 
 | ||||||
|     useEffect(() => { |  | ||||||
|       setInputValue(message); |  | ||||||
|     }, [message]); |  | ||||||
| 
 |  | ||||||
|     const handleInputChange = (e: React.ChangeEvent<HTMLInputElement>) => { |     const handleInputChange = (e: React.ChangeEvent<HTMLInputElement>) => { | ||||||
|       setInputValue(e.target.value); |       setInputValue(e.target.value); | ||||||
|     }; |     }; | ||||||
|  | @ -31,10 +27,6 @@ const InputFrontend = React.forwardRef<HTMLDivElement, InputProps>( | ||||||
|       } |       } | ||||||
|     }; |     }; | ||||||
| 
 | 
 | ||||||
|     const styles = { |  | ||||||
|        |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return ( |     return ( | ||||||
|       <div className="input" id="inputForm" ref={ref}> |       <div className="input" id="inputForm" ref={ref}> | ||||||
|         <input |         <input | ||||||
|  |  | ||||||
|  | @ -69,3 +69,7 @@ | ||||||
| .button-container img { | .button-container img { | ||||||
|     height: 1.5em; |     height: 1.5em; | ||||||
| } | } | ||||||
|  | 
 | ||||||
|  | #copiedText{ | ||||||
|  |     margin-top: 1em; | ||||||
|  | } | ||||||
|  |  | ||||||
							
								
								
									
										30
									
								
								py/voice.py
									
										
									
									
									
								
							
							
						
						
									
										30
									
								
								py/voice.py
									
										
									
									
									
								
							|  | @ -1,26 +1,32 @@ | ||||||
| import io | import io | ||||||
| import numpy as np | import numpy as np | ||||||
| import whisper | from faster_whisper import WhisperModel | ||||||
| from pydub import AudioSegment | from pydub import AudioSegment | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class VoiceRecognition: | class VoiceRecognition: | ||||||
|     @staticmethod |     @staticmethod | ||||||
|     def recognition(audio): |     def recognition(audio): | ||||||
|         audio_data = audio.read() |         audio_buffer = io.BytesIO(audio.read()) | ||||||
|         with io.BytesIO(audio_data) as audio_buffer: |  | ||||||
|             audio_segment = AudioSegment.from_ogg(audio_buffer) |  | ||||||
|          |          | ||||||
|             raw_data = np.array(audio_segment.get_array_of_samples()) |         try: | ||||||
|  |             audio_segment = AudioSegment.from_file(audio_buffer, format="ogg") | ||||||
|              |              | ||||||
|             if audio_segment.channels > 1: |             wav_io = io.BytesIO() | ||||||
|                 raw_data = raw_data.reshape((-1, audio_segment.channels))[:, 0] |             audio_segment.export(wav_io, format="wav") | ||||||
|  |             wav_io.seek(0) | ||||||
|  |         except: | ||||||
|  |             print("audio to wav failed") | ||||||
|          |          | ||||||
|             audio_data = raw_data.astype(np.float32) / (2 ** (audio_segment.sample_width * 8 - 1)) |         model_size = "base" | ||||||
|  |         model = WhisperModel(model_size, device="cpu", compute_type="int8") | ||||||
|          |          | ||||||
|         model = whisper.load_model("base") |         segments, _ = model.transcribe(wav_io) | ||||||
|         result = model.transcribe(audio_data) |         transcription = "" | ||||||
|         print(result["text"]) |         for segment in segments: | ||||||
|         return result["text"] |             transcription += segment.text + " " | ||||||
|  |         result = transcription.strip() | ||||||
|  |         print(result) | ||||||
|  |         return result | ||||||
| 
 | 
 | ||||||
| # npm install @ffmpeg/ffmpeg @ffmpeg/util @ffmpeg/types @ffmpeg/core-mt | # npm install @ffmpeg/ffmpeg @ffmpeg/util @ffmpeg/types @ffmpeg/core-mt | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue