Merge branch 'main' into main

This commit is contained in:
sageTheDm 2024-09-27 14:57:50 +02:00
commit be2a0136e3
7 changed files with 79 additions and 44 deletions

View file

@ -1,11 +1,13 @@
"use client" "use client"
import React, { useEffect, useRef, useState } from "react"; import React, { use, useEffect, useRef, useState } from "react";
import ConversationFrontend from "../components/ConversationFrontend"; import ConversationFrontend from "../components/ConversationFrontend";
import InputFrontend from "../components/InputFrontend"; import InputFrontend from "../components/InputFrontend";
import VoiceSend from "./voice_backend" import VoiceSend from "./voice_backend"
import { AudioRecorder } from "./AudioRecorder"; import { AudioRecorder } from "./AudioRecorder";
import axios from "axios"; import axios from "axios";
import { resolve } from "path"; import { resolve } from "path";
import { FFmpeg } from "@ffmpeg/ffmpeg";
import { fetchFile, toBlobURL } from "@ffmpeg/util"
const InputOutputBackend: React.FC = () => { const InputOutputBackend: React.FC = () => {
@ -125,6 +127,17 @@ const InputOutputBackend: React.FC = () => {
}); });
}; };
/* Variables for System-prompt */
/**
 * Reads a persisted user preference from localStorage.
 *
 * @param key - the localStorage key to look up
 * @returns the stored string, or "" when nothing is stored or when no
 *          browser storage is available (e.g. the component is rendered
 *          outside a browser, such as during server pre-rendering)
 */
const readStoredPreference = (key: string): string => {
  if (typeof window === "undefined") {
    return ""
  }
  return window.localStorage.getItem(key) || ""
}
// Lazy initializers: storage is consulted once when the state is created
// instead of on every render of the component.
const [preferredCurrency, setPreferredCurrency] = useState(() => readStoredPreference("preferredCurrency"))
const [preferredLanguage, setPreferredLanguage] = useState(() => readStoredPreference("preferredLanguage"))
const [timeFormat, setTimeFormat] = useState(() => readStoredPreference("timeFormat"))
const [preferredMeasurement, setPreferredMeasurement] = useState(() => readStoredPreference("preferredMeasurement"))
const [timeZone, setTimeZone] = useState(() => readStoredPreference("timeZone"))
const [dateFormat, setDateFormat] = useState(() => readStoredPreference("dateFormat"))
useEffect(() => {
  // Intentionally empty: placeholder that re-runs whenever one of the
  // preferences above changes. NOTE(review): presumably meant to rebuild the
  // system prompt — confirm and implement, or remove.
}, [preferredCurrency, preferredLanguage, timeFormat, preferredMeasurement, timeZone, dateFormat])
const addMessage = (role: string, content: string) => { const addMessage = (role: string, content: string) => {
setMessages(previous => [...previous, { role, content }]) setMessages(previous => [...previous, { role, content }])
@ -152,19 +165,44 @@ const InputOutputBackend: React.FC = () => {
audioChunks.current.push(event.data) audioChunks.current.push(event.data)
} }
mediaRecorder.onstop = () => { mediaRecorder.onstop = async () => {
const audioBlob = new Blob(audioChunks.current, { type: "audio/ogg" }) const audioBlob = new Blob(audioChunks.current, { type: "audio/ogg" })
const url = URL.createObjectURL(audioBlob)
console.log(url);
setAudioURL(url)
audioChunks.current = [] audioChunks.current = []
// console.log(audioBlob);
// const url = URL.createObjectURL(audioBlob)
// const audio = new Audio(url);
// audio.play().catch(error => console.error("Error playing audio:", error));
const remote = new VoiceSend() const remote = new VoiceSend()
remote.sendToVoiceRecognition(audioBlob,) remote.sendToVoiceRecognition(audioBlob)
} }
mediaRecorder.start() mediaRecorder.start()
setIsRecording(true) setIsRecording(true)
}
// Holds the FFmpeg instance shared by every conversion; null until loaded.
const ffmpegRef = useRef<FFmpeg | null>(null)
// Holds the object URL of the most recently converted WAV ("" before any conversion).
const audioRef = useRef("")

/**
 * Lazily creates and loads the FFmpeg instance kept in `ffmpegRef`.
 *
 * The instance is published to the ref only after `load()` has resolved, so
 * callers that find `ffmpegRef.current` set can rely on it being loaded, and
 * a failed load leaves the ref empty so the next call retries.
 *
 * @returns a promise that resolves once a loaded instance is available in
 *          `ffmpegRef.current`; rejects if loading fails
 */
const loadFFmpeg = async (): Promise<void> => {
  if (ffmpegRef.current) {
    return
  }

  const ffmpeg = new FFmpeg()
  await ffmpeg.load()

  if (ffmpegRef.current) {
    // An overlapping call finished first; drop this duplicate instance.
    ffmpeg.terminate()
    return
  }
  ffmpegRef.current = ffmpeg
}
/**
 * Converts an Ogg recording to WAV with FFmpeg.
 *
 * Besides returning the result, it stores an object URL for the converted
 * audio in `audioRef.current`.
 *
 * @param oggFile - the recorded audio to convert
 * @returns a Blob typed "audio/wav" containing FFmpeg's output
 */
const convertOggToWav = async (oggFile: File | Blob) => {
// Make sure ffmpegRef.current has been populated before it is dereferenced.
await loadFFmpeg()
const ffmpeg = ffmpegRef.current!
// Copy the input into FFmpeg's file system under a fixed name.
// NOTE(review): fixed names mean overlapping conversions would share
// "input.ogg"/"output.wav"; neither file is deleted afterwards here.
await ffmpeg.writeFile("input.ogg", await fetchFile(oggFile))
// The output format is selected by the ".wav" extension of the target name.
await ffmpeg.exec(["-i", "input.ogg", "output.wav"])
const wavData = await ffmpeg.readFile("output.wav")
// NOTE(review): debug output of the raw converted data — consider removing.
console.log(wavData);
const wavBlob = new Blob([wavData], { type: "audio/wav" })
// NOTE(review): the previous object URL is overwritten without a visible
// URL.revokeObjectURL call — verify it is released elsewhere.
audioRef.current = URL.createObjectURL(wavBlob)
return wavBlob
} }
const stopRecording = () => { const stopRecording = () => {
@ -242,8 +280,3 @@ const InputOutputBackend: React.FC = () => {
} }
export default InputOutputBackend export default InputOutputBackend

View file

@ -58,7 +58,7 @@ const ConversationFrontend = React.forwardRef<HTMLDivElement, ConversationProps>
<button type="button" onClick={onCopyClick}> <button type="button" onClick={onCopyClick}>
<img src="/img/copy.svg" alt="copy" /> <img src="/img/copy.svg" alt="copy" />
</button> </button>
<p style={{opacity:isClicked?"1":"0", transition:"all 0.3s ease-in-out"}}>Copied!</p> <p id="copiedText" style={{opacity:isClicked?"1":"0", transition:"all 0.3s ease-in-out"}}>Copied!</p>
</div> </div>
</div> </div>
</div> </div>

View file

@ -13,10 +13,6 @@ const InputFrontend = React.forwardRef<HTMLDivElement, InputProps>(
({ message, onSendClick, onMicClick, inputDisabled, isRecording}, ref: ForwardedRef<HTMLDivElement>) => { ({ message, onSendClick, onMicClick, inputDisabled, isRecording}, ref: ForwardedRef<HTMLDivElement>) => {
const [inputValue, setInputValue] = useState(''); const [inputValue, setInputValue] = useState('');
useEffect(() => {
setInputValue(message);
}, [message]);
const handleInputChange = (e: React.ChangeEvent<HTMLInputElement>) => { const handleInputChange = (e: React.ChangeEvent<HTMLInputElement>) => {
setInputValue(e.target.value); setInputValue(e.target.value);
}; };
@ -31,10 +27,6 @@ const InputFrontend = React.forwardRef<HTMLDivElement, InputProps>(
} }
}; };
const styles = {
}
return ( return (
<div className="input" id="inputForm" ref={ref}> <div className="input" id="inputForm" ref={ref}>
<input <input

View file

@ -69,3 +69,7 @@
.button-container img { .button-container img {
height: 1.5em; height: 1.5em;
} }
#copiedText{
margin-top: 1em;
}

View file

@ -1,6 +1,6 @@
{ {
"name": "interstellar_ai", "name": "interstellar_ai",
"version": "0.1.0", "version": "0.2.0",
"private": true, "private": true,
"main": "main.js", "main": "main.js",
"scripts": { "scripts": {

View file

@ -1,26 +1,32 @@
import io import io
import numpy as np import numpy as np
import whisper from faster_whisper import WhisperModel
from pydub import AudioSegment from pydub import AudioSegment
class VoiceRecognition: class VoiceRecognition:
@staticmethod @staticmethod
def recognition(audio): def recognition(audio):
audio_data = audio.read() audio_buffer = io.BytesIO(audio.read())
with io.BytesIO(audio_data) as audio_buffer:
audio_segment = AudioSegment.from_ogg(audio_buffer)
raw_data = np.array(audio_segment.get_array_of_samples()) try:
audio_segment = AudioSegment.from_file(audio_buffer, format="ogg")
if audio_segment.channels > 1: wav_io = io.BytesIO()
raw_data = raw_data.reshape((-1, audio_segment.channels))[:, 0] audio_segment.export(wav_io, format="wav")
wav_io.seek(0)
except:
print("audio to wav failed")
audio_data = raw_data.astype(np.float32) / (2 ** (audio_segment.sample_width * 8 - 1)) model_size = "base"
model = WhisperModel(model_size, device="cpu", compute_type="int8")
model = whisper.load_model("base") segments, _ = model.transcribe(wav_io)
result = model.transcribe(audio_data) transcription = ""
print(result["text"]) for segment in segments:
return result["text"] transcription += segment.text + " "
result = transcription.strip()
print(result)
return result
# npm install @ffmpeg/ffmpeg @ffmpeg/util @ffmpeg/types @ffmpeg/core-mt # npm install @ffmpeg/ffmpeg @ffmpeg/util @ffmpeg/types @ffmpeg/core-mt