forked from React-Group/interstellar_ai
Merge branch 'main' into main
This commit is contained in:
commit
be2a0136e3
7 changed files with 79 additions and 44 deletions
|
@ -1,11 +1,13 @@
|
||||||
"use client"
|
"use client"
|
||||||
import React, { useEffect, useRef, useState } from "react";
|
import React, { use, useEffect, useRef, useState } from "react";
|
||||||
import ConversationFrontend from "../components/ConversationFrontend";
|
import ConversationFrontend from "../components/ConversationFrontend";
|
||||||
import InputFrontend from "../components/InputFrontend";
|
import InputFrontend from "../components/InputFrontend";
|
||||||
import VoiceSend from "./voice_backend"
|
import VoiceSend from "./voice_backend"
|
||||||
import { AudioRecorder } from "./AudioRecorder";
|
import { AudioRecorder } from "./AudioRecorder";
|
||||||
import axios from "axios";
|
import axios from "axios";
|
||||||
import { resolve } from "path";
|
import { resolve } from "path";
|
||||||
|
import { FFmpeg } from "@ffmpeg/ffmpeg";
|
||||||
|
import { fetchFile, toBlobURL } from "@ffmpeg/util"
|
||||||
|
|
||||||
|
|
||||||
const InputOutputBackend: React.FC = () => {
|
const InputOutputBackend: React.FC = () => {
|
||||||
|
@ -125,6 +127,17 @@ const InputOutputBackend: React.FC = () => {
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Variables for System-prompt */
|
||||||
|
const [preferredCurrency, setPreferredCurrency] = useState(localStorage.getItem("preferredCurrency") || "")
|
||||||
|
const [preferredLanguage, setPreferredLanguage] = useState(localStorage.getItem("preferredLanguage") || "")
|
||||||
|
const [timeFormat, setTimeFormat] = useState(localStorage.getItem("timeFormat") || "")
|
||||||
|
const [preferredMeasurement, setPreferredMeasurement] = useState(localStorage.getItem("preferredMeasurement") || "")
|
||||||
|
const [timeZone, setTimeZone] = useState(localStorage.getItem("timeZone") || "")
|
||||||
|
const [dateFormat, setDateFormat] = useState(localStorage.getItem("dateFormat") || "")
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
|
||||||
|
},[preferredCurrency, preferredLanguage, timeFormat, preferredMeasurement, timeZone, dateFormat])
|
||||||
|
|
||||||
const addMessage = (role: string, content: string) => {
|
const addMessage = (role: string, content: string) => {
|
||||||
setMessages(previous => [...previous, { role, content }])
|
setMessages(previous => [...previous, { role, content }])
|
||||||
|
@ -152,19 +165,44 @@ const InputOutputBackend: React.FC = () => {
|
||||||
audioChunks.current.push(event.data)
|
audioChunks.current.push(event.data)
|
||||||
}
|
}
|
||||||
|
|
||||||
mediaRecorder.onstop = () => {
|
mediaRecorder.onstop = async () => {
|
||||||
const audioBlob = new Blob(audioChunks.current, { type: "audio/ogg" })
|
const audioBlob = new Blob(audioChunks.current, { type: "audio/ogg" })
|
||||||
const url = URL.createObjectURL(audioBlob)
|
|
||||||
console.log(url);
|
|
||||||
setAudioURL(url)
|
|
||||||
audioChunks.current = []
|
audioChunks.current = []
|
||||||
|
// console.log(audioBlob);
|
||||||
|
// const url = URL.createObjectURL(audioBlob)
|
||||||
|
// const audio = new Audio(url);
|
||||||
|
// audio.play().catch(error => console.error("Error playing audio:", error));
|
||||||
|
|
||||||
const remote = new VoiceSend()
|
const remote = new VoiceSend()
|
||||||
remote.sendToVoiceRecognition(audioBlob,)
|
remote.sendToVoiceRecognition(audioBlob)
|
||||||
}
|
}
|
||||||
|
|
||||||
mediaRecorder.start()
|
mediaRecorder.start()
|
||||||
setIsRecording(true)
|
setIsRecording(true)
|
||||||
|
}
|
||||||
|
|
||||||
|
const ffmpegRef = useRef<FFmpeg | null>(null)
|
||||||
|
const audioRef = useRef("")
|
||||||
|
|
||||||
|
/**
 * Lazily creates and loads the FFmpeg instance held in `ffmpegRef`.
 *
 * The instance is published to `ffmpegRef.current` only after `load()` has
 * resolved. A failed load is therefore never cached (the next call retries),
 * and callers never observe an instance that is still loading.
 *
 * @returns A promise that resolves once `ffmpegRef.current` holds a loaded
 *          instance.
 * @throws Whatever `FFmpeg.load()` rejects with.
 */
const loadFFmpeg = async (): Promise<void> => {
    if (ffmpegRef.current) {
        return
    }

    const instance = new FFmpeg()
    await instance.load()

    // Another call may have finished loading while this one was awaiting;
    // keep the instance that is already published and discard this one.
    if (ffmpegRef.current) {
        instance.terminate()
        return
    }

    ffmpegRef.current = instance
}
|
||||||
|
|
||||||
|
/**
 * Converts an Ogg recording to WAV with ffmpeg.wasm.
 *
 * The input is written to FFmpeg's in-memory file system as `input.ogg`,
 * transcoded to `output.wav`, and read back. Both scratch files are removed
 * afterwards so a later conversion never picks up stale data. On success
 * `audioRef.current` is replaced with an object URL for the new WAV blob and
 * the previous URL, if any, is revoked.
 *
 * @param oggFile The Ogg audio to convert.
 * @returns The converted audio as a Blob of type `audio/wav`.
 * @throws Error if FFmpeg is not available after loading or exits with a
 *         non-zero code; also propagates errors from the FFmpeg file calls.
 */
const convertOggToWav = async (oggFile: File | Blob): Promise<Blob> => {
    await loadFFmpeg()

    const ffmpeg = ffmpegRef.current
    if (!ffmpeg) {
        throw new Error("FFmpeg is not available after loading")
    }

    try {
        await ffmpeg.writeFile("input.ogg", await fetchFile(oggFile))

        // -y: ffmpeg does not silently overwrite an existing output file.
        const exitCode = await ffmpeg.exec(["-y", "-i", "input.ogg", "output.wav"])
        if (exitCode !== 0) {
            throw new Error(`FFmpeg exited with code ${exitCode} while converting Ogg to WAV`)
        }

        const wavData = await ffmpeg.readFile("output.wav")
        const wavBlob = new Blob([wavData], { type: "audio/wav" })

        // Release the previous object URL before replacing it.
        if (audioRef.current) {
            URL.revokeObjectURL(audioRef.current)
        }
        audioRef.current = URL.createObjectURL(wavBlob)

        return wavBlob
    } finally {
        // Best-effort cleanup: either file may be missing if an earlier step failed.
        await Promise.allSettled([
            ffmpeg.deleteFile("input.ogg"),
            ffmpeg.deleteFile("output.wav"),
        ])
    }
}
|
||||||
|
|
||||||
const stopRecording = () => {
|
const stopRecording = () => {
|
||||||
|
@ -242,8 +280,3 @@ const InputOutputBackend: React.FC = () => {
|
||||||
}
|
}
|
||||||
|
|
||||||
export default InputOutputBackend
|
export default InputOutputBackend
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -58,7 +58,7 @@ const ConversationFrontend = React.forwardRef<HTMLDivElement, ConversationProps>
|
||||||
<button type="button" onClick={onCopyClick}>
|
<button type="button" onClick={onCopyClick}>
|
||||||
<img src="/img/copy.svg" alt="copy" />
|
<img src="/img/copy.svg" alt="copy" />
|
||||||
</button>
|
</button>
|
||||||
<p style={{opacity:isClicked?"1":"0", transition:"all 0.3s ease-in-out"}}>Copied!</p>
|
<p id="copiedText" style={{opacity:isClicked?"1":"0", transition:"all 0.3s ease-in-out"}}>Copied!</p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
|
@ -13,10 +13,6 @@ const InputFrontend = React.forwardRef<HTMLDivElement, InputProps>(
|
||||||
({ message, onSendClick, onMicClick, inputDisabled, isRecording}, ref: ForwardedRef<HTMLDivElement>) => {
|
({ message, onSendClick, onMicClick, inputDisabled, isRecording}, ref: ForwardedRef<HTMLDivElement>) => {
|
||||||
const [inputValue, setInputValue] = useState('');
|
const [inputValue, setInputValue] = useState('');
|
||||||
|
|
||||||
useEffect(() => {
|
|
||||||
setInputValue(message);
|
|
||||||
}, [message]);
|
|
||||||
|
|
||||||
const handleInputChange = (e: React.ChangeEvent<HTMLInputElement>) => {
|
const handleInputChange = (e: React.ChangeEvent<HTMLInputElement>) => {
|
||||||
setInputValue(e.target.value);
|
setInputValue(e.target.value);
|
||||||
};
|
};
|
||||||
|
@ -31,10 +27,6 @@ const InputFrontend = React.forwardRef<HTMLDivElement, InputProps>(
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const styles = {
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="input" id="inputForm" ref={ref}>
|
<div className="input" id="inputForm" ref={ref}>
|
||||||
<input
|
<input
|
||||||
|
|
|
@ -69,3 +69,7 @@
|
||||||
.button-container img {
|
.button-container img {
|
||||||
height: 1.5em;
|
height: 1.5em;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#copiedText{
|
||||||
|
margin-top: 1em;
|
||||||
|
}
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{
|
{
|
||||||
"name": "interstellar_ai",
|
"name": "interstellar_ai",
|
||||||
"version": "0.1.0",
|
"version": "0.2.0",
|
||||||
"private": true,
|
"private": true,
|
||||||
"main": "main.js",
|
"main": "main.js",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
|
|
30
py/voice.py
30
py/voice.py
|
@ -1,26 +1,32 @@
|
||||||
import io
|
import io
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import whisper
|
from faster_whisper import WhisperModel
|
||||||
from pydub import AudioSegment
|
from pydub import AudioSegment
|
||||||
|
|
||||||
|
|
||||||
class VoiceRecognition:
    """Speech-to-text helper that transcribes Ogg audio with faster-whisper."""

    # Lazily created WhisperModel, reused across calls so the model is not
    # re-loaded for every transcription.
    _model = None

    @staticmethod
    def _get_model():
        """Return the shared WhisperModel, creating it on first use."""
        if VoiceRecognition._model is None:
            model_size = "base"
            VoiceRecognition._model = WhisperModel(
                model_size, device="cpu", compute_type="int8"
            )
        return VoiceRecognition._model

    @staticmethod
    def recognition(audio):
        """Transcribe an Ogg recording and return the recognised text.

        Args:
            audio: Object with a ``read()`` method returning the Ogg-encoded
                audio bytes.

        Returns:
            The text of all recognised segments joined by spaces, with
            leading and trailing whitespace stripped.

        Raises:
            Exception: Re-raises whatever the Ogg -> WAV conversion raised,
                after logging it, instead of continuing without usable audio.
        """
        audio_buffer = io.BytesIO(audio.read())

        try:
            audio_segment = AudioSegment.from_file(audio_buffer, format="ogg")

            wav_io = io.BytesIO()
            audio_segment.export(wav_io, format="wav")
            # Rewind so the transcriber reads the WAV data from the start.
            wav_io.seek(0)
        except Exception:
            # Without a converted buffer there is nothing to transcribe;
            # surface the real decoding error to the caller.
            print("audio to wav failed")
            raise

        model = VoiceRecognition._get_model()

        segments, _ = model.transcribe(wav_io)
        result = " ".join(segment.text for segment in segments).strip()
        print(result)
        return result
|
||||||
|
|
||||||
# npm install @ffmpeg/ffmpeg @ffmpeg/util @ffmpeg/types @ffmpeg/core-mt
|
# npm install @ffmpeg/ffmpeg @ffmpeg/util @ffmpeg/types @ffmpeg/core-mt
|
Loading…
Reference in a new issue