main #33

Merged
YasinOnm08 merged 4 commits from React-Group/interstellar_ai:main into main 2024-09-30 13:46:40 +02:00
4 changed files with 107 additions and 97 deletions

View file

@@ -2,7 +2,7 @@
 import React, { use, useEffect, useRef, useState } from "react";
 import ConversationFrontend from "../components/ConversationFrontend";
 import InputFrontend from "../components/InputFrontend";
-import VoiceSend from "./voice_backend"
+import { sendToVoiceRecognition } from "./voice_backend"
 import { AudioRecorder } from "./AudioRecorder";
 import axios from "axios";
 import { resolve } from "path";
@@ -17,41 +17,53 @@ const InputOutputBackend: React.FC = () => {
     content: string
   }

-  /* Variables for System-prompt */
-  const [preferredCurrency, setPreferredCurrency] = useState<string | null>("")
-  const [preferredLanguage, setPreferredLanguage] = useState<string | null>("")
-  const [timeFormat, setTimeFormat] = useState<string | null>("")
-  const [preferredMeasurement, setPreferredMeasurement] = useState<string | null>("")
-  const [timeZone, setTimeZone] = useState<string | null>("")
-  const [dateFormat, setDateFormat] = useState<string | null>("")
+  const [preferredCurrency, setPreferredCurrency] = useState<string | null>(null);
+  const [preferredLanguage, setPreferredLanguage] = useState<string | null>(null);
+  const [timeFormat, setTimeFormat] = useState<string | null>(null);
+  const [preferredMeasurement, setPreferredMeasurement] = useState<string | null>(null);
+  const [timeZone, setTimeZone] = useState<string | null>(null);
+  const [dateFormat, setDateFormat] = useState<string | null>(null);
+  const [messages, setMessages] = useState<Message[]>([]);

   useEffect(() => {
-    setPreferredCurrency(localStorage.getItem("preferredCurrency"))
-    setPreferredLanguage(localStorage.getItem("preferredLanguage"))
-    setTimeFormat(localStorage.getItem("timeFormat"))
-    setPreferredMeasurement(localStorage.getItem("preferredMeasurement"))
-    setTimeZone(localStorage.getItem("timeZone"))
-    setDateFormat(localStorage.getItem("dateFormat"))
-  }, [preferredCurrency, preferredLanguage, timeFormat, preferredMeasurement, timeZone, dateFormat])
+    setPreferredCurrency(localStorage.getItem("preferredCurrency"));
+    setPreferredLanguage(localStorage.getItem("preferredLanguage"));
+    setTimeFormat(localStorage.getItem("timeFormat"));
+    setPreferredMeasurement(localStorage.getItem("preferredMeasurement"));
+    setTimeZone(localStorage.getItem("timeZone"));
+    setDateFormat(localStorage.getItem("dateFormat"));
+  }, []);
+
+  useEffect(() => {
+    if (preferredCurrency && preferredLanguage && timeFormat && dateFormat && preferredMeasurement && timeZone) {
+      setMessages([
+        {
+          role: "system",
+          content: `You are in the timezone: ${timeZone}.
+            You use the time format ${timeFormat}.
+            You use the date format ${dateFormat} for all references of dates.
+            You use the ${preferredMeasurement} system.
+            You use the currency ${preferredCurrency}.
+            You will only answer in the language (you will receive the country code) ${preferredLanguage}.
+            But in the case the user specifically states to answer in another language, do that. Speaking in
+            another language is not stating you should answer in that language.
+            Additionally, under no circumstances translate your answer into multiple languages.`,
+        },
+        { role: "assistant", content: "Hello! How can I help you?" },
+      ]);
+    }
+  }, [preferredCurrency, preferredLanguage, timeFormat, dateFormat, preferredMeasurement, timeZone]);

   const [copyClicked, setCopyClicked] = useState(false)
   const [accessToken, setAccessToken] = useState("")
   const postWorkerRef = useRef<Worker | null>(null)
   const getWorkerRef = useRef<Worker | null>(null)
-  const [messages, setMessages] = useState<Message[]>([{ role: "system",
-    content: `You are in the timezone: ${timeZone}.
-      You use the time format ${timeFormat}.
-      You use the date format ${dateFormat} for all references of dates.
-      You use the ${preferredMeasurement} system. You use the currency ${preferredCurrency}.
-      You will only answer in the language (you will receive the country code) ${preferredLanguage}.
-      But in the case the user specifically states to answer in an other language do that speaking in a
-      nother language is not stating you should answer in that language. Additionally do not translate your answer into multiple languages`
-  },{ role: "assistant", content: "Hello! How can I help you?" }])
   const [liveMessage, setLiveMessage] = useState("")
   const [inputMessage, setInputMessage] = useState<string>("")
   const [inputDisabled, setInputDisabled] = useState(false)
   const [isRecording, setIsRecording] = useState(false)
   const mediaRecorderRef = useRef<MediaRecorder | null>(null)
   const audioChunks = useRef<Blob[]>([])
@@ -169,40 +181,46 @@ const InputOutputBackend: React.FC = () => {
     }
   }

-  const startRecording = async () => {
-    const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
-    const mediaRecorder = new MediaRecorder(stream)
-    mediaRecorderRef.current = mediaRecorder
+  const startRecording = async (): Promise<string> => {
+    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+    const mediaRecorder = new MediaRecorder(stream);
+    mediaRecorderRef.current = mediaRecorder;

-    mediaRecorder.ondataavailable = (event) => {
-      audioChunks.current.push(event.data)
-    }
+    audioChunks.current = []; // Initialize audio chunks

-    mediaRecorder.onstop = async () => {
-      const audioBlob = new Blob(audioChunks.current, { type: "audio/ogg" })
-      audioChunks.current = []
-      // console.log(audioBlob);
-      // const url = URL.createObjectURL(audioBlob)
-      // const audio = new Audio(url);
-      // audio.play().catch(error => console.error("Error playing audio:", error));
-      const remote = new VoiceSend()
-      remote.sendToVoiceRecognition(audioBlob)
-    }
+    // Create a promise that resolves when the onstop event is done
+    const stopRecordingPromise = new Promise<string>((resolve) => {
+      mediaRecorder.ondataavailable = (event) => {
+        audioChunks.current.push(event.data);
+      };
+
+      mediaRecorder.onstop = async () => {
+        const audioBlob = new Blob(audioChunks.current, { type: "audio/ogg" });
+        audioChunks.current = [];
+        const text_voice = await sendToVoiceRecognition(audioBlob);
+        console.log(text_voice);
+        resolve(text_voice); // Resolve the promise with the recognized text
+      };
+    });

-    mediaRecorder.start()
-    setIsRecording(true)
-  }
+    mediaRecorder.start();
+    setIsRecording(true);
+
+    // Wait for the recording to stop and get the recognized text
+    return stopRecordingPromise;
+  };

   const stopRecording = () => {
-    mediaRecorderRef.current?.stop()
-    setIsRecording(false)
-  }
+    mediaRecorderRef.current?.stop();
+    setIsRecording(false);
+  };

-  const handleMicClick = () => {
+  const handleMicClick = async () => {
     if (!isRecording) {
-      startRecording();
+      const recognizedText = await startRecording();
+      setInputMessage(recognizedText); // Set the recognized text after recording
+      console.log("Set!")
     } else {
       stopRecording();
     }
@@ -246,7 +264,7 @@ const InputOutputBackend: React.FC = () => {
     await wait(1000)
     setCopyClicked(false)
   }

   return (
     <>
@@ -263,7 +281,7 @@ const InputOutputBackend: React.FC = () => {
           onMicClick={handleMicClick}
           inputDisabled={inputDisabled}
           isRecording={isRecording}
         />
     </>
   )
 }
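Review note: the core of this change is wrapping MediaRecorder's onstop handler in a Promise so that startRecording can be awaited and resolves with the recognized text. A condensed, self-contained sketch of that pattern follows; recordAndTranscribe is a hypothetical name, not part of this PR, and stopping is still triggered externally via recorder.stop():

import { sendToVoiceRecognition } from "./voice_backend";

// Resolves only after onstop has fired and transcription has finished.
async function recordAndTranscribe(): Promise<string> {
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  const recorder = new MediaRecorder(stream);
  const chunks: Blob[] = [];

  const text = new Promise<string>((resolve) => {
    recorder.ondataavailable = (event) => chunks.push(event.data);
    recorder.onstop = async () => {
      const blob = new Blob(chunks, { type: "audio/ogg" });
      resolve(await sendToVoiceRecognition(blob)); // settle with the recognized text
    };
  });

  recorder.start();
  // A separate control calls recorder.stop(), which fires onstop above.
  return text;
}

One consequence of this design: the first click on the mic button now blocks inside await startRecording() until the second click calls stopRecording(), so the recognized text lands in the input box only after the recording ends.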

View file

@@ -1,26 +1,20 @@
 import axios from "axios";

-class VoiceSend {
-  sendToVoiceRecognition(audio_data: Blob) {
-    console.log("sending recording...");
-    const formdata = new FormData()
-    formdata.append("audio", audio_data)
-
-    const dataSend = { option:"offline", type:"basic",audio:audio_data }
-    axios.post("http://localhost:5000/interstellar_ai/api/voice_recognition", formdata)
-      .then((response) => {
-        console.log(response.data)
-        return response.data.response
-      })
-      .catch(error => {
-        console.log("Error calling API:", error)
-        postMessage({ status: 500 })
-      })
-  }
-}
-
-export default VoiceSend;
+export const sendToVoiceRecognition = (audio_data: Blob): Promise<string> => {
+  console.log("sending recording...");
+
+  const formdata = new FormData()
+  formdata.append("audio", audio_data)
+
+  const dataSend = { option: "offline", type: "basic", audio: audio_data }
+  return axios.post("http://localhost:5000/interstellar_ai/api/voice_recognition", formdata)
+    .then((response) => {
+      console.log(response.data)
+      return response.data.response
+    })
+    .catch(error => {
+      console.log("Error calling API:", error)
+      postMessage({ status: 500 })
+      return "Error"
+    })
+}
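With the class replaced by a plain exported function that returns the recognized text (or the "Error" sentinel on failure), callers can await it directly. A hypothetical usage sketch, not part of this PR:

import { sendToVoiceRecognition } from "./voice_backend";

// Upload a recorded Blob and apply the recognized text if the call succeeded.
async function fillInputFromRecording(audioBlob: Blob, setInputMessage: (text: string) => void) {
  const text = await sendToVoiceRecognition(audioBlob);
  if (text !== "Error") {
    setInputMessage(text);
  }
}

Minor review note: dataSend is still declared but never sent; the request body is only the FormData carrying the audio field.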

View file

@@ -6,13 +6,17 @@ interface InputProps {
   onSendClick: (message: string, override: boolean) => void;
   onMicClick: () => void;
   inputDisabled: boolean;
-  isRecording:boolean
+  isRecording: boolean
 }

 const InputFrontend = React.forwardRef<HTMLDivElement, InputProps>(
-  ({ message, onSendClick, onMicClick, inputDisabled, isRecording}, ref: ForwardedRef<HTMLDivElement>) => {
+  ({ message, onSendClick, onMicClick, inputDisabled, isRecording }, ref: ForwardedRef<HTMLDivElement>) => {
     const [inputValue, setInputValue] = useState('');

+    useEffect(() => {
+      setInputValue(message);
+    }, [message]);
+
     const handleInputChange = (e: React.ChangeEvent<HTMLInputElement>) => {
       setInputValue(e.target.value);
     };
@@ -40,7 +44,7 @@ const InputFrontend = React.forwardRef<HTMLDivElement, InputProps>(
         <button type="button" onClick={() => onSendClick(inputValue, false)} disabled={inputDisabled ? true : false}>
           <img src="/img/send.svg" alt="send" />
         </button>
-        <button className={`microphone-button ${isRecording ? "red": "var(--input-button-color)"}`} type="button" onClick={onMicClick}>
+        <button className={`microphone-button ${isRecording ? "red" : "var(--input-button-color)"}`} type="button" onClick={onMicClick}>
           <img src="/img/microphone.svg" alt="microphone" />
         </button>
       </div>
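The added useEffect is what lets recognized speech reach the text box: it mirrors the message prop into the local inputValue state, while typing continues to update local state only (this assumes useEffect is imported in this file). A condensed, self-contained sketch of the pattern, with a hypothetical component name:

import React, { useEffect, useState } from "react";

// Local state follows the `message` prop, overwriting any local
// edits whenever the prop changes; typing only touches local state.
function MirroredInput({ message }: { message: string }) {
  const [inputValue, setInputValue] = useState(message);

  useEffect(() => {
    setInputValue(message);
  }, [message]);

  return <input value={inputValue} onChange={(e) => setInputValue(e.target.value)} />;
}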

View file

@@ -7,25 +7,19 @@ class VoiceRecognition:
     @staticmethod
     def recognition(audio):
         audio_buffer = io.BytesIO(audio.read())
-        try:
-            audio_segment = AudioSegment.from_file(audio_buffer, format="ogg")
-
-            wav_io = io.BytesIO()
-            audio_segment.export(wav_io, format="wav")
-            wav_io.seek(0)
-        except:
-            print("audio to wav failed")
+        audio_segment = AudioSegment.from_file(audio_buffer, format="ogg")
+
+        wav_io = io.BytesIO()
+        audio_segment.export(wav_io, format="wav")
+        wav_io.seek(0)

         model_size = "base"
-        model = WhisperModel(model_size, device="cpu", compute_type=" ")
+        model = WhisperModel(model_size, device="cpu", compute_type="int8")
         segments, _ = model.transcribe(wav_io)
         transcription = ""
         for segment in segments:
             transcription += segment.text + " "
         result = transcription.strip()
-        print(result)
         return result
+
+# npm install @ffmpeg/ffmpeg @ffmpeg/util @ffmpeg/types @ffmpeg/core-mt
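Review note on the Python side: compute_type=" " is not a valid CTranslate2 compute type, so "int8" (8-bit quantized weights, the usual CPU choice) fixes the model construction, and dropping the bare try/except stops conversion failures from being silently swallowed. One further improvement worth considering, sketched under the assumption that this class lives in its own module with these imports: construct the WhisperModel once at import time instead of on every request.

import io

from faster_whisper import WhisperModel
from pydub import AudioSegment

# Loaded once instead of per request; "int8" selects 8-bit quantization on CPU.
_MODEL = WhisperModel("base", device="cpu", compute_type="int8")

class VoiceRecognition:
    @staticmethod
    def recognition(audio):
        # Convert the uploaded OGG stream to WAV in memory for transcription.
        audio_buffer = io.BytesIO(audio.read())
        audio_segment = AudioSegment.from_file(audio_buffer, format="ogg")

        wav_io = io.BytesIO()
        audio_segment.export(wav_io, format="wav")
        wav_io.seek(0)

        segments, _ = _MODEL.transcribe(wav_io)
        return "".join(segment.text + " " for segment in segments).strip()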