forked from React-Group/interstellar_ai
Compare commits
5 commits
f08bfda7be
...
ec7dfd6f13
Author | SHA1 | Date | |
---|---|---|---|
ec7dfd6f13 | |||
|
26b69a1cb6 | ||
|
1fd916a3eb | ||
|
1e05319266 | ||
8dbb1c9169 |
4 changed files with 107 additions and 97 deletions
|
@ -2,7 +2,7 @@
|
|||
import React, { use, useEffect, useRef, useState } from "react";
|
||||
import ConversationFrontend from "../components/ConversationFrontend";
|
||||
import InputFrontend from "../components/InputFrontend";
|
||||
import VoiceSend from "./voice_backend"
|
||||
import { sendToVoiceRecognition } from "./voice_backend"
|
||||
import { AudioRecorder } from "./AudioRecorder";
|
||||
import axios from "axios";
|
||||
import { resolve } from "path";
|
||||
|
@ -17,36 +17,48 @@ const InputOutputBackend: React.FC = () => {
|
|||
content: string
|
||||
}
|
||||
|
||||
/* Variables for System-prompt */
|
||||
const [preferredCurrency, setPreferredCurrency] = useState<string | null>("")
|
||||
const [preferredLanguage, setPreferredLanguage] = useState<string | null>("")
|
||||
const [timeFormat, setTimeFormat] = useState<string | null>("")
|
||||
const [preferredMeasurement, setPreferredMeasurement] = useState<string | null>("")
|
||||
const [timeZone, setTimeZone] = useState<string | null>("")
|
||||
const [dateFormat, setDateFormat] = useState<string | null>("")
|
||||
const [preferredCurrency, setPreferredCurrency] = useState<string | null>(null);
|
||||
const [preferredLanguage, setPreferredLanguage] = useState<string | null>(null);
|
||||
const [timeFormat, setTimeFormat] = useState<string | null>(null);
|
||||
const [preferredMeasurement, setPreferredMeasurement] = useState<string | null>(null);
|
||||
const [timeZone, setTimeZone] = useState<string | null>(null);
|
||||
const [dateFormat, setDateFormat] = useState<string | null>(null);
|
||||
const [messages, setMessages] = useState<Message[]>([]);
|
||||
|
||||
useEffect(() => {
|
||||
setPreferredCurrency(localStorage.getItem("preferredCurrency"))
|
||||
setPreferredLanguage(localStorage.getItem("preferredLanguage"))
|
||||
setTimeFormat(localStorage.getItem("timeFormat"))
|
||||
setPreferredMeasurement(localStorage.getItem("preferredMeasurement"))
|
||||
setTimeZone(localStorage.getItem("timeZone"))
|
||||
setDateFormat(localStorage.getItem("dateFormat"))
|
||||
}, [preferredCurrency, preferredLanguage, timeFormat, preferredMeasurement, timeZone, dateFormat])
|
||||
setPreferredCurrency(localStorage.getItem("preferredCurrency"));
|
||||
setPreferredLanguage(localStorage.getItem("preferredLanguage"));
|
||||
setTimeFormat(localStorage.getItem("timeFormat"));
|
||||
setPreferredMeasurement(localStorage.getItem("preferredMeasurement"));
|
||||
setTimeZone(localStorage.getItem("timeZone"));
|
||||
setDateFormat(localStorage.getItem("dateFormat"));
|
||||
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
if (preferredCurrency && preferredLanguage && timeFormat && dateFormat && preferredMeasurement && timeZone) {
|
||||
setMessages([
|
||||
{
|
||||
role: "system",
|
||||
content: `You are in the timezone: ${timeZone}.
|
||||
You use the time format ${timeFormat}.
|
||||
You use the date format ${dateFormat} for all references of dates.
|
||||
You use the ${preferredMeasurement} system.
|
||||
You use the currency ${preferredCurrency}.
|
||||
You will only answer in the language (you will receive the country code) ${preferredLanguage}.
|
||||
But in the case the user specifically states to answer in another language, do that. Speaking in
|
||||
another language is not stating you should answer in that language.
|
||||
Additionally, under no circumstances translate your answer into multiple languages.`,
|
||||
},
|
||||
{ role: "assistant", content: "Hello! How can I help you?" },
|
||||
]);
|
||||
}
|
||||
}, [preferredCurrency, preferredLanguage, timeFormat, dateFormat, preferredMeasurement, timeZone]);
|
||||
|
||||
|
||||
const [copyClicked, setCopyClicked] = useState(false)
|
||||
const [accessToken, setAccessToken] = useState("")
|
||||
const postWorkerRef = useRef<Worker | null>(null)
|
||||
const getWorkerRef = useRef<Worker | null>(null)
|
||||
const [messages, setMessages] = useState<Message[]>([{ role: "system",
|
||||
content: `You are in the timezone: ${timeZone}.
|
||||
You use the time format ${timeFormat}.
|
||||
You use the date format ${dateFormat} for all references of dates.
|
||||
You use the ${preferredMeasurement} system. You use the currency ${preferredCurrency}.
|
||||
You will only answer in the language (you will receive the country code) ${preferredLanguage}.
|
||||
But in the case the user specifically states to answer in an other language do that speaking in a
|
||||
nother language is not stating you should answer in that language. Additionally do not translate your answer into multiple languages`
|
||||
},{ role: "assistant", content: "Hello! How can I help you?" }])
|
||||
const [liveMessage, setLiveMessage] = useState("")
|
||||
const [inputMessage, setInputMessage] = useState<string>("")
|
||||
const [inputDisabled, setInputDisabled] = useState(false)
|
||||
|
@ -169,40 +181,46 @@ const InputOutputBackend: React.FC = () => {
|
|||
}
|
||||
}
|
||||
|
||||
const startRecording = async () => {
|
||||
const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
|
||||
const mediaRecorder = new MediaRecorder(stream)
|
||||
mediaRecorderRef.current = mediaRecorder
|
||||
const startRecording = async (): Promise<string> => {
|
||||
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
||||
const mediaRecorder = new MediaRecorder(stream);
|
||||
mediaRecorderRef.current = mediaRecorder;
|
||||
|
||||
mediaRecorder.ondataavailable = (event) => {
|
||||
audioChunks.current.push(event.data)
|
||||
}
|
||||
audioChunks.current = []; // Initialize audio chunks
|
||||
|
||||
mediaRecorder.onstop = async () => {
|
||||
const audioBlob = new Blob(audioChunks.current, { type: "audio/ogg" })
|
||||
audioChunks.current = []
|
||||
// console.log(audioBlob);
|
||||
// const url = URL.createObjectURL(audioBlob)
|
||||
// const audio = new Audio(url);
|
||||
// audio.play().catch(error => console.error("Error playing audio:", error));
|
||||
// Create a promise that resolves when the onstop event is done
|
||||
const stopRecordingPromise = new Promise<string>((resolve) => {
|
||||
mediaRecorder.ondataavailable = (event) => {
|
||||
audioChunks.current.push(event.data);
|
||||
};
|
||||
|
||||
const remote = new VoiceSend()
|
||||
remote.sendToVoiceRecognition(audioBlob)
|
||||
}
|
||||
mediaRecorder.onstop = async () => {
|
||||
const audioBlob = new Blob(audioChunks.current, { type: "audio/ogg" });
|
||||
audioChunks.current = [];
|
||||
|
||||
mediaRecorder.start()
|
||||
setIsRecording(true)
|
||||
}
|
||||
const text_voice = await sendToVoiceRecognition(audioBlob);
|
||||
console.log(text_voice);
|
||||
resolve(text_voice); // Resolve the promise with the recognized text
|
||||
};
|
||||
});
|
||||
|
||||
mediaRecorder.start();
|
||||
setIsRecording(true);
|
||||
|
||||
// Wait for the recording to stop and get the recognized text
|
||||
return stopRecordingPromise;
|
||||
};
|
||||
|
||||
const stopRecording = () => {
|
||||
mediaRecorderRef.current?.stop()
|
||||
setIsRecording(false)
|
||||
}
|
||||
mediaRecorderRef.current?.stop();
|
||||
setIsRecording(false);
|
||||
};
|
||||
|
||||
|
||||
const handleMicClick = () => {
|
||||
const handleMicClick = async () => {
|
||||
if (!isRecording) {
|
||||
startRecording();
|
||||
const recognizedText = await startRecording();
|
||||
setInputMessage(recognizedText); // Set the recognized text after recording
|
||||
console.log("Set!")
|
||||
} else {
|
||||
stopRecording();
|
||||
}
|
||||
|
|
|
@ -1,26 +1,20 @@
|
|||
import axios from "axios";
|
||||
|
||||
export const sendToVoiceRecognition = (audio_data: Blob): Promise<string> => {
|
||||
console.log("sending recording...");
|
||||
|
||||
class VoiceSend {
|
||||
sendToVoiceRecognition(audio_data: Blob) {
|
||||
console.log("sending recording...");
|
||||
|
||||
const formdata = new FormData()
|
||||
formdata.append("audio", audio_data)
|
||||
|
||||
const dataSend = { option:"offline", type:"basic",audio:audio_data }
|
||||
axios.post("http://localhost:5000/interstellar_ai/api/voice_recognition", formdata)
|
||||
.then((response) => {
|
||||
console.log(response.data)
|
||||
return response.data.response
|
||||
})
|
||||
.catch(error => {
|
||||
console.log("Error calling API:", error)
|
||||
postMessage({ status: 500 })
|
||||
})
|
||||
}
|
||||
const formdata = new FormData()
|
||||
formdata.append("audio", audio_data)
|
||||
|
||||
const dataSend = { option: "offline", type: "basic", audio: audio_data }
|
||||
return axios.post("http://localhost:5000/interstellar_ai/api/voice_recognition", formdata)
|
||||
.then((response) => {
|
||||
console.log(response.data)
|
||||
return response.data.response
|
||||
})
|
||||
.catch(error => {
|
||||
console.log("Error calling API:", error)
|
||||
postMessage({ status: 500 })
|
||||
return "Error"
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
export default VoiceSend;
|
|
@ -6,13 +6,17 @@ interface InputProps {
|
|||
onSendClick: (message: string, override: boolean) => void;
|
||||
onMicClick: () => void;
|
||||
inputDisabled: boolean;
|
||||
isRecording:boolean
|
||||
isRecording: boolean
|
||||
}
|
||||
|
||||
const InputFrontend = React.forwardRef<HTMLDivElement, InputProps>(
|
||||
({ message, onSendClick, onMicClick, inputDisabled, isRecording}, ref: ForwardedRef<HTMLDivElement>) => {
|
||||
({ message, onSendClick, onMicClick, inputDisabled, isRecording }, ref: ForwardedRef<HTMLDivElement>) => {
|
||||
const [inputValue, setInputValue] = useState('');
|
||||
|
||||
useEffect(() => {
|
||||
setInputValue(message);
|
||||
}, [message]);
|
||||
|
||||
const handleInputChange = (e: React.ChangeEvent<HTMLInputElement>) => {
|
||||
setInputValue(e.target.value);
|
||||
};
|
||||
|
@ -40,7 +44,7 @@ const InputFrontend = React.forwardRef<HTMLDivElement, InputProps>(
|
|||
<button type="button" onClick={() => onSendClick(inputValue, false)} disabled={inputDisabled ? true : false}>
|
||||
<img src="/img/send.svg" alt="send" />
|
||||
</button>
|
||||
<button className={`microphone-button ${isRecording ? "red": "var(--input-button-color)"}`} type="button" onClick={onMicClick}>
|
||||
<button className={`microphone-button ${isRecording ? "red" : "var(--input-button-color)"}`} type="button" onClick={onMicClick}>
|
||||
<img src="/img/microphone.svg" alt="microphone" />
|
||||
</button>
|
||||
</div>
|
||||
|
|
16
py/voice.py
16
py/voice.py
|
@ -8,24 +8,18 @@ class VoiceRecognition:
|
|||
def recognition(audio):
|
||||
audio_buffer = io.BytesIO(audio.read())
|
||||
|
||||
try:
|
||||
audio_segment = AudioSegment.from_file(audio_buffer, format="ogg")
|
||||
audio_segment = AudioSegment.from_file(audio_buffer, format="ogg")
|
||||
|
||||
wav_io = io.BytesIO()
|
||||
audio_segment.export(wav_io, format="wav")
|
||||
wav_io.seek(0)
|
||||
except:
|
||||
print("audio to wav failed")
|
||||
wav_io = io.BytesIO()
|
||||
audio_segment.export(wav_io, format="wav")
|
||||
wav_io.seek(0)
|
||||
|
||||
model_size = "base"
|
||||
model = WhisperModel(model_size, device="cpu", compute_type=" ")
|
||||
model = WhisperModel(model_size, device="cpu", compute_type="int8")
|
||||
|
||||
segments, _ = model.transcribe(wav_io)
|
||||
transcription = ""
|
||||
for segment in segments:
|
||||
transcription += segment.text + " "
|
||||
result = transcription.strip()
|
||||
print(result)
|
||||
return result
|
||||
|
||||
# npm install @ffmpeg/ffmpeg @ffmpeg/util @ffmpeg/types @ffmpeg/core-mt
|
Loading…
Reference in a new issue