forked from React-Group/interstellar_ai
Compare commits: 5 commits (f08bfda7be ... ec7dfd6f13)

Commits:
ec7dfd6f13
26b69a1cb6
1fd916a3eb
1e05319266
8dbb1c9169
4 changed files with 107 additions and 97 deletions
@ -2,7 +2,7 @@
 import React, { use, useEffect, useRef, useState } from "react";
 import ConversationFrontend from "../components/ConversationFrontend";
 import InputFrontend from "../components/InputFrontend";
-import VoiceSend from "./voice_backend"
+import { sendToVoiceRecognition } from "./voice_backend"
 import { AudioRecorder } from "./AudioRecorder";
 import axios from "axios";
 import { resolve } from "path";
@ -17,36 +17,48 @@ const InputOutputBackend: React.FC = () => {
     content: string
 }

-  const [preferredCurrency, setPreferredCurrency] = useState<string | null>("")
-  const [preferredLanguage, setPreferredLanguage] = useState<string | null>("")
-  const [timeFormat, setTimeFormat] = useState<string | null>("")
-  const [preferredMeasurement, setPreferredMeasurement] = useState<string | null>("")
-  const [timeZone, setTimeZone] = useState<string | null>("")
-  const [dateFormat, setDateFormat] = useState<string | null>("")
+  /* Variables for System-prompt */
+  const [preferredCurrency, setPreferredCurrency] = useState<string | null>(null);
+  const [preferredLanguage, setPreferredLanguage] = useState<string | null>(null);
+  const [timeFormat, setTimeFormat] = useState<string | null>(null);
+  const [preferredMeasurement, setPreferredMeasurement] = useState<string | null>(null);
+  const [timeZone, setTimeZone] = useState<string | null>(null);
+  const [dateFormat, setDateFormat] = useState<string | null>(null);
+  const [messages, setMessages] = useState<Message[]>([]);

   useEffect(() => {
-    setPreferredCurrency(localStorage.getItem("preferredCurrency"))
-    setPreferredLanguage(localStorage.getItem("preferredLanguage"))
-    setTimeFormat(localStorage.getItem("timeFormat"))
-    setPreferredMeasurement(localStorage.getItem("preferredMeasurement"))
-    setTimeZone(localStorage.getItem("timeZone"))
-    setDateFormat(localStorage.getItem("dateFormat"))
-  }, [preferredCurrency, preferredLanguage, timeFormat, preferredMeasurement, timeZone, dateFormat])
+    setPreferredCurrency(localStorage.getItem("preferredCurrency"));
+    setPreferredLanguage(localStorage.getItem("preferredLanguage"));
+    setTimeFormat(localStorage.getItem("timeFormat"));
+    setPreferredMeasurement(localStorage.getItem("preferredMeasurement"));
+    setTimeZone(localStorage.getItem("timeZone"));
+    setDateFormat(localStorage.getItem("dateFormat"));
+  }, []);
+
+  useEffect(() => {
+    if (preferredCurrency && preferredLanguage && timeFormat && dateFormat && preferredMeasurement && timeZone) {
+      setMessages([
+        {
+          role: "system",
+          content: `You are in the timezone: ${timeZone}.
+            You use the time format ${timeFormat}.
+            You use the date format ${dateFormat} for all references of dates.
+            You use the ${preferredMeasurement} system.
+            You use the currency ${preferredCurrency}.
+            You will only answer in the language (you will receive the country code) ${preferredLanguage}.
+            But in the case the user specifically states to answer in another language, do that. Speaking in
+            another language is not stating you should answer in that language.
+            Additionally, under no circumstances translate your answer into multiple languages.`,
+        },
+        { role: "assistant", content: "Hello! How can I help you?" },
+      ]);
+    }
+  }, [preferredCurrency, preferredLanguage, timeFormat, dateFormat, preferredMeasurement, timeZone]);

   const [copyClicked, setCopyClicked] = useState(false)
   const [accessToken, setAccessToken] = useState("")
   const postWorkerRef = useRef<Worker | null>(null)
   const getWorkerRef = useRef<Worker | null>(null)
-  const [messages, setMessages] = useState<Message[]>([{ role: "system",
-    content: `You are in the timezone: ${timeZone}.
-    You use the time format ${timeFormat}.
-    You use the date format ${dateFormat} for all references of dates.
-    You use the ${preferredMeasurement} system. You use the currency ${preferredCurrency}.
-    You will only answer in the language (you will receive the country code) ${preferredLanguage}.
-    But in the case the user specifically states to answer in an other language do that speaking in a
-    nother language is not stating you should answer in that language. Additionally do not translate your answer into multiple languages`
-  },{ role: "assistant", content: "Hello! How can I help you?" }])
   const [liveMessage, setLiveMessage] = useState("")
   const [inputMessage, setInputMessage] = useState<string>("")
   const [inputDisabled, setInputDisabled] = useState(false)
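The hunk above replaces an effect whose dependency array listed the very preference values it was setting, so it re-ran whenever those values changed. The rewrite reads localStorage once on mount (empty dependency array) and moves prompt construction into a second effect that waits until every preference is present. A minimal sketch of that two-effect pattern, reduced to a single preference; the component is hypothetical, not code from this repository:

```tsx
import React, { useEffect, useState } from "react";

// Hypothetical, stripped-down component; only the timeZone preference is kept.
const PreferenceExample: React.FC = () => {
    const [timeZone, setTimeZone] = useState<string | null>(null);
    const [messages, setMessages] = useState<{ role: string; content: string }[]>([]);

    // Read localStorage once on mount; an empty dependency array cannot
    // re-trigger the effect when the state it sets changes.
    useEffect(() => {
        setTimeZone(localStorage.getItem("timeZone"));
    }, []);

    // Build the system prompt only once the value has actually loaded.
    useEffect(() => {
        if (timeZone) {
            setMessages([{ role: "system", content: `You are in the timezone: ${timeZone}.` }]);
        }
    }, [timeZone]);

    return <p>{messages.length} system message(s) ready</p>;
};

export default PreferenceExample;
```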
@ -169,40 +181,46 @@ const InputOutputBackend: React.FC = () => {
         }
     }

-    const startRecording = async () => {
-        const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
-        const mediaRecorder = new MediaRecorder(stream)
-        mediaRecorderRef.current = mediaRecorder
-
-        mediaRecorder.ondataavailable = (event) => {
-            audioChunks.current.push(event.data)
-        }
-
-        mediaRecorder.onstop = async () => {
-            const audioBlob = new Blob(audioChunks.current, { type: "audio/ogg" })
-            audioChunks.current = []
-            // console.log(audioBlob);
-            // const url = URL.createObjectURL(audioBlob)
-            // const audio = new Audio(url);
-            // audio.play().catch(error => console.error("Error playing audio:", error));
-
-            const remote = new VoiceSend()
-            remote.sendToVoiceRecognition(audioBlob)
-        }
-
-        mediaRecorder.start()
-        setIsRecording(true)
-    }
+    const startRecording = async (): Promise<string> => {
+        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+        const mediaRecorder = new MediaRecorder(stream);
+        mediaRecorderRef.current = mediaRecorder;
+
+        audioChunks.current = []; // Initialize audio chunks
+
+        // Create a promise that resolves when the onstop event is done
+        const stopRecordingPromise = new Promise<string>((resolve) => {
+            mediaRecorder.ondataavailable = (event) => {
+                audioChunks.current.push(event.data);
+            };
+
+            mediaRecorder.onstop = async () => {
+                const audioBlob = new Blob(audioChunks.current, { type: "audio/ogg" });
+                audioChunks.current = [];
+
+                const text_voice = await sendToVoiceRecognition(audioBlob);
+                console.log(text_voice);
+                resolve(text_voice); // Resolve the promise with the recognized text
+            };
+        });
+
+        mediaRecorder.start();
+        setIsRecording(true);
+
+        // Wait for the recording to stop and get the recognized text
+        return stopRecordingPromise;
+    };

     const stopRecording = () => {
-        mediaRecorderRef.current?.stop()
-        setIsRecording(false)
-    }
+        mediaRecorderRef.current?.stop();
+        setIsRecording(false);
+    };

-    const handleMicClick = () => {
+    const handleMicClick = async () => {
         if (!isRecording) {
-            startRecording();
+            const recognizedText = await startRecording();
+            setInputMessage(recognizedText); // Set the recognized text after recording
+            console.log("Set!")
         } else {
             stopRecording();
         }
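This hunk turns a fire-and-forget recorder into one whose result can be awaited: the MediaRecorder onstop handler is wrapped in a Promise<string> that resolves with the recognized text, so handleMicClick can await startRecording() and push the transcription into the input field. A standalone sketch of the same promise-wrapping idea; recordUntilStopped is a hypothetical helper, not code from this repository:

```ts
// Hypothetical helper: wrap a MediaRecorder session in a Promise so the
// caller can await the final audio Blob.
export function recordUntilStopped(stream: MediaStream): { done: Promise<Blob>; stop: () => void } {
    const recorder = new MediaRecorder(stream);
    const chunks: Blob[] = [];

    const done = new Promise<Blob>((resolve) => {
        recorder.ondataavailable = (event) => {
            chunks.push(event.data);
        };
        // onstop fires after the final dataavailable event, so every chunk
        // is already collected when the Blob is assembled here.
        recorder.onstop = () => {
            resolve(new Blob(chunks, { type: "audio/ogg" }));
        };
    });

    recorder.start();
    return { done, stop: () => recorder.stop() };
}
```

A caller would hold on to `{ done, stop }`, call `stop()` from the mic button, then `await done` for the blob — the same shape startRecording now has, with the extra step of sending the blob to sendToVoiceRecognition.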
@ -1,26 +1,20 @@
 import axios from "axios";

-class VoiceSend {
-    sendToVoiceRecognition(audio_data: Blob) {
-        console.log("sending recording...");
-
-        const formdata = new FormData()
-        formdata.append("audio", audio_data)
-
-        const dataSend = { option:"offline", type:"basic",audio:audio_data }
-        axios.post("http://localhost:5000/interstellar_ai/api/voice_recognition", formdata)
-            .then((response) => {
-                console.log(response.data)
-                return response.data.response
-            })
-            .catch(error => {
-                console.log("Error calling API:", error)
-                postMessage({ status: 500 })
-            })
-    }
-}
-
-export default VoiceSend;
+export const sendToVoiceRecognition = (audio_data: Blob): Promise<string> => {
+    console.log("sending recording...");
+
+    const formdata = new FormData()
+    formdata.append("audio", audio_data)
+
+    const dataSend = { option: "offline", type: "basic", audio: audio_data }
+    return axios.post("http://localhost:5000/interstellar_ai/api/voice_recognition", formdata)
+        .then((response) => {
+            console.log(response.data)
+            return response.data.response
+        })
+        .catch(error => {
+            console.log("Error calling API:", error)
+            postMessage({ status: 500 })
+            return "Error"
+        })
+}
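Returning the axios promise chain (and returning "Error" from the catch branch) is what lets callers await the result instead of losing it inside the old class method. A minimal usage sketch, assuming only the export shown above:

```ts
import { sendToVoiceRecognition } from "./voice_backend";

// Resolves with the recognized text, or with the literal string "Error"
// when the API call fails.
export async function transcribe(audioBlob: Blob): Promise<void> {
    const text = await sendToVoiceRecognition(audioBlob);
    console.log("transcription:", text);
}
```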
@ -6,13 +6,17 @@ interface InputProps {
     onSendClick: (message: string, override: boolean) => void;
     onMicClick: () => void;
     inputDisabled: boolean;
-    isRecording:boolean
+    isRecording: boolean
 }

 const InputFrontend = React.forwardRef<HTMLDivElement, InputProps>(
-    ({ message, onSendClick, onMicClick, inputDisabled, isRecording}, ref: ForwardedRef<HTMLDivElement>) => {
+    ({ message, onSendClick, onMicClick, inputDisabled, isRecording }, ref: ForwardedRef<HTMLDivElement>) => {
         const [inputValue, setInputValue] = useState('');

+        useEffect(() => {
+            setInputValue(message);
+        }, [message]);
+
         const handleInputChange = (e: React.ChangeEvent<HTMLInputElement>) => {
             setInputValue(e.target.value);
         };

@ -40,7 +44,7 @@ const InputFrontend = React.forwardRef<HTMLDivElement, InputProps>(
     <button type="button" onClick={() => onSendClick(inputValue, false)} disabled={inputDisabled ? true : false}>
         <img src="/img/send.svg" alt="send" />
     </button>
-    <button className={`microphone-button ${isRecording ? "red": "var(--input-button-color)"}`} type="button" onClick={onMicClick}>
+    <button className={`microphone-button ${isRecording ? "red" : "var(--input-button-color)"}`} type="button" onClick={onMicClick}>
         <img src="/img/microphone.svg" alt="microphone" />
     </button>
 </div>
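The added useEffect syncs the `message` prop into the controlled input's local state, which is how text recognized by the microphone ends up editable in the box. A stripped-down, hypothetical component showing just that sync:

```tsx
import React, { useEffect, useState } from "react";

// Hypothetical component: a controlled input that adopts externally
// supplied text (e.g. recognized speech) while still allowing local edits.
const SyncedInput: React.FC<{ message: string }> = ({ message }) => {
    const [inputValue, setInputValue] = useState("");

    // Whenever the parent pushes a new message, overwrite the local value.
    useEffect(() => {
        setInputValue(message);
    }, [message]);

    return <input value={inputValue} onChange={(e) => setInputValue(e.target.value)} />;
};

export default SyncedInput;
```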
py/voice.py (16 changes)

@ -8,24 +8,18 @@ class VoiceRecognition:
     def recognition(audio):
         audio_buffer = io.BytesIO(audio.read())

-        try:
-            audio_segment = AudioSegment.from_file(audio_buffer, format="ogg")
-
-            wav_io = io.BytesIO()
-            audio_segment.export(wav_io, format="wav")
-            wav_io.seek(0)
-        except:
-            print("audio to wav failed")
+        audio_segment = AudioSegment.from_file(audio_buffer, format="ogg")
+
+        wav_io = io.BytesIO()
+        audio_segment.export(wav_io, format="wav")
+        wav_io.seek(0)

         model_size = "base"
-        model = WhisperModel(model_size, device="cpu", compute_type=" ")
+        model = WhisperModel(model_size, device="cpu", compute_type="int8")

         segments, _ = model.transcribe(wav_io)
         transcription = ""
         for segment in segments:
             transcription += segment.text + " "
         result = transcription.strip()
-        print(result)
         return result
+
+# npm install @ffmpeg/ffmpeg @ffmpeg/util @ffmpeg/types @ffmpeg/core-mt
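Two notes on this hunk: `compute_type="int8"` runs the faster-whisper model with 8-bit quantization on CPU, and removing the bare try/except means a failed ogg-to-wav conversion now raises instead of silently continuing with an undefined `wav_io`. The server decodes the upload with `AudioSegment.from_file(audio_buffer, format="ogg")`, which matches the audio/ogg Blob the frontend builds. A small sketch of that client-side contract; buildVoicePayload is a hypothetical helper, not code from this repository:

```ts
// Hypothetical helper: the server decodes the upload as ogg, so the client
// must assemble an audio/ogg Blob and attach it under the "audio" field
// that recognition() reads.
export function buildVoicePayload(chunks: Blob[]): FormData {
    const audioBlob = new Blob(chunks, { type: "audio/ogg" }); // matches format="ogg" server-side
    const formdata = new FormData();
    formdata.append("audio", audioBlob); // read via audio.read() in recognition()
    return formdata;
}
```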