Merge pull request 'main' (#33) from React-Group/interstellar_ai:main into main
Reviewed-on: https://interstellardevelopment.org/code/code/YasinOnm08/interstellar_ai/pulls/33
This commit is contained in:
commit
ec7dfd6f13
4 changed files with 107 additions and 97 deletions
|
@ -2,7 +2,7 @@
|
||||||
import React, { use, useEffect, useRef, useState } from "react";
|
import React, { use, useEffect, useRef, useState } from "react";
|
||||||
import ConversationFrontend from "../components/ConversationFrontend";
|
import ConversationFrontend from "../components/ConversationFrontend";
|
||||||
import InputFrontend from "../components/InputFrontend";
|
import InputFrontend from "../components/InputFrontend";
|
||||||
import VoiceSend from "./voice_backend"
|
import { sendToVoiceRecognition } from "./voice_backend"
|
||||||
import { AudioRecorder } from "./AudioRecorder";
|
import { AudioRecorder } from "./AudioRecorder";
|
||||||
import axios from "axios";
|
import axios from "axios";
|
||||||
import { resolve } from "path";
|
import { resolve } from "path";
|
||||||
|
@ -17,41 +17,53 @@ const InputOutputBackend: React.FC = () => {
|
||||||
content: string
|
content: string
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Variables for System-prompt */
|
const [preferredCurrency, setPreferredCurrency] = useState<string | null>(null);
|
||||||
const [preferredCurrency, setPreferredCurrency] = useState<string | null>("")
|
const [preferredLanguage, setPreferredLanguage] = useState<string | null>(null);
|
||||||
const [preferredLanguage, setPreferredLanguage] = useState<string | null>("")
|
const [timeFormat, setTimeFormat] = useState<string | null>(null);
|
||||||
const [timeFormat, setTimeFormat] = useState<string | null>("")
|
const [preferredMeasurement, setPreferredMeasurement] = useState<string | null>(null);
|
||||||
const [preferredMeasurement, setPreferredMeasurement] = useState<string | null>("")
|
const [timeZone, setTimeZone] = useState<string | null>(null);
|
||||||
const [timeZone, setTimeZone] = useState<string | null>("")
|
const [dateFormat, setDateFormat] = useState<string | null>(null);
|
||||||
const [dateFormat, setDateFormat] = useState<string | null>("")
|
const [messages, setMessages] = useState<Message[]>([]);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
setPreferredCurrency(localStorage.getItem("preferredCurrency"))
|
setPreferredCurrency(localStorage.getItem("preferredCurrency"));
|
||||||
setPreferredLanguage(localStorage.getItem("preferredLanguage"))
|
setPreferredLanguage(localStorage.getItem("preferredLanguage"));
|
||||||
setTimeFormat(localStorage.getItem("timeFormat"))
|
setTimeFormat(localStorage.getItem("timeFormat"));
|
||||||
setPreferredMeasurement(localStorage.getItem("preferredMeasurement"))
|
setPreferredMeasurement(localStorage.getItem("preferredMeasurement"));
|
||||||
setTimeZone(localStorage.getItem("timeZone"))
|
setTimeZone(localStorage.getItem("timeZone"));
|
||||||
setDateFormat(localStorage.getItem("dateFormat"))
|
setDateFormat(localStorage.getItem("dateFormat"));
|
||||||
}, [preferredCurrency, preferredLanguage, timeFormat, preferredMeasurement, timeZone, dateFormat])
|
}, []);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
if (preferredCurrency && preferredLanguage && timeFormat && dateFormat && preferredMeasurement && timeZone) {
|
||||||
|
setMessages([
|
||||||
|
{
|
||||||
|
role: "system",
|
||||||
|
content: `You are in the timezone: ${timeZone}.
|
||||||
|
You use the time format ${timeFormat}.
|
||||||
|
You use the date format ${dateFormat} for all references of dates.
|
||||||
|
You use the ${preferredMeasurement} system.
|
||||||
|
You use the currency ${preferredCurrency}.
|
||||||
|
You will only answer in the language (you will receive the country code) ${preferredLanguage}.
|
||||||
|
But in the case the user specifically states to answer in another language, do that. Speaking in
|
||||||
|
another language is not stating you should answer in that language.
|
||||||
|
Additionally, under no circumstances translate your answer into multiple languages.`,
|
||||||
|
},
|
||||||
|
{ role: "assistant", content: "Hello! How can I help you?" },
|
||||||
|
]);
|
||||||
|
}
|
||||||
|
}, [preferredCurrency, preferredLanguage, timeFormat, dateFormat, preferredMeasurement, timeZone]);
|
||||||
|
|
||||||
|
|
||||||
const [copyClicked, setCopyClicked] = useState(false)
|
const [copyClicked, setCopyClicked] = useState(false)
|
||||||
const [accessToken, setAccessToken] = useState("")
|
const [accessToken, setAccessToken] = useState("")
|
||||||
const postWorkerRef = useRef<Worker | null>(null)
|
const postWorkerRef = useRef<Worker | null>(null)
|
||||||
const getWorkerRef = useRef<Worker | null>(null)
|
const getWorkerRef = useRef<Worker | null>(null)
|
||||||
const [messages, setMessages] = useState<Message[]>([{ role: "system",
|
|
||||||
content: `You are in the timezone: ${timeZone}.
|
|
||||||
You use the time format ${timeFormat}.
|
|
||||||
You use the date format ${dateFormat} for all references of dates.
|
|
||||||
You use the ${preferredMeasurement} system. You use the currency ${preferredCurrency}.
|
|
||||||
You will only answer in the language (you will receive the country code) ${preferredLanguage}.
|
|
||||||
But in the case the user specifically states to answer in an other language do that speaking in a
|
|
||||||
nother language is not stating you should answer in that language. Additionally do not translate your answer into multiple languages`
|
|
||||||
},{ role: "assistant", content: "Hello! How can I help you?" }])
|
|
||||||
const [liveMessage, setLiveMessage] = useState("")
|
const [liveMessage, setLiveMessage] = useState("")
|
||||||
const [inputMessage, setInputMessage] = useState<string>("")
|
const [inputMessage, setInputMessage] = useState<string>("")
|
||||||
const [inputDisabled, setInputDisabled] = useState(false)
|
const [inputDisabled, setInputDisabled] = useState(false)
|
||||||
const [isRecording, setIsRecording] = useState(false)
|
const [isRecording, setIsRecording] = useState(false)
|
||||||
const mediaRecorderRef = useRef<MediaRecorder | null>(null)
|
const mediaRecorderRef = useRef<MediaRecorder | null>(null)
|
||||||
const audioChunks = useRef<Blob[]>([])
|
const audioChunks = useRef<Blob[]>([])
|
||||||
|
|
||||||
|
|
||||||
|
@ -169,40 +181,46 @@ const InputOutputBackend: React.FC = () => {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const startRecording = async () => {
|
const startRecording = async (): Promise<string> => {
|
||||||
const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
|
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
||||||
const mediaRecorder = new MediaRecorder(stream)
|
const mediaRecorder = new MediaRecorder(stream);
|
||||||
mediaRecorderRef.current = mediaRecorder
|
mediaRecorderRef.current = mediaRecorder;
|
||||||
|
|
||||||
mediaRecorder.ondataavailable = (event) => {
|
|
||||||
audioChunks.current.push(event.data)
|
|
||||||
}
|
|
||||||
|
|
||||||
mediaRecorder.onstop = async () => {
|
|
||||||
const audioBlob = new Blob(audioChunks.current, { type: "audio/ogg" })
|
|
||||||
audioChunks.current = []
|
|
||||||
// console.log(audioBlob);
|
|
||||||
// const url = URL.createObjectURL(audioBlob)
|
|
||||||
// const audio = new Audio(url);
|
|
||||||
// audio.play().catch(error => console.error("Error playing audio:", error));
|
|
||||||
|
|
||||||
const remote = new VoiceSend()
|
audioChunks.current = []; // Initialize audio chunks
|
||||||
remote.sendToVoiceRecognition(audioBlob)
|
|
||||||
}
|
// Create a promise that resolves when the onstop event is done
|
||||||
|
const stopRecordingPromise = new Promise<string>((resolve) => {
|
||||||
|
mediaRecorder.ondataavailable = (event) => {
|
||||||
|
audioChunks.current.push(event.data);
|
||||||
|
};
|
||||||
|
|
||||||
|
mediaRecorder.onstop = async () => {
|
||||||
|
const audioBlob = new Blob(audioChunks.current, { type: "audio/ogg" });
|
||||||
|
audioChunks.current = [];
|
||||||
|
|
||||||
|
const text_voice = await sendToVoiceRecognition(audioBlob);
|
||||||
|
console.log(text_voice);
|
||||||
|
resolve(text_voice); // Resolve the promise with the recognized text
|
||||||
|
};
|
||||||
|
});
|
||||||
|
|
||||||
|
mediaRecorder.start();
|
||||||
|
setIsRecording(true);
|
||||||
|
|
||||||
|
// Wait for the recording to stop and get the recognized text
|
||||||
|
return stopRecordingPromise;
|
||||||
|
};
|
||||||
|
|
||||||
mediaRecorder.start()
|
|
||||||
setIsRecording(true)
|
|
||||||
}
|
|
||||||
|
|
||||||
const stopRecording = () => {
|
const stopRecording = () => {
|
||||||
mediaRecorderRef.current?.stop()
|
mediaRecorderRef.current?.stop();
|
||||||
setIsRecording(false)
|
setIsRecording(false);
|
||||||
}
|
};
|
||||||
|
|
||||||
|
const handleMicClick = async () => {
|
||||||
const handleMicClick = () => {
|
|
||||||
if (!isRecording) {
|
if (!isRecording) {
|
||||||
startRecording();
|
const recognizedText = await startRecording();
|
||||||
|
setInputMessage(recognizedText); // Set the recognized text after recording
|
||||||
|
console.log("Set!")
|
||||||
} else {
|
} else {
|
||||||
stopRecording();
|
stopRecording();
|
||||||
}
|
}
|
||||||
|
@ -246,7 +264,7 @@ const InputOutputBackend: React.FC = () => {
|
||||||
await wait(1000)
|
await wait(1000)
|
||||||
setCopyClicked(false)
|
setCopyClicked(false)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<>
|
<>
|
||||||
|
@ -263,7 +281,7 @@ const InputOutputBackend: React.FC = () => {
|
||||||
onMicClick={handleMicClick}
|
onMicClick={handleMicClick}
|
||||||
inputDisabled={inputDisabled}
|
inputDisabled={inputDisabled}
|
||||||
isRecording={isRecording}
|
isRecording={isRecording}
|
||||||
/>
|
/>
|
||||||
</>
|
</>
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,26 +1,20 @@
|
||||||
import axios from "axios";
|
import axios from "axios";
|
||||||
|
|
||||||
|
export const sendToVoiceRecognition = (audio_data: Blob): Promise<string> => {
|
||||||
|
console.log("sending recording...");
|
||||||
|
|
||||||
class VoiceSend {
|
const formdata = new FormData()
|
||||||
sendToVoiceRecognition(audio_data: Blob) {
|
formdata.append("audio", audio_data)
|
||||||
console.log("sending recording...");
|
|
||||||
|
|
||||||
const formdata = new FormData()
|
const dataSend = { option: "offline", type: "basic", audio: audio_data }
|
||||||
formdata.append("audio", audio_data)
|
return axios.post("http://localhost:5000/interstellar_ai/api/voice_recognition", formdata)
|
||||||
|
.then((response) => {
|
||||||
const dataSend = { option:"offline", type:"basic",audio:audio_data }
|
console.log(response.data)
|
||||||
axios.post("http://localhost:5000/interstellar_ai/api/voice_recognition", formdata)
|
return response.data.response
|
||||||
.then((response) => {
|
})
|
||||||
console.log(response.data)
|
.catch(error => {
|
||||||
return response.data.response
|
console.log("Error calling API:", error)
|
||||||
})
|
postMessage({ status: 500 })
|
||||||
.catch(error => {
|
return "Error"
|
||||||
console.log("Error calling API:", error)
|
})
|
||||||
postMessage({ status: 500 })
|
}
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
export default VoiceSend;
|
|
|
@ -6,13 +6,17 @@ interface InputProps {
|
||||||
onSendClick: (message: string, override: boolean) => void;
|
onSendClick: (message: string, override: boolean) => void;
|
||||||
onMicClick: () => void;
|
onMicClick: () => void;
|
||||||
inputDisabled: boolean;
|
inputDisabled: boolean;
|
||||||
isRecording:boolean
|
isRecording: boolean
|
||||||
}
|
}
|
||||||
|
|
||||||
const InputFrontend = React.forwardRef<HTMLDivElement, InputProps>(
|
const InputFrontend = React.forwardRef<HTMLDivElement, InputProps>(
|
||||||
({ message, onSendClick, onMicClick, inputDisabled, isRecording}, ref: ForwardedRef<HTMLDivElement>) => {
|
({ message, onSendClick, onMicClick, inputDisabled, isRecording }, ref: ForwardedRef<HTMLDivElement>) => {
|
||||||
const [inputValue, setInputValue] = useState('');
|
const [inputValue, setInputValue] = useState('');
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
setInputValue(message);
|
||||||
|
}, [message]);
|
||||||
|
|
||||||
const handleInputChange = (e: React.ChangeEvent<HTMLInputElement>) => {
|
const handleInputChange = (e: React.ChangeEvent<HTMLInputElement>) => {
|
||||||
setInputValue(e.target.value);
|
setInputValue(e.target.value);
|
||||||
};
|
};
|
||||||
|
@ -40,7 +44,7 @@ const InputFrontend = React.forwardRef<HTMLDivElement, InputProps>(
|
||||||
<button type="button" onClick={() => onSendClick(inputValue, false)} disabled={inputDisabled ? true : false}>
|
<button type="button" onClick={() => onSendClick(inputValue, false)} disabled={inputDisabled ? true : false}>
|
||||||
<img src="/img/send.svg" alt="send" />
|
<img src="/img/send.svg" alt="send" />
|
||||||
</button>
|
</button>
|
||||||
<button className={`microphone-button ${isRecording ? "red": "var(--input-button-color)"}`} type="button" onClick={onMicClick}>
|
<button className={`microphone-button ${isRecording ? "red" : "var(--input-button-color)"}`} type="button" onClick={onMicClick}>
|
||||||
<img src="/img/microphone.svg" alt="microphone" />
|
<img src="/img/microphone.svg" alt="microphone" />
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
|
|
24
py/voice.py
24
py/voice.py
|
@ -7,25 +7,19 @@ class VoiceRecognition:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def recognition(audio):
|
def recognition(audio):
|
||||||
audio_buffer = io.BytesIO(audio.read())
|
audio_buffer = io.BytesIO(audio.read())
|
||||||
|
|
||||||
try:
|
audio_segment = AudioSegment.from_file(audio_buffer, format="ogg")
|
||||||
audio_segment = AudioSegment.from_file(audio_buffer, format="ogg")
|
|
||||||
|
wav_io = io.BytesIO()
|
||||||
wav_io = io.BytesIO()
|
audio_segment.export(wav_io, format="wav")
|
||||||
audio_segment.export(wav_io, format="wav")
|
wav_io.seek(0)
|
||||||
wav_io.seek(0)
|
|
||||||
except:
|
|
||||||
print("audio to wav failed")
|
|
||||||
|
|
||||||
model_size = "base"
|
model_size = "base"
|
||||||
model = WhisperModel(model_size, device="cpu", compute_type=" ")
|
model = WhisperModel(model_size, device="cpu", compute_type="int8")
|
||||||
|
|
||||||
segments, _ = model.transcribe(wav_io)
|
segments, _ = model.transcribe(wav_io)
|
||||||
transcription = ""
|
transcription = ""
|
||||||
for segment in segments:
|
for segment in segments:
|
||||||
transcription += segment.text + " "
|
transcription += segment.text + " "
|
||||||
result = transcription.strip()
|
result = transcription.strip()
|
||||||
print(result)
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
# npm install @ffmpeg/ffmpeg @ffmpeg/util @ffmpeg/types @ffmpeg/core-mt
|
|
Loading…
Reference in a new issue