Merge pull request 'Merge pull request 'Merge pull request 'Merge pull request 'Fixed the Models utilising useEffect' (#49) from sageTheDm/interstellar_ai:main into main' (#19) from React-Group/interstellar_ai:main into main' (#50) from sageTheDm/interstellar_ai:main into…' (#20) from React-Group/interstellar_ai:main into main

Reviewed-on: https://interstellardevelopment.org/code/code/sageTheDm/interstellar_ai/pulls/20
sageTheDm 2024-09-26 09:53:53 +02:00
commit 58fb335794
11 changed files with 179 additions and 57 deletions

View file

@@ -1,39 +0,0 @@
// import React, { useState, useRef } from 'react'

// const AudioRecorder: React.FC = () => {
//   const [isRecording, setIsRecording] = useState(false)
//   const [audioURL, setAudioURL] = useState<string | null>(null)
//   const medaRecorderRef = useRef<MediaRecorder | null>(null)
//   const audioChunks = useRef<Blob[]>([])

//   const startRecording = async () => {
//     const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
//     const mediaRecorder = new MediaRecorder(stream)
//     medaRecorderRef.current = mediaRecorder

//     mediaRecorder.ondataavailable = (event) => {
//       audioChunks.current.push(event.data)
//     }

//     mediaRecorder.onstop = () => {
//       const audioBlob = new Blob(audioChunks.current, { type: "audio/wav" })
//       const url = URL.createObjectURL(audioBlob)
//       setAudioURL(url)
//       audioChunks.current = []
//     }

//     mediaRecorder.start()
//     setIsRecording(true)

//     const stopRecording = () => {
//       medaRecorderRef.current?.stop()
//       setIsRecording(false)
//     }

//     return (
//       <div></div>
//     )
//   }
// }

// export default AudioRecorder

View file

@@ -0,0 +1,34 @@
import React, { useState, useRef } from 'react'

export const AudioRecorder = () => {
  const [isRecording, setIsRecording] = useState(false)
  const [audioURL, setAudioURL] = useState<string | null>(null)
  const mediaRecorderRef = useRef<MediaRecorder | null>(null)
  const audioChunks = useRef<Blob[]>([])

  const startRecording = async () => {
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
    const mediaRecorder = new MediaRecorder(stream)
    mediaRecorderRef.current = mediaRecorder

    mediaRecorder.ondataavailable = (event) => {
      audioChunks.current.push(event.data)
    }

    mediaRecorder.onstop = () => {
      const audioBlob = new Blob(audioChunks.current, { type: "audio/wav" })
      const url = URL.createObjectURL(audioBlob)
      setAudioURL(url)
      audioChunks.current = []
    }

    mediaRecorder.start()
    setIsRecording(true)
  }

  const stopRecording = () => {
    mediaRecorderRef.current?.stop()
    setIsRecording(false)
  }
}
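
Note that, as committed, AudioRecorder declares its state and handlers but never returns them (nor any JSX), so the import in InputOutputBackend below cannot actually reach startRecording or stopRecording — which is presumably why the same logic is duplicated there. A minimal hook-style sketch that exposes the controls; the name useAudioRecorder and the use of the recorder's real mimeType instead of a hard-coded "audio/wav" are assumptions, not part of this commit:

    import { useState, useRef } from 'react'

    // Hypothetical variant: identical recording logic, but the controls are
    // returned so a component can call them and render from the state.
    export const useAudioRecorder = () => {
      const [isRecording, setIsRecording] = useState(false)
      const [audioURL, setAudioURL] = useState<string | null>(null)
      const mediaRecorderRef = useRef<MediaRecorder | null>(null)
      const audioChunks = useRef<Blob[]>([])

      const startRecording = async () => {
        const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
        const mediaRecorder = new MediaRecorder(stream)
        mediaRecorderRef.current = mediaRecorder
        mediaRecorder.ondataavailable = (event) => audioChunks.current.push(event.data)
        mediaRecorder.onstop = () => {
          // Label the blob with what was actually recorded, not "audio/wav".
          const audioBlob = new Blob(audioChunks.current, { type: mediaRecorder.mimeType })
          setAudioURL(URL.createObjectURL(audioBlob))
          audioChunks.current = []
        }
        mediaRecorder.start()
        setIsRecording(true)
      }

      const stopRecording = () => {
        mediaRecorderRef.current?.stop()
        setIsRecording(false)
      }

      return { isRecording, audioURL, startRecording, stopRecording }
    }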

View file

@@ -2,8 +2,10 @@
 import React, { useEffect, useRef, useState } from "react";
 import ConversationFrontend from "../components/ConversationFrontend";
 import InputFrontend from "../components/InputFrontend";
+import VoiceSend from "./voice_backend"
+import { AudioRecorder } from "./AudioRecorder";
 import axios from "axios";
+import { skip } from "node:test";

 const InputOutputBackend: React.FC = () => {
   type Message = {
@@ -18,7 +20,11 @@ const InputOutputBackend: React.FC = () => {
   const [liveMessage, setLiveMessage] = useState("")
   const [inputMessage, setInputMessage] = useState<string>("")
   const [inputDisabled, setInputDisabled] = useState(false)
-  const [lastMessage, setLastMessage] = useState<Message>({ role: "user", content: "Not supposed to happen." })
+  const [isRecording, setIsRecording] = useState(false)
+  const [audioURL, setAudioURL] = useState<string | null>(null)
+  const mediaRecorderRef = useRef<MediaRecorder | null>(null)
+  const audioChunks = useRef<Blob[]>([])

   console.log(messages);
@@ -123,7 +129,6 @@ const InputOutputBackend: React.FC = () => {
   }

   const handleSendClick = (inputValue: string, override: boolean) => {
     if (inputValue != "") {
-      console.log(inputDisabled)
       if (!inputDisabled || override) {
         setInputDisabled(true)
         if (postWorkerRef.current) {
@@ -136,9 +141,43 @@ const InputOutputBackend: React.FC = () => {
     }
   }

+  const startRecording = async () => {
+    const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
+    const mediaRecorder = new MediaRecorder(stream)
+    mediaRecorderRef.current = mediaRecorder
+
+    mediaRecorder.ondataavailable = (event) => {
+      audioChunks.current.push(event.data)
+    }
+
+    mediaRecorder.onstop = () => {
+      const audioBlob = new Blob(audioChunks.current, { type: "audio/wav" })
+      const url = URL.createObjectURL(audioBlob)
+      console.log(url);
+      setAudioURL(url)
+      audioChunks.current = []
+      const remote = new VoiceSend()
+      remote.sendToVoiceRecognition(audioBlob,)
+    }
+
+    mediaRecorder.start()
+    setIsRecording(true)
+  }
+
+  const stopRecording = () => {
+    mediaRecorderRef.current?.stop()
+    setIsRecording(false)
+  }
+
   const handleMicClick = () => {
-    // do stuff
-  }
+    if (!isRecording) {
+      startRecording();
+    } else {
+      stopRecording();
+    }
+  };

   const handleResendClick = () => {
     var temporary_message = messages[messages.length - 2]['content']
@@ -180,6 +219,7 @@ const InputOutputBackend: React.FC = () => {
           onSendClick={handleSendClick}
           onMicClick={handleMicClick}
           inputDisabled={inputDisabled}
+          isRecording={isRecording}
         />
       </div>
   )
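
One caveat in the recording logic above: wrapping the chunks in new Blob(..., { type: "audio/wav" }) only labels the data as WAV — MediaRecorder encodes whatever container the browser supports, typically audio/webm or audio/ogg with Opus, and the speech_recognition backend further down expects a genuine WAV/AIFF/FLAC file. A sketch of picking an explicit, supported container up front (MediaRecorder.isTypeSupported is a standard API; the preference order here is an assumption):

    // Choose a container the browser can actually record instead of assuming WAV.
    const createRecorder = (stream: MediaStream): MediaRecorder => {
      const preferred = ["audio/wav", "audio/webm;codecs=opus", "audio/ogg;codecs=opus"]
      const mimeType = preferred.find((t) => MediaRecorder.isTypeSupported(t))
      return new MediaRecorder(stream, mimeType ? { mimeType } : undefined)
    }

    // In onstop, label the Blob with what was actually recorded:
    //   new Blob(audioChunks.current, { type: mediaRecorderRef.current?.mimeType })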

View file

@@ -0,0 +1,30 @@
import axios from "axios";

class VoiceSend {
  sendToVoiceRecognition(audio_data: Blob) {
    console.log("sending recording...");
    console.log(typeof (audio_data));
    console.log(audio_data instanceof Blob);

    const formdata = new FormData()
    formdata.append("audio", audio_data)
    formdata.append("option", "offline")
    formdata.append("type", "basic")

    const dataSend = { option: "offline", type: "basic", audio: audio_data }
    axios.post("http://localhost:5000/interstellar_ai/api/voice_recognition", formdata)
      .then((response) => {
        console.log(response.data)
        return response.data.response
      })
      .catch(error => {
        console.log("Error calling API:", error)
        postMessage({ status: 500 })
      })
  }
}

export default VoiceSend;
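
As written, sendToVoiceRecognition resolves response.data.response inside its own .then and returns nothing to the caller, so the transcription can never reach the UI (the unused dataSend object could also be dropped). A minimal sketch that returns the promise instead — same endpoint and form fields as above; wiring the result to setInputMessage is an assumption about the intended behavior:

    import axios from "axios";

    class VoiceSend {
      // Returns the transcribed text so callers can await it.
      sendToVoiceRecognition(audio_data: Blob): Promise<string> {
        const formdata = new FormData();
        formdata.append("audio", audio_data);
        formdata.append("option", "offline");
        formdata.append("type", "basic");
        return axios
          .post("http://localhost:5000/interstellar_ai/api/voice_recognition", formdata)
          .then((response) => response.data.response)
          .catch((error) => {
            console.log("Error calling API:", error);
            return "";
          });
      }
    }

    export default VoiceSend;

    // Possible usage in the onstop handler:
    //   new VoiceSend().sendToVoiceRecognition(audioBlob).then(setInputMessage)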

View file

@@ -1,14 +1,16 @@
 import React, { useState, ForwardedRef, useEffect } from 'react';
+import "../styles/variables.css"

 interface InputProps {
   message: string;
   onSendClick: (message: string, override: boolean) => void;
   onMicClick: () => void;
-  inputDisabled: boolean
+  inputDisabled: boolean;
+  isRecording: boolean
 }

 const InputFrontend = React.forwardRef<HTMLDivElement, InputProps>(
-  ({ message, onSendClick, onMicClick, inputDisabled }, ref: ForwardedRef<HTMLDivElement>) => {
+  ({ message, onSendClick, onMicClick, inputDisabled, isRecording }, ref: ForwardedRef<HTMLDivElement>) => {
    const [inputValue, setInputValue] = useState('');

    useEffect(() => {
@@ -29,6 +31,10 @@ const InputFrontend = React.forwardRef<HTMLDivElement, InputProps>(
      }
    };

+   const styles = {
+   }
+
    return (
      <div className="input" id="inputForm" ref={ref}>
        <input
@@ -42,7 +48,7 @@ const InputFrontend = React.forwardRef<HTMLDivElement, InputProps>(
        <button type="button" onClick={() => onSendClick(inputValue, false)} disabled={inputDisabled ? true : false}>
          <img src="/img/send.svg" alt="send" />
        </button>
-       <button type="button" onClick={onMicClick}>
+       <button className={`microphone-button ${isRecording ? "red" : "green"}`} type="button" onClick={onMicClick}>
          <img src="/img/microphone.svg" alt="microphone" />
        </button>
      </div>
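
Two incidental cleanups this diff leaves behind (suggestions, not part of the commit): the empty styles object is dead code, and the ternary on disabled is redundant since the prop is already a boolean:

    <button type="button" onClick={() => onSendClick(inputValue, false)} disabled={inputDisabled}>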

View file

@@ -59,3 +59,19 @@
   background-color: var(--input-button-hover-color);
   box-shadow: 0 6px 15px rgba(0, 0, 0, 0.2);
 }
+
+.microphone-button.red {
+  background-color: var(--close-button-color);
+}
+
+.microphone-button.green {
+  background-color: var(--button-background-color);
+}
+
+.microphone-button.red:hover {
+  background-color: var(--close-button-hover-color);
+}
+
+.microphone-button.green:hover {
+  background-color: var(--input-button-hover-color);
+}

View file

@@ -19,6 +19,7 @@
   --conversation-background-color: #79832e; /* Background color for conversation container */
   --doc-background-color: #ffffff; /* Background color for documents */
   --close-button-color: red;
+  --close-button-hover-color: #9e0101; /*NEW*/
   --burger-menu-background-color: #79832e; /*NEW*/
   --overlay-text-color: white; /*NEW*/

View file

@@ -6,6 +6,7 @@ from ai import AI
 from db import DB
 from weather import Weather
 from voice import VoiceRecognition
+from tts import TTS

 class API:
@@ -17,6 +18,7 @@ class API:
         self.db = DB()
         self.weather = Weather()
         self.voice = VoiceRecognition()
+        self.tts = TTS()
         self.db.load_database()
         self.ai_response_lock = threading.Lock()
         CORS(self.app)
@@ -97,13 +99,16 @@ class API:
         @self.app.route('/interstellar_ai/api/voice_recognition', methods=['POST'])
         def voice_recognition():
-            recognition_type = request.args.get('type')
-            audio = request.args.get('audio_data')
-            option = request.args.get('option')
-            if recognition_type == "basic":
-                return jsonify({'status': 200, 'response': self.voice.basic_recognition(audio, option)})
-            return jsonify({'status': 401, 'response': "Invalid type"})
+            print(request.args)
+            recog_type = request.form.get('type')
+            print(recog_type)
+            audio = request.files.get('audio')
+            option = request.form.get('option')
+            if recog_type == "basic":
+                text = self.voice.basic_recognition(audio, option)
+                return jsonify({'status': 200, 'response': text})
+            else:
+                return jsonify({'status': 401, 'response': "Invalid type"})

         @self.app.route('/interstellar_ai/api/weather', methods=['POST'])
         def get_weather():
@@ -113,6 +118,11 @@ class API:
         self.app.run(debug=True, host='0.0.0.0', port=5000)

+        @self.app.route('/interstellar_ai/api/tts', methods=['POST'])
+        def tts():
+            text = request.args.get('text')
+            return jsonify({'status': 200, 'response': self.tts.gen_tts(text)})

 api = API()
 api.run()

View file

@@ -10,4 +10,7 @@ PocketSphinx
 google-cloud-speech
 google-generativeai
 python-weather
 pycouchdb
+pyttsx3
+pip-licenses
+openai-whisper

py/tts.py Normal file
View file

@@ -0,0 +1,10 @@
import pyttsx3

class TTS:
    @staticmethod
    def gen_tts(text):
        engine = pyttsx3.init()
        engine.setProperty('rate', 70)
        engine.say(text)
        engine.runAndWait()
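
Note that gen_tts speaks the text on the server's own audio device and returns None, so the new /tts route will answer with 'response': null and the browser never receives any audio. If the intent is to send speech back to the client, a sketch using pyttsx3's save_to_file (the output path and return value are assumptions, not part of this commit):

    import pyttsx3

    class TTS:
        @staticmethod
        def gen_tts(text):
            # Render to a file instead of the server's speakers, and return
            # the path so the route can serve it (e.g. via Flask's send_file).
            engine = pyttsx3.init()
            engine.setProperty('rate', 70)
            engine.save_to_file(text, 'tts_output.wav')
            engine.runAndWait()
            return 'tts_output.wav'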

View file

@@ -4,10 +4,21 @@ import speech_recognition as sr
 class VoiceRecognition:
     @staticmethod
     def basic_recognition(audio, option):
+        print(type(audio))
+        print("preparing")
         r = sr.Recognizer()
         if option == "online":
-            return r.recognize_google_cloud(audio)
+            with sr.AudioFile(audio) as source:
+                print("online")
+                text = r.recognize_google_cloud(source)
+                print("recognized as: " + text)
+                return text
         elif option == "offline":
-            return r.recognize_sphinx(audio)
+            with sr.AudioFile(audio) as source:
+                print("offline")
+                text = r.recognize_sphinx(source)
+                print("recognized as: " + text)
+                return text
+        print("nothing")
         return False
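
A likely follow-up issue here: in speech_recognition, the recognize_* methods take an AudioData object, while this code passes the AudioFile context manager straight in, which fails at runtime. The library's documented pattern is to read the source with r.record() first; saving the uploaded Flask FileStorage to a temporary WAV before opening it is an additional assumption:

    import speech_recognition as sr

    class VoiceRecognition:
        @staticmethod
        def basic_recognition(audio, option):
            r = sr.Recognizer()
            # `audio` arrives as a Flask FileStorage; persist it so
            # sr.AudioFile can parse it (it must already be WAV/AIFF/FLAC).
            audio.save("upload.wav")
            with sr.AudioFile("upload.wav") as source:
                audio_data = r.record(source)  # AudioData, as recognize_* expects
            if option == "online":
                return r.recognize_google_cloud(audio_data)
            elif option == "offline":
                return r.recognize_sphinx(audio_data)
            return False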