diff --git a/py/api.py b/py/api.py index f3e13ea..0951717 100644 --- a/py/api.py +++ b/py/api.py @@ -99,9 +99,16 @@ class API: @self.app.route('/interstellar_ai/api/voice_recognition', methods=['POST']) def voice_recognition(): + print(request.args) + recog_type = request.form.get('type') + print(recog_type) audio = request.files.get('audio') - text = self.voice.recognition(audio) - return jsonify({'status': 200, 'response': text}) + option = request.form.get('option') + if recog_type == "basic": + text = self.voice.basic_recognition(audio, option) + return jsonify({'status': 200, 'response': text}) + else: + return jsonify({'status': 401, 'response': "Invalid type"}) @self.app.route('/interstellar_ai/api/weather', methods=['POST']) def get_weather(): diff --git a/py/requirements.txt b/py/requirements.txt index 7ea1a85..8c6a016 100644 --- a/py/requirements.txt +++ b/py/requirements.txt @@ -13,5 +13,4 @@ python-weather pycouchdb pyttsx3 pip-licenses -openai-whisper -pydub \ No newline at end of file +openai-whisper \ No newline at end of file diff --git a/py/voice.py b/py/voice.py index b4516f0..ec6c407 100644 --- a/py/voice.py +++ b/py/voice.py @@ -1,10 +1,24 @@ -import whisper +import speech_recognition as sr class VoiceRecognition: @staticmethod - def recognition(audio): - model = whisper.load_model("base") - result = model.transcribe(audio) - print(result["text"]) - return result["text"] + def basic_recognition(audio, option): + print(type(audio)) + print("preparing") + r = sr.Recognizer() + if option == "online": + with sr.AudioFile(audio) as source: + print("online") + text = r.recognize_google_cloud(source) + print("recognized as: " + text) + return text + elif option == "offline": + with sr.AudioFile(audio) as source: + print("offline") + text = r.recognize_sphinx(source) + print("recognized as: " + text) + return text + + print("nothing") + return False