From ab12b796ec9a2c1fe16eb84fb4bf3ea64bc04c52 Mon Sep 17 00:00:00 2001
From: Patrick_Pluto
Date: Thu, 26 Sep 2024 11:01:15 +0200
Subject: [PATCH] whisper is better

---
 py/api.py   | 11 ++---------
 py/voice.py | 48 +++++++-----------------------------------------
 2 files changed, 9 insertions(+), 50 deletions(-)

diff --git a/py/api.py b/py/api.py
index 0951717..f3e13ea 100644
--- a/py/api.py
+++ b/py/api.py
@@ -99,16 +99,9 @@ class API:
 
         @self.app.route('/interstellar_ai/api/voice_recognition', methods=['POST'])
         def voice_recognition():
-            print(request.args)
-            recog_type = request.form.get('type')
-            print(recog_type)
             audio = request.files.get('audio')
-            option = request.form.get('option')
-            if recog_type == "basic":
-                text = self.voice.basic_recognition(audio, option)
-                return jsonify({'status': 200, 'response': text})
-            else:
-                return jsonify({'status': 401, 'response': "Invalid type"})
+            text = self.voice.recognition(audio)
+            return jsonify({'status': 200, 'response': text})
 
         @self.app.route('/interstellar_ai/api/weather', methods=['POST'])
         def get_weather():
diff --git a/py/voice.py b/py/voice.py
index b9500da..b4516f0 100644
--- a/py/voice.py
+++ b/py/voice.py
@@ -1,44 +1,10 @@
-import speech_recognition as sr
-from pydub import AudioSegment
+import whisper
 
 
 class VoiceRecognition:
-    def check_audio_format(self, file_path):
-        try:
-            audio = AudioSegment.from_ogg(file_path)
-            print(f"Audio format: {audio.format}")
-            return True
-        except Exception as e:
-            print(f"Error reading audio file: {e}")
-            return False
-
-    def basic_recognition(self, audio, option):
-        print(type(audio))
-        print("preparing")
-        r = sr.Recognizer()
-
-        # Read the data from the FileStorage object
-        audio_data = audio.read()
-
-        # Write the audio data to a file
-        with open('output.wav', 'wb') as file:
-            file.write(audio_data)
-
-        self.check_audio_format(audio)
-        if option == "online":
-            with sr.AudioFile(audio) as source:
-                print(type(source))
-                print("online")
-                text = r.recognize_google_cloud(source)
-                print("recognized as: " + text)
-                return text
-        elif option == "offline":
-            with sr.AudioFile(audio) as source:
-                print(type(source))
-                print("offline")
-                text = r.recognize_sphinx(source)
-                print("recognized as: " + text)
-                return text
-
-        print("nothing")
-        return False
+    @staticmethod
+    def recognition(audio):
+        model = whisper.load_model("base")
+        result = model.transcribe(audio)
+        print(result["text"])
+        return result["text"]
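
Note (not part of the patch): whisper's transcribe() accepts a file path, a NumPy
array, or a torch tensor, while Flask's request.files.get('audio') yields a
Werkzeug FileStorage object. Below is a minimal sketch of how the new
recognition() call could bridge the two, assuming the upload is first persisted
to a temporary file; the helper name and temp-file handling are illustrative
assumptions, not taken from this patch:

    import os
    import tempfile

    import whisper

    def transcribe_upload(audio):
        # Assumption: 'audio' is a Werkzeug FileStorage from request.files.
        # Whisper loads audio via ffmpeg, so it needs a real file on disk.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            audio.save(tmp)  # FileStorage.save() accepts an open file object
            path = tmp.name
        try:
            model = whisper.load_model("base")  # cache in practice; loading is slow
            result = model.transcribe(path)
            return result["text"]
        finally:
            os.remove(path)  # clean up the temporary file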