whisper is better

This commit is contained in:
Patrick_Pluto 2024-09-26 11:01:15 +02:00
parent 8cc9b1b015
commit ab12b796ec
2 changed files with 9 additions and 50 deletions

View file

@ -99,16 +99,9 @@ class API:
# Route handler for POST /interstellar_ai/api/voice_recognition.
# Reads the form fields 'type' and 'option' plus the uploaded file 'audio',
# and answers with a JSON object holding 'status' and 'response'. The status
# value travels inside the JSON payload; no HTTP status code is set here.
# NOTE(review): this span looks like a rendered diff hunk (see the
# `@ -99,16 +99,9 @` header above it), so removed and added lines are
# presumably interleaved - confirm against the real file.
@self.app.route('/interstellar_ai/api/voice_recognition', methods=['POST'])
def voice_recognition():
# Debug output of the query-string arguments.
print(request.args)
# 'type' selects the recognition mode; only "basic" is handled below.
recog_type = request.form.get('type')
print(recog_type)
# Each .get() yields None when the corresponding field/file is missing.
audio = request.files.get('audio')
option = request.form.get('option')
if recog_type == "basic":
# Delegate to the voice helper, forwarding the upload and the option.
text = self.voice.basic_recognition(audio, option)
return jsonify({'status': 200, 'response': text})
else:
# Any other (or missing) type is rejected with status 401 in the payload.
return jsonify({'status': 401, 'response': "Invalid type"})
# As written, both branches above return, so the two lines below can never
# execute. NOTE(review): presumably they are the replacement body that
# calls self.voice.recognition - confirm.
text = self.voice.recognition(audio)
return jsonify({'status': 200, 'response': text})
@self.app.route('/interstellar_ai/api/weather', methods=['POST'])
def get_weather():

View file

@ -1,44 +1,10 @@
import speech_recognition as sr
from pydub import AudioSegment
import whisper
class VoiceRecognition:
    """Speech-to-text helpers built on SpeechRecognition, pydub and Whisper."""

    # Whisper model shared by all calls to ``recognition``. It is loaded on
    # first use because ``whisper.load_model`` is too expensive to repeat
    # for every request.
    _model = None

    def check_audio_format(self, file_path):
        """Report whether ``file_path`` can be decoded as OGG by pydub.

        Args:
            file_path: Handed unchanged to ``AudioSegment.from_ogg``.

        Returns:
            True when decoding and the format print succeed, False when
            anything in that process raises (the error is printed).
        """
        try:
            audio = AudioSegment.from_ogg(file_path)
            # NOTE(review): confirm that AudioSegment exposes ``format``; if
            # it does not, the AttributeError is caught below and this
            # method returns False even for a decodable file.
            print(f"Audio format: {audio.format}")
            return True
        except Exception as e:
            # Purely diagnostic helper: report the failure instead of raising.
            print(f"Error reading audio file: {e}")
            return False

    def basic_recognition(self, audio, option):
        """Transcribe an uploaded audio file with SpeechRecognition.

        Args:
            audio: Binary file-like upload (the original comment names it a
                FileStorage object); it is read once and saved to
                ``output.wav``.
            option: ``"online"`` uses ``recognize_google_cloud``,
                ``"offline"`` uses ``recognize_sphinx``.

        Returns:
            The recognized text, or False when ``option`` is neither
            ``"online"`` nor ``"offline"``.
        """
        print(type(audio))
        print("preparing")

        # Persist the upload and work from the saved copy afterwards:
        # ``read()`` leaves the stream at EOF, so handing ``audio`` itself to
        # later readers would give them no data.
        wav_path = 'output.wav'
        with open(wav_path, 'wb') as file:
            file.write(audio.read())

        # Diagnostic only; the result is deliberately ignored.
        self.check_audio_format(wav_path)

        engines = {
            "online": "recognize_google_cloud",
            "offline": "recognize_sphinx",
        }
        engine_name = engines.get(option)
        if engine_name is None:
            print("nothing")
            return False

        r = sr.Recognizer()
        with sr.AudioFile(wav_path) as source:
            print(type(source))
            print(option)
            # The recognize_* methods require AudioData, which only
            # ``record`` produces; the AudioFile source itself is rejected.
            audio_data = r.record(source)

        text = getattr(r, engine_name)(audio_data)
        print("recognized as: " + text)
        return text

    @staticmethod
    def recognition(audio):
        """Transcribe ``audio`` with the Whisper "base" model.

        Args:
            audio: Either something ``model.transcribe`` accepts directly
                (a file path or an audio array/tensor), or a binary
                file-like object such as an uploaded file.

        Returns:
            The transcribed text.
        """
        import os
        import tempfile

        if VoiceRecognition._model is None:
            VoiceRecognition._model = whisper.load_model("base")
        model = VoiceRecognition._model

        if hasattr(audio, "read"):
            # ``transcribe`` cannot consume a file object, so spool the
            # upload to a temporary file and pass its path instead.
            fd, tmp_path = tempfile.mkstemp()
            try:
                with os.fdopen(fd, "wb") as tmp:
                    tmp.write(audio.read())
                result = model.transcribe(tmp_path)
            finally:
                os.remove(tmp_path)
        else:
            result = model.transcribe(audio)

        print(result["text"])
        return result["text"]