whisper is better
parent 8cc9b1b015
commit ab12b796ec
2 changed files with 9 additions and 50 deletions
py/api.py: 11 changed lines

@@ -99,16 +99,9 @@ class API:
         @self.app.route('/interstellar_ai/api/voice_recognition', methods=['POST'])
         def voice_recognition():
-            print(request.args)
-            recog_type = request.form.get('type')
-            print(recog_type)
             audio = request.files.get('audio')
-            option = request.form.get('option')
-            if recog_type == "basic":
-                text = self.voice.basic_recognition(audio, option)
-                return jsonify({'status': 200, 'response': text})
-            else:
-                return jsonify({'status': 401, 'response': "Invalid type"})
+            text = self.voice.recognition(audio)
+            return jsonify({'status': 200, 'response': text})

         @self.app.route('/interstellar_ai/api/weather', methods=['POST'])
         def get_weather():
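After this change the route no longer reads the `type` and `option` form fields; a client only needs to POST the audio file. A minimal sketch of calling the simplified endpoint, where the base URL and the sample file name are assumptions and not part of this commit:

    # Hypothetical client call; base URL and file name are placeholders.
    import requests

    with open("sample.ogg", "rb") as f:
        resp = requests.post(
            "http://localhost:5000/interstellar_ai/api/voice_recognition",
            files={"audio": f},
        )
    print(resp.json())  # expected shape: {'status': 200, 'response': '<transcript>'}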
py/voice.py: 48 changed lines

@@ -1,44 +1,10 @@
-import speech_recognition as sr
-from pydub import AudioSegment
+import whisper


 class VoiceRecognition:
-    def check_audio_format(self, file_path):
-        try:
-            audio = AudioSegment.from_ogg(file_path)
-            print(f"Audio format: {audio.format}")
-            return True
-        except Exception as e:
-            print(f"Error reading audio file: {e}")
-            return False
-
-    def basic_recognition(self, audio, option):
-        print(type(audio))
-        print("preparing")
-        r = sr.Recognizer()
-
-        # Read the data from the FileStorage object
-        audio_data = audio.read()
-
-        # Write the audio data to a file
-        with open('output.wav', 'wb') as file:
-            file.write(audio_data)
-
-        self.check_audio_format(audio)
-        if option == "online":
-            with sr.AudioFile(audio) as source:
-                print(type(source))
-                print("online")
-                text = r.recognize_google_cloud(source)
-                print("recognized as: " + text)
-                return text
-        elif option == "offline":
-            with sr.AudioFile(audio) as source:
-                print(type(source))
-                print("offline")
-                text = r.recognize_sphinx(source)
-                print("recognized as: " + text)
-                return text
-
-        print("nothing")
-        return False
+    @staticmethod
+    def recognition(audio):
+        model = whisper.load_model("base")
+        result = model.transcribe(audio)
+        print(result["text"])
+        return result["text"]
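Worth noting about the new path: `whisper.load_model("base").transcribe(...)` expects a file path (or a decoded audio array) and relies on ffmpeg being installed, while the route now passes the raw Flask `FileStorage` upload straight through, so in practice the upload likely needs to be written to disk before `recognition()` is called. A minimal sketch of that intermediate step, assuming a temporary file; the helper below is an illustration, not part of this commit:

    # Sketch only: persist the uploaded FileStorage before handing it to Whisper.
    import tempfile

    import whisper


    def transcribe_upload(file_storage):
        # Werkzeug's FileStorage.save() writes the upload to the given path.
        with tempfile.NamedTemporaryFile(suffix=".ogg", delete=False) as tmp:
            file_storage.save(tmp.name)
            path = tmp.name
        model = whisper.load_model("base")  # same model size as voice.py uses
        result = model.transcribe(path)
        return result["text"]

Loading the model once at startup rather than on every call would also avoid re-initialising Whisper per request, but that is a separate change from what this commit does.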