forked from React-Group/interstellar_ai
whisper is better
This commit is contained in:
parent
8cc9b1b015
commit
ab12b796ec
2 changed files with 9 additions and 50 deletions
11
py/api.py
11
py/api.py
|
@ -99,16 +99,9 @@ class API:
|
||||||
|
|
||||||
@self.app.route('/interstellar_ai/api/voice_recognition', methods=['POST'])
|
@self.app.route('/interstellar_ai/api/voice_recognition', methods=['POST'])
|
||||||
def voice_recognition():
|
def voice_recognition():
|
||||||
print(request.args)
|
|
||||||
recog_type = request.form.get('type')
|
|
||||||
print(recog_type)
|
|
||||||
audio = request.files.get('audio')
|
audio = request.files.get('audio')
|
||||||
option = request.form.get('option')
|
text = self.voice.recognition(audio)
|
||||||
if recog_type == "basic":
|
return jsonify({'status': 200, 'response': text})
|
||||||
text = self.voice.basic_recognition(audio, option)
|
|
||||||
return jsonify({'status': 200, 'response': text})
|
|
||||||
else:
|
|
||||||
return jsonify({'status': 401, 'response': "Invalid type"})
|
|
||||||
|
|
||||||
@self.app.route('/interstellar_ai/api/weather', methods=['POST'])
|
@self.app.route('/interstellar_ai/api/weather', methods=['POST'])
|
||||||
def get_weather():
|
def get_weather():
|
||||||
|
|
48
py/voice.py
48
py/voice.py
|
@ -1,44 +1,10 @@
|
||||||
import speech_recognition as sr
|
import whisper
|
||||||
from pydub import AudioSegment
|
|
||||||
|
|
||||||
|
|
||||||
class VoiceRecognition:
|
class VoiceRecognition:
|
||||||
def check_audio_format(self, file_path):
|
@staticmethod
|
||||||
try:
|
def recognition(audio):
|
||||||
audio = AudioSegment.from_ogg(file_path)
|
model = whisper.load_model("base")
|
||||||
print(f"Audio format: {audio.format}")
|
result = model.transcribe(audio)
|
||||||
return True
|
print(result["text"])
|
||||||
except Exception as e:
|
return result["text"]
|
||||||
print(f"Error reading audio file: {e}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
def basic_recognition(self, audio, option):
|
|
||||||
print(type(audio))
|
|
||||||
print("preparing")
|
|
||||||
r = sr.Recognizer()
|
|
||||||
|
|
||||||
# Read the data from the FileStorage object
|
|
||||||
audio_data = audio.read()
|
|
||||||
|
|
||||||
# Write the audio data to a file
|
|
||||||
with open('output.wav', 'wb') as file:
|
|
||||||
file.write(audio_data)
|
|
||||||
|
|
||||||
self.check_audio_format(audio)
|
|
||||||
if option == "online":
|
|
||||||
with sr.AudioFile(audio) as source:
|
|
||||||
print(type(source))
|
|
||||||
print("online")
|
|
||||||
text = r.recognize_google_cloud(source)
|
|
||||||
print("recognized as: " + text)
|
|
||||||
return text
|
|
||||||
elif option == "offline":
|
|
||||||
with sr.AudioFile(audio) as source:
|
|
||||||
print(type(source))
|
|
||||||
print("offline")
|
|
||||||
text = r.recognize_sphinx(source)
|
|
||||||
print("recognized as: " + text)
|
|
||||||
return text
|
|
||||||
|
|
||||||
print("nothing")
|
|
||||||
return False
|
|
||||||
|
|
Loading…
Reference in a new issue