ai-virtual-assistant/py/api.py

51 lines
1.8 KiB
Python
Raw Normal View History

2024-09-16 10:03:26 +02:00
import requests
2024-09-16 10:29:26 +02:00
import json
2024-09-16 11:44:35 +02:00
from transformers import AutoTokenizer, LlamaForCausalLM
2024-09-16 10:03:26 +02:00
2024-09-16 11:27:00 +02:00
class API:
    """Send prompts to a local Ollama server or a Hugging Face Llama model.

    ``send_message`` is the entry point: it maps a numeric menu choice to a
    backend and model identifier and returns the generated text.
    """

    # Menu choice -> (backend, model identifier) used by send_message().
    _MODEL_CHOICES = {
        1: ("local", "phi3.5"),
        2: ("local", "gemma2:2b"),
        3: ("local", "qwen2:0.5b"),
        4: ("local", "codegemma:2b"),
        5: ("transformers", "meta-llama/Meta-Llama-3.1-8B"),
    }

    @staticmethod
    def process_text_transformers(prompt, model, max_length=30):
        """Generate a completion for *prompt* with a Llama checkpoint.

        Args:
            prompt: Text to feed to the model.
            model: Checkpoint name or path passed to ``from_pretrained``.
            max_length: Forwarded to ``generate`` as ``max_length``
                (defaults to the previously hard-coded 30).

        Returns:
            The first decoded sequence, with special tokens skipped.
        """
        # Keep `model` bound to the checkpoint name: the tokenizer must be
        # loaded from the same identifier, not from the model object.
        # NOTE: both model and tokenizer are loaded on every call.
        llm = LlamaForCausalLM.from_pretrained(model)
        tokenizer = AutoTokenizer.from_pretrained(model)
        inputs = tokenizer(prompt, return_tensors="pt")
        generate_ids = llm.generate(inputs.input_ids, max_length=max_length)
        decoded = tokenizer.batch_decode(
            generate_ids,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )
        return decoded[0]

    @staticmethod
    def process_text_local(prompt, model, ollama_url="http://localhost:11434"):
        """Generate a completion for *prompt* via an Ollama HTTP server.

        Posts to ``{ollama_url}/api/generate`` and concatenates the
        ``"response"`` field of every JSON line in the reply.

        Args:
            prompt: Text to send to the model.
            model: Ollama model name.
            ollama_url: Base URL of the server (defaults to the previously
                hard-coded local address).

        Returns:
            The last line of the generated text, ``""`` when nothing was
            generated, or a string starting with ``"Error: "`` when the
            server answers with a non-200 status or an error object.
            Connection failures raised by ``requests`` propagate unchanged.
        """
        # stream=True lets iter_lines() consume the reply as it arrives; the
        # context manager releases the connection on every exit path.
        with requests.post(
            f"{ollama_url}/api/generate",
            json={"model": model, "prompt": prompt},
            stream=True,
        ) as response:
            if response.status_code != 200:
                return "Error: " + response.text

            fragments = []
            for line in response.iter_lines():
                if not line:
                    # Blank keep-alive lines are not JSON; skip them.
                    continue
                chunk = json.loads(line.decode("utf-8"))
                if "error" in chunk:
                    # Presumably how the server reports a failure mid-stream;
                    # report it in the same style as the HTTP error branch.
                    return "Error: " + str(chunk["error"])
                fragments.append(chunk.get("response", ""))

        answer_lines = "".join(fragments).splitlines()
        # NOTE(review): only the last line of the answer is returned, so
        # multi-line answers are truncated -- kept as-is; confirm intent.
        return answer_lines[-1] if answer_lines else ""

    def send_message(self, message, model):
        """Answer *message* with the model selected by the menu choice.

        Args:
            message: Prompt text.
            model: Menu choice; 1-4 select Ollama models, 5 selects the
                Hugging Face Llama checkpoint.

        Returns:
            The generated text, or ``"Invalid choice"`` for any other value.
        """
        try:
            backend, model_name = self._MODEL_CHOICES[model]
        except (KeyError, TypeError):
            # TypeError covers unhashable choices, which the former
            # if/elif chain also treated as invalid.
            return "Invalid choice"

        if backend == "local":
            return self.process_text_local(message, model_name)
        return self.process_text_transformers(message, model_name)