import json

import requests
from transformers import AutoTokenizer, LlamaForCausalLM

# Menu choice -> model tag served by the local Ollama instance.
_LOCAL_MODELS = {
    1: "phi3.5",
    2: "gemma2:2b",
    3: "qwen2:0.5b",
    4: "codegemma:2b",
}

# Menu choice -> Hugging Face checkpoint loaded through transformers.
_TRANSFORMERS_MODELS = {
    5: "meta-llama/Meta-Llama-3.1-8B",
}


class API:
    """Send a prompt to one of several text-generation backends.

    Choices 1-4 go to a local Ollama server over HTTP; choice 5 loads a
    Llama checkpoint in-process through the transformers library.
    """

    @staticmethod
    def process_text_transformers(prompt, model):
        """Generate a completion for *prompt* with a transformers Llama model.

        Args:
            prompt: Text to feed to the model.
            model: Checkpoint name or path accepted by ``from_pretrained``.

        Returns:
            The first decoded sequence returned by ``generate``, with special
            tokens stripped.
        """
        # Keep the checkpoint name in ``model`` and bind the loaded network to
        # a separate name: the tokenizer must be loaded from the same
        # name/path, not from the model object.
        tokenizer = AutoTokenizer.from_pretrained(model)
        llm = LlamaForCausalLM.from_pretrained(model)

        inputs = tokenizer(prompt, return_tensors="pt")
        # NOTE(review): max_length=30 is passed straight to generate(); per the
        # transformers docs this bounds prompt + generated tokens together, so
        # long prompts leave little room for output -- confirm this is intended.
        generate_ids = llm.generate(inputs.input_ids, max_length=30)
        decoded = tokenizer.batch_decode(
            generate_ids,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )
        return decoded[0]

    @staticmethod
    def process_text_local(prompt, model):
        """Generate a completion for *prompt* via the local Ollama HTTP API.

        Posts to ``/api/generate`` on ``http://localhost:11434`` and
        concatenates the ``"response"`` field of every JSON line in the reply.

        Args:
            prompt: Text to send to the model.
            model: Ollama model tag (for example ``"phi3.5"``).

        Returns:
            The concatenated response text on HTTP 200; otherwise the string
            ``"Error: "`` followed by the response body. If a streamed line
            carries an ``"error"`` field, ``"Error: "`` plus that message is
            returned instead of raising ``KeyError``.
        """
        ollama_url = "http://localhost:11434"
        # stream=True lets iter_lines() consume the reply incrementally instead
        # of buffering the whole body first; the ``with`` block releases the
        # connection even if parsing fails part-way through.
        # NOTE(review): no timeout is set, so a hung server blocks forever;
        # consider passing ``timeout=`` once acceptable limits are known.
        with requests.post(
            f"{ollama_url}/api/generate",
            json={"model": model, "prompt": prompt},
            stream=True,
        ) as response:
            if response.status_code != 200:
                return "Error: " + response.text

            pieces = []
            for raw_line in response.iter_lines():
                if not raw_line:
                    # iter_lines() can yield empty keep-alive lines, which are
                    # not valid JSON documents.
                    continue
                chunk = json.loads(raw_line.decode("utf-8"))
                if "error" in chunk:
                    return "Error: " + str(chunk["error"])
                pieces.append(chunk.get("response", ""))
            return "".join(pieces)

    def send_message(self, message, model):
        """Dispatch *message* to the backend selected by the numeric *model*.

        Args:
            message: Prompt text.
            model: Menu choice; 1-4 select an Ollama model, 5 selects the
                transformers Llama checkpoint.

        Returns:
            The generated text (or backend error string), or
            ``"Invalid choice"`` for any other value of *model*.
        """
        try:
            local_tag = _LOCAL_MODELS.get(model)
            checkpoint = _TRANSFORMERS_MODELS.get(model)
        except TypeError:
            # Unhashable choice (e.g. a list) cannot match any menu entry.
            return "Invalid choice"

        if local_tag is not None:
            return self.process_text_local(message, local_tag)
        if checkpoint is not None:
            return self.process_text_transformers(message, checkpoint)
        return "Invalid choice"