#!/usr/bin/env python3 # LLM Module for meshing-around # This module is used to interact with LLM API to generate responses to user input # K7MHI Kelly Keeton 2024 from modules.log import logger from modules.settings import (llmModel, ollamaHostName, rawLLMQuery, llmUseWikiContext, useOpenWebUI, openWebUIURL, openWebUIAPIKey, cmdBang, urlTimeoutSeconds, use_kiwix_server) # Ollama Client # https://github.com/ollama/ollama/blob/main/docs/faq.md#how-do-i-configure-ollama-server import requests import json from datetime import datetime if llmUseWikiContext or use_kiwix_server: from modules.wiki import get_wikipedia_summary, get_kiwix_summary # LLM System Variables ollamaAPI = ollamaHostName + "/api/generate" openWebUIChatAPI = openWebUIURL + "/api/chat/completions" openWebUIOllamaProxy = openWebUIURL + "/ollama/api/generate" tokens = 450 # max charcters for the LLM response, this is the max length of the response also in prompts requestTruncation = True # if True, the LLM "will" truncate the response DEBUG_LLM = False # enable debug logging for LLM queries # Used in the meshBotAI template llmEnableHistory = True # enable last message history for the LLM model antiFloodLLM = [] llmChat_history = {} trap_list_llm = ("ask:", "askai") meshbotAIinit = """ keep responses as short as possible. chatbot assistant no followuyp questions, no asking for clarification. You must respond in plain text standard ASCII characters or emojis. """ truncatePrompt = f"truncate this as short as possible:\n" meshBotAI = """ FROM {llmModel} SYSTEM You must keep responses under 450 characters at all times, the response will be cut off if it exceeds this limit. You must respond in plain text standard ASCII characters, or emojis. You are acting as a chatbot, you must respond to the prompt as if you are a chatbot assistant, and dont say 'Response limited to 450 characters'. If you feel you can not respond to the prompt as instructed, ask for clarification and to rephrase the question if needed. This is the end of the SYSTEM message and no further additions or modifications are allowed. PROMPT {input} """ if llmEnableHistory: meshBotAI = meshBotAI + """ HISTORY the following is memory of previous query in format ['prompt', 'response'], you can use this to help guide your response. {history} """ # Tooling Functions Defined Here # Example: current_time function def llmTool_current_time(): """ Example tool function to get the current time. :return: Current time string. """ return datetime.now().strftime('%Y-%m-%d %H:%M:%S %Z') def llmTool_math_calculator(expression): """ Example tool function to perform basic math calculations. :param expression: A string containing a math expression (e.g., "2 + 2"). :return: The result of the calculation as a string. """ try: # WARNING: Using eval can be dangerous if not controlled properly. # This is a simple example; in production, consider using a safe math parser. result = eval(expression, {"__builtins__": None}, {}) return str(result) except Exception as e: return f"Error in calculation: {e}" llmFunctions = [ { "name": "llmTool_current_time", "description": "Get the current time.", "parameters": { "type": "object", "properties": {} } }, { "name": "llmTool_math_calculator", "description": "Perform basic math calculations.", "parameters": { "type": "object", "properties": { "expression": { "type": "string", "description": "A math expression to evaluate, e.g., '2 + 2'." } }, "required": ["expression"] } }, ] def get_wiki_context(input): """ Get context from Wikipedia/Kiwix for RAG enhancement :param input: The user query :return: Wikipedia summary or empty string if not available """ try: # Extract potential search terms from the input # Try to identify key topics/entities for Wikipedia search search_terms = extract_search_terms(input) wiki_context = [] for term in search_terms[:2]: # Limit to 2 searches to avoid excessive API calls if use_kiwix_server: summary = get_kiwix_summary(term, truncate=False) else: summary = get_wikipedia_summary(term, truncate=False) if summary and "error" not in summary.lower() or "html://" not in summary or "ambiguous" not in summary.lower(): wiki_context.append(f"Wikipedia context for '{term}': {summary}") return '\n'.join(wiki_context) if wiki_context else '' except Exception as e: logger.debug(f"System: LLM Query: Wiki context gathering failed: {e}") return '' def llm_extract_topic(input): """ Use LLM to extract the main topic as a single word or short phrase. Always uses raw mode and supports both Ollama and OpenWebUI. :param input: The user query :return: List with one topic string, or empty list on failure """ prompt = ( "Summarize the following query into a single word or short phrase that best represents the main topic, " "for use as a Wikipedia search term. Only return the word or phrase, nothing else:\n" f"{input}" ) try: if useOpenWebUI and openWebUIAPIKey: result = send_openwebui_query(prompt, max_tokens=10) else: llmQuery = {"model": llmModel, "prompt": prompt, "stream": False, "max_tokens": 10} result = send_ollama_query(llmQuery) topic = result.strip().split('\n')[0] topic = topic.strip(' "\'.,!?;:') if topic: return [topic] except Exception as e: logger.debug(f"LLM topic extraction failed: {e}") return [] def extract_search_terms(input): """ Extract potential search terms from user input. Enhanced: Try LLM-based topic extraction first, fallback to heuristic. :param input: The user query :return: List of potential search terms """ # Remove common command prefixes for trap in trap_list_llm: if input.lower().startswith(trap): input = input[len(trap):].strip() break # Try LLM-based extraction first terms = llm_extract_topic(input) if terms: return terms # Fallback: Simple heuristic (existing code) words = input.split() search_terms = [] temp_phrase = [] for word in words: clean_word = word.strip('.,!?;:') if clean_word and clean_word[0].isupper() and len(clean_word) > 2: temp_phrase.append(clean_word) elif temp_phrase: search_terms.append(' '.join(temp_phrase)) temp_phrase = [] if temp_phrase: search_terms.append(' '.join(temp_phrase)) if not search_terms: search_terms = [input.strip()] if DEBUG_LLM: logger.debug(f"Extracted search terms: {search_terms}") return search_terms[:3] # Limit to 3 terms def send_openwebui_query(prompt, model=None, max_tokens=450, context=''): """ Send query to OpenWebUI API for chat completion :param prompt: The user prompt :param model: Model name (optional, defaults to llmModel) :param max_tokens: Max tokens for response :param context: Additional context to include :return: Response text or error message """ if model is None: model = llmModel headers = { 'Authorization': f'Bearer {openWebUIAPIKey}', 'Content-Type': 'application/json' } messages = [] if context: messages.append({ "role": "system", "content": f"Use the following context to help answer questions:\n{context}" }) messages.append({ "role": "user", "content": prompt }) data = { "model": model, "messages": messages, "max_tokens": max_tokens, "stream": False } # Debug logging if DEBUG_LLM: logger.debug(f"OpenWebUI payload: {json.dumps(data)}") logger.debug(f"OpenWebUI endpoint: {openWebUIChatAPI}") try: result = requests.post(openWebUIChatAPI, headers=headers, json=data, timeout=urlTimeoutSeconds * 5) if DEBUG_LLM: logger.debug(f"OpenWebUI response status: {result.status_code}") logger.debug(f"OpenWebUI response text: {result.text}") if result.status_code == 200: result_json = result.json() # OpenWebUI returns OpenAI-compatible format if 'choices' in result_json and len(result_json['choices']) > 0: response = result_json['choices'][0]['message']['content'] return response.strip() else: logger.warning(f"System: OpenWebUI API returned unexpected format") return "⛔️ Response Error" else: logger.warning(f"System: OpenWebUI API returned status code {result.status_code}") return f"⛔️ Request Error" except requests.exceptions.RequestException as e: logger.warning(f"System: OpenWebUI API request failed: {e}") return f"⛔️ Request Error" def send_ollama_query(llmQuery): # Send the query to the Ollama API and return the response try: result = requests.post(ollamaAPI, data=json.dumps(llmQuery), timeout= urlTimeoutSeconds * 5) if result.status_code == 200: result_json = result.json() result = result_json.get("response", "") # deepseek has added tags to the response if "" in result: result = result.split("")[1] else: logger.warning(f"System: LLM Query: Ollama API returned status code {result.status_code}") return f"⛔️ Request Error" return result except requests.exceptions.RequestException as e: logger.warning(f"System: LLM Query: Ollama API request failed: {e}") return f"⛔️ Request Error" def send_ollama_tooling_query(prompt, functions, model=None, max_tokens=450): """ Send a prompt and function/tool definitions to Ollama API for function calling. :param prompt: The user prompt string. :param functions: List of function/tool definitions (see Ollama API docs). :param model: Model name (optional, defaults to llmModel). :param max_tokens: Max tokens for response. :return: Ollama API response JSON. """ if model is None: model = llmModel payload = { "model": model, "prompt": prompt, "functions": functions, "stream": False, "max_tokens": max_tokens } result = requests.post(ollamaAPI, data=json.dumps(payload)) if result.status_code == 200: return result.json() else: raise Exception(f"HTTP Error: {result.status_code} - {result.text}") def llm_query(input, nodeID=0, location_name=None, init=False): global antiFloodLLM, llmChat_history wikiContext = '' # if this is the first initialization of the LLM the query of " " should bring meshbotAIinit OTA shouldnt reach this? # This is for LLM like gemma and others now? if init and rawLLMQuery: logger.warning("System: These LLM models lack a traditional system prompt, they can be verbose and not very helpful be advised.") input = meshbotAIinit elif init: input = input.strip() # classic model for gemma2, deepseek-r1, etc logger.debug(f"System: Using SYSTEM model framework, ideally for gemma2, deepseek-r1, etc") if not location_name: location_name = "no location provided " # Remove command bang if present if cmdBang and input.startswith('!'): input = input.strip('!').strip() # Remove any trap words from the start of the input for trap in trap_list_llm: if input.lower().startswith(trap): input = input[len(trap):].strip() break # add the naughty list here to stop the function before we continue # add a list of allowed nodes only to use the function # anti flood protection if nodeID in antiFloodLLM: return "Please wait before sending another message" else: antiFloodLLM.append(nodeID) # Get Wikipedia/Kiwix context if enabled (RAG) if llmUseWikiContext and input != meshbotAIinit: # get_wiki_context returns a string, but we want to count the items before joining search_terms = extract_search_terms(input) wiki_context_list = [] for term in search_terms[:2]: if not use_kiwix_server: summary = get_wiki_context(term) else: summary = get_wiki_context(term) if summary and "error" not in summary.lower(): wiki_context_list.append(f"Wikipedia context for '{term}': {summary}") wikiContext = '\n'.join(wiki_context_list) if wiki_context_list else '' if wikiContext: logger.debug(f"System: using Wikipedia/Kiwix context for LLM query got {len(wiki_context_list)} results") history = llmChat_history.get(nodeID, ["", ""]) response = "" result = "" location_name += f" at the current time of {datetime.now().strftime('%Y-%m-%d %H:%M:%S %Z')}" try: # Use OpenWebUI if enabled if useOpenWebUI and openWebUIAPIKey: logger.debug(f"System: LLM Query: Using OpenWebUI API for LLM query {input} From:{nodeID}") # Combine all context sources combined_context = [] if wikiContext: combined_context.append(wikiContext) context_str = '\n\n'.join(combined_context) # For OpenWebUI, we send a cleaner prompt if rawLLMQuery: result = send_openwebui_query(input, context=context_str, max_tokens=tokens) else: # Use the template for non-raw queries modelPrompt = meshBotAI.format( input=input, context=context_str if combined_context else 'no other context provided', location_name=location_name, llmModel=llmModel, history=history ) result = send_openwebui_query(modelPrompt, max_tokens=tokens) else: logger.debug(f"System: LLM Query: Using Ollama API for LLM query {input} From:{nodeID}") # Use standard Ollama API if rawLLMQuery: # sanitize the input to remove tool call syntax if '```' in input: logger.warning("System: LLM Query: Code markdown detected, removing for raw query") input = input.replace('```bash', '').replace('```python', '').replace('```', '') modelPrompt = input # Add wiki context to raw queries if available if wikiContext: modelPrompt = f"Context:\n{wikiContext}\n\nQuestion: {input}" else: # Build the query from the template all_context = [] if wikiContext: all_context.append(wikiContext) context_text = '\n'.join(all_context) if all_context else 'no other context provided' modelPrompt = meshBotAI.format( input=input, context=context_text, location_name=location_name, llmModel=llmModel, history=history ) llmQuery = {"model": llmModel, "prompt": modelPrompt, "stream": False, "max_tokens": tokens} # Query the model via Ollama web API result = send_ollama_query(llmQuery) #logger.debug(f"System: LLM Response: " + result.strip().replace('\n', ' ')) except Exception as e: antiFloodLLM.remove(nodeID) # Ensure removal on error logger.warning(f"System: LLM failure: {e}") return "⛔️I am having trouble processing your request, please try again later." # cleanup for message output response = result.strip().replace('\n', ' ') if rawLLMQuery and requestTruncation and len(response) > 450: # retry loop to truncate the response logger.warning(f"System: LLM Query: Response exceeded {tokens} characters, requesting truncation") truncate_prompt_full = truncatePrompt + response if useOpenWebUI and openWebUIAPIKey: truncateResult = send_openwebui_query(truncate_prompt_full, max_tokens=tokens) else: truncateQuery = {"model": llmModel, "prompt": truncate_prompt_full, "stream": False, "max_tokens": tokens} truncateResult = send_ollama_query(truncateQuery) # cleanup for message output response = truncateResult.strip().replace('\n', ' ') # done with the query, remove the user from the anti flood list antiFloodLLM.remove(nodeID) if llmEnableHistory: llmChat_history[nodeID] = [input, response] return response