mirror of
https://github.com/SpudGunMan/meshing-around.git
synced 2026-07-04 00:41:35 +02:00
Add RAG support with Wikipedia/Kiwix and OpenWebUI integration
Co-authored-by: SpudGunMan <12676665+SpudGunMan@users.noreply.github.com>
This commit is contained in:
+10
-1
@@ -83,7 +83,16 @@ ollamaHostName = http://localhost:11434
|
||||
# If False, the LLM only replies to the "ask:" and "askai" commands.
|
||||
llmReplyToNonCommands = True
|
||||
# if True, the input is sent raw to the LLM, if False uses legacy template query
|
||||
rawLLMQuery = True
|
||||
rawLLMQuery = True
|
||||
# Enable Wikipedia/Kiwix integration with LLM for RAG (Retrieval Augmented Generation)
|
||||
# When enabled, LLM will automatically search Wikipedia/Kiwix and include context in responses
|
||||
llmUseWikiContext = False
|
||||
# Use OpenWebUI instead of direct Ollama API (enables advanced RAG features)
|
||||
useOpenWebUI = False
|
||||
# OpenWebUI server URL (e.g., http://localhost:3000)
|
||||
openWebUIURL = http://localhost:3000
|
||||
# OpenWebUI API key/token (required when useOpenWebUI is True)
|
||||
openWebUIAPIKey =
|
||||
|
||||
# StoreForward Enabled and Limits
|
||||
StoreForward = True
|
||||
|
||||
+186
-16
@@ -3,7 +3,8 @@
|
||||
# This module is used to interact with LLM API to generate responses to user input
|
||||
# K7MHI Kelly Keeton 2024
|
||||
from modules.log import logger
|
||||
from modules.settings import llmModel, ollamaHostName, rawLLMQuery
|
||||
from modules.settings import (llmModel, ollamaHostName, rawLLMQuery,
|
||||
llmUseWikiContext, useOpenWebUI, openWebUIURL, openWebUIAPIKey)
|
||||
|
||||
# Ollama Client
|
||||
# https://github.com/ollama/ollama/blob/main/docs/faq.md#how-do-i-configure-ollama-server
|
||||
@@ -17,6 +18,8 @@ if not rawLLMQuery:
|
||||
|
||||
# LLM System Variables
|
||||
ollamaAPI = ollamaHostName + "/api/generate"
|
||||
openWebUIChatAPI = openWebUIURL + "/api/chat/completions"
|
||||
openWebUIOllamaProxy = openWebUIURL + "/ollama/api/generate"
|
||||
tokens = 450 # max characters for the LLM response, this is the max length of the response also in prompts
|
||||
requestTruncation = True # if True, the LLM "will" truncate the response
|
||||
|
||||
@@ -177,6 +180,120 @@ def get_google_context(input, num_results):
|
||||
googleResults = ['no other context provided']
|
||||
return googleResults
|
||||
|
||||
def get_wiki_context(input):
    """
    Collect Wikipedia/Kiwix summaries to use as RAG context for an LLM query

    Search terms are derived from the query with extract_search_terms() and
    each usable summary is rendered on its own line.

    :param input: The user query
    :return: Newline-joined summaries, or an empty string when nothing usable
             was found or any step of the lookup raised
    """
    try:
        from modules.wiki import get_wikipedia_summary

        snippets = []
        # Only the first two extracted terms are looked up, to avoid excessive API calls
        for topic in extract_search_terms(input)[:2]:
            text = get_wikipedia_summary(topic)
            # NOTE(review): any summary containing the word "error" is dropped,
            # presumably to filter error strings returned by the wiki module;
            # confirm against get_wikipedia_summary's failure format.
            if not text or "error" in text.lower():
                continue
            snippets.append(f"Wikipedia context for '{topic}': {text}")

        if not snippets:
            return ''
        return '\n'.join(snippets)
    except Exception as e:
        # Best effort: context gathering must never break the LLM query itself
        logger.debug(f"System: LLM Query: Wiki context gathering failed: {e}")
        return ''
|
||||
|
||||
# Lower-cased words that are usually capitalized only because they open a
# sentence or question. They are skipped when they would start a phrase so
# they do not use up one of the few lookups made per query.
_SEARCH_TERM_LEADING_STOPWORDS = frozenset({
    "what", "who", "whom", "whose", "when", "where", "why", "which", "how",
    "tell", "explain", "describe", "define", "give", "show", "find", "please",
    "can", "could", "would", "should", "does", "did", "are", "was", "were",
    "the", "and",
})


def extract_search_terms(input):
    """
    Extract potential search terms from user input

    Simple heuristic: runs of adjacent capitalized words (longer than two
    characters) are grouped into phrases. A phrase never starts with a common
    question/command word such as "What" or "Tell", and a word followed by
    ``,`` ``;`` ``:`` ``!`` or ``?`` closes the current phrase so listed names
    are not merged together.

    :param input: The user query
    :return: List of up to 3 unique search terms in order of appearance. When
             no capitalized term is found the whole (trimmed) query is the
             only term. Blank input returns an empty list.
    """
    # Remove common command prefixes (only the first matching one)
    lowered = input.lower()
    for trap in trap_list_llm:
        if lowered.startswith(trap):
            input = input[len(trap):].strip()
            break

    query = input.strip()
    if not query:
        # Nothing to search for; avoids a lookup with an empty string
        return []

    search_terms = []
    temp_phrase = []

    def flush_phrase():
        # Close the phrase being built and record it once
        if temp_phrase:
            phrase = ' '.join(temp_phrase)
            if phrase not in search_terms:
                search_terms.append(phrase)
            temp_phrase.clear()

    for word in query.split():
        # Remove punctuation for checking
        clean_word = word.strip('.,!?;:')
        is_candidate = bool(clean_word) and clean_word[0].isupper() and len(clean_word) > 2
        if is_candidate and not temp_phrase and clean_word.lower() in _SEARCH_TERM_LEADING_STOPWORDS:
            # Sentence-initial filler, not a topic
            is_candidate = False

        if is_candidate:
            temp_phrase.append(clean_word)
            # A period is deliberately not a separator here: it also follows
            # abbreviations inside a name.
            if word[-1] in ',;:!?':
                flush_phrase()
        else:
            flush_phrase()

    flush_phrase()

    # If no capitalized terms found, use the whole query
    if not search_terms:
        search_terms = [query]

    return search_terms[:3]  # Limit to 3 terms
|
||||
|
||||
def send_openwebui_query(prompt, model=None, max_tokens=450, context=''):
    """
    Send query to OpenWebUI API for chat completion

    :param prompt: The user prompt, sent as the "user" message
    :param model: Model name (optional, defaults to llmModel)
    :param max_tokens: Value sent as "max_tokens" in the request body
    :param context: Additional context; when non-empty it is sent first as a
                    "system" message
    :return: The stripped response text, "⛔️ Response Error" when a 200 reply
             does not carry a usable message, or "⛔️ Request Error" for a
             non-200 status or a failed request
    """
    if model is None:
        model = llmModel

    headers = {
        'Authorization': f'Bearer {openWebUIAPIKey}',
        'Content-Type': 'application/json'
    }

    messages = []
    if context:
        messages.append({
            "role": "system",
            "content": f"Use the following context to help answer questions:\n{context}"
        })
    messages.append({
        "role": "user",
        "content": prompt
    })

    data = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,
        "stream": False
    }

    try:
        result = requests.post(openWebUIChatAPI, headers=headers, json=data, timeout=10)
    except requests.exceptions.RequestException as e:
        logger.warning(f"System: OpenWebUI API request failed: {e}")
        return "⛔️ Request Error"

    if result.status_code != 200:
        logger.warning(f"System: OpenWebUI API returned status code {result.status_code}")
        return "⛔️ Request Error"

    # OpenWebUI returns OpenAI-compatible format: choices[0].message.content.
    # A 200 reply can still carry a non-JSON body or a different shape, so every
    # step of the lookup is guarded instead of letting the error escape.
    try:
        response = result.json()['choices'][0]['message']['content']
    except (ValueError, KeyError, IndexError, TypeError):
        response = None

    if not isinstance(response, str):
        logger.warning("System: OpenWebUI API returned unexpected format")
        return "⛔️ Response Error"

    return response.strip()
|
||||
|
||||
def send_ollama_query(llmQuery):
|
||||
# Send the query to the Ollama API and return the response
|
||||
try:
|
||||
@@ -222,6 +339,7 @@ def send_ollama_tooling_query(prompt, functions, model=None, max_tokens=450):
|
||||
def llm_query(input, nodeID=0, location_name=None):
|
||||
global antiFloodLLM, llmChat_history
|
||||
googleResults = []
|
||||
wikiContext = ''
|
||||
|
||||
# if this is the first initialization of the LLM the query of " " should bring meshbotAIinit OTA shouldnt reach this?
|
||||
# This is for LLM like gemma and others now?
|
||||
@@ -251,13 +369,20 @@ def llm_query(input, nodeID=0, location_name=None):
|
||||
else:
|
||||
antiFloodLLM.append(nodeID)
|
||||
|
||||
# Get Wikipedia/Kiwix context if enabled (RAG)
|
||||
if llmUseWikiContext and input != meshbotAIinit:
|
||||
wikiContext = get_wiki_context(input)
|
||||
if wikiContext:
|
||||
logger.debug(f"System: Wiki-Enhanced LLM Query with context")
|
||||
|
||||
# Get Google context if enabled and not using raw query
|
||||
if llmContext_fromGoogle and not rawLLMQuery:
|
||||
googleResults = get_google_context(input, googleSearchResults)
|
||||
|
||||
history = llmChat_history.get(nodeID, ["", ""])
|
||||
|
||||
if googleResults:
|
||||
logger.debug(f"System: Google-Enhanced LLM Query: {input} From:{nodeID}")
|
||||
if googleResults or wikiContext:
|
||||
logger.debug(f"System: Context-Enhanced LLM Query: {input} From:{nodeID}")
|
||||
else:
|
||||
logger.debug(f"System: LLM Query: {input} From:{nodeID}")
|
||||
|
||||
@@ -266,19 +391,64 @@ def llm_query(input, nodeID=0, location_name=None):
|
||||
location_name += f" at the current time of {datetime.now().strftime('%Y-%m-%d %H:%M:%S %Z')}"
|
||||
|
||||
try:
|
||||
if rawLLMQuery:
|
||||
# sanitize the input to remove tool call syntax
|
||||
if '```' in input:
|
||||
logger.warning("System: LLM Query: Code markdown detected, removing for raw query")
|
||||
input = input.replace('```bash', '').replace('```python', '').replace('```', '')
|
||||
modelPrompt = input
|
||||
else:
|
||||
# Build the query from the template
|
||||
modelPrompt = meshBotAI.format(input=input, context='\n'.join(googleResults), location_name=location_name, llmModel=llmModel, history=history)
|
||||
# Use OpenWebUI if enabled
|
||||
if useOpenWebUI and openWebUIAPIKey:
|
||||
logger.debug("System: Using OpenWebUI API")
|
||||
|
||||
llmQuery = {"model": llmModel, "prompt": modelPrompt, "stream": False, "max_tokens": tokens}
|
||||
# Query the model via Ollama web API
|
||||
result = send_ollama_query(llmQuery)
|
||||
# Combine all context sources
|
||||
combined_context = []
|
||||
if wikiContext:
|
||||
combined_context.append(wikiContext)
|
||||
if googleResults:
|
||||
combined_context.append("Google search results: " + '\n'.join(googleResults))
|
||||
|
||||
context_str = '\n\n'.join(combined_context)
|
||||
|
||||
# For OpenWebUI, we send a cleaner prompt
|
||||
if rawLLMQuery:
|
||||
result = send_openwebui_query(input, context=context_str, max_tokens=tokens)
|
||||
else:
|
||||
# Use the template for non-raw queries
|
||||
modelPrompt = meshBotAI.format(
|
||||
input=input,
|
||||
context=context_str if combined_context else 'no other context provided',
|
||||
location_name=location_name,
|
||||
llmModel=llmModel,
|
||||
history=history
|
||||
)
|
||||
result = send_openwebui_query(modelPrompt, max_tokens=tokens)
|
||||
else:
|
||||
# Use standard Ollama API
|
||||
if rawLLMQuery:
|
||||
# sanitize the input to remove tool call syntax
|
||||
if '```' in input:
|
||||
logger.warning("System: LLM Query: Code markdown detected, removing for raw query")
|
||||
input = input.replace('```bash', '').replace('```python', '').replace('```', '')
|
||||
modelPrompt = input
|
||||
|
||||
# Add wiki context to raw queries if available
|
||||
if wikiContext:
|
||||
modelPrompt = f"Context:\n{wikiContext}\n\nQuestion: {input}"
|
||||
else:
|
||||
# Build the query from the template
|
||||
all_context = []
|
||||
if wikiContext:
|
||||
all_context.append(wikiContext)
|
||||
if googleResults:
|
||||
all_context.extend(googleResults)
|
||||
|
||||
context_text = '\n'.join(all_context) if all_context else 'no other context provided'
|
||||
modelPrompt = meshBotAI.format(
|
||||
input=input,
|
||||
context=context_text,
|
||||
location_name=location_name,
|
||||
llmModel=llmModel,
|
||||
history=history
|
||||
)
|
||||
|
||||
llmQuery = {"model": llmModel, "prompt": modelPrompt, "stream": False, "max_tokens": tokens}
|
||||
# Query the model via Ollama web API
|
||||
result = send_ollama_query(llmQuery)
|
||||
|
||||
#logger.debug(f"System: LLM Response: " + result.strip().replace('\n', ' '))
|
||||
except Exception as e:
|
||||
@@ -296,7 +466,7 @@ def llm_query(input, nodeID=0, location_name=None):
|
||||
truncateResult = send_ollama_query(truncateQuery)
|
||||
|
||||
# cleanup for message output
|
||||
response = result.strip().replace('\n', ' ')
|
||||
response = truncateResult.strip().replace('\n', ' ')
|
||||
|
||||
# done with the query, remove the user from the anti flood list
|
||||
antiFloodLLM.remove(nodeID)
|
||||
|
||||
@@ -256,6 +256,10 @@ try:
|
||||
llmModel = config['general'].get('ollamaModel', 'gemma3:270m') # default gemma3:270m
|
||||
rawLLMQuery = config['general'].getboolean('rawLLMQuery', True) #default True
|
||||
llmReplyToNonCommands = config['general'].getboolean('llmReplyToNonCommands', True) # default True
|
||||
llmUseWikiContext = config['general'].getboolean('llmUseWikiContext', False) # default False
|
||||
useOpenWebUI = config['general'].getboolean('useOpenWebUI', False) # default False
|
||||
openWebUIURL = config['general'].get('openWebUIURL', 'http://localhost:3000') # default localhost:3000
|
||||
openWebUIAPIKey = config['general'].get('openWebUIAPIKey', '') # default empty
|
||||
dont_retry_disconnect = config['general'].getboolean('dont_retry_disconnect', False) # default False, retry on disconnect
|
||||
favoriteNodeList = config['general'].get('favoriteNodeList', '').split(',')
|
||||
enableEcho = config['general'].getboolean('enableEcho', False) # default False
|
||||
|
||||
@@ -97,6 +97,24 @@ class TestBot(unittest.TestCase):
|
||||
response = send_ollama_query("Hello, Ollama!")
|
||||
self.assertIsInstance(response, str)
|
||||
|
||||
def test_extract_search_terms(self):
    """extract_search_terms picks out the capitalized topics of a query."""
    from llm import extract_search_terms
    # NOTE(review): assumes neither query starts with an entry of
    # trap_list_llm, so no command prefix is stripped - confirm.

    # Test with capitalized terms
    terms = extract_search_terms("What is Python programming?")
    self.assertIsInstance(terms, list)
    self.assertIn("Python", terms)

    # Test with multiple capitalized words: adjacent ones form one phrase
    terms2 = extract_search_terms("Tell me about Albert Einstein and Marie Curie")
    self.assertIsInstance(terms2, list)
    self.assertIn("Albert Einstein", terms2)
    self.assertIn("Marie Curie", terms2)
|
||||
|
||||
def test_get_wiki_context(self):
    """get_wiki_context returns a string for a well-known topic."""
    from llm import get_wiki_context
    # Only the type is checked: the context might be empty if wiki is
    # disabled or fails, that's ok
    self.assertIsInstance(get_wiki_context("Python programming language"), str)
|
||||
|
||||
def test_get_moon_phase(self):
|
||||
from space import get_moon
|
||||
phase = get_moon(lat, lon)
|
||||
|
||||
Reference in New Issue
Block a user