Add RAG support with Wikipedia/Kiwix and OpenWebUI integration

Co-authored-by: SpudGunMan <12676665+SpudGunMan@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot]
2025-10-27 03:32:42 +00:00
parent 434fbc3eef
commit b0cf5914bf
4 changed files with 218 additions and 17 deletions
+10 -1
View File
@@ -83,7 +83,16 @@ ollamaHostName = http://localhost:11434
# If False, the LLM only replies to the "ask:" and "askai" commands.
llmReplyToNonCommands = True
# if True, the input is sent raw to the LLM, if False uses legacy template query
rawLLMQuery = True
rawLLMQuery = True
# Enable Wikipedia/Kiwix integration with LLM for RAG (Retrieval Augmented Generation)
# When enabled, LLM will automatically search Wikipedia/Kiwix and include context in responses
llmUseWikiContext = False
# Use OpenWebUI instead of direct Ollama API (enables advanced RAG features)
useOpenWebUI = False
# OpenWebUI server URL (e.g., http://localhost:3000)
openWebUIURL = http://localhost:3000
# OpenWebUI API key/token (required when useOpenWebUI is True)
openWebUIAPIKey =
# StoreForward Enabled and Limits
StoreForward = True
+186 -16
View File
@@ -3,7 +3,8 @@
# This module is used to interact with LLM API to generate responses to user input
# K7MHI Kelly Keeton 2024
from modules.log import logger
from modules.settings import llmModel, ollamaHostName, rawLLMQuery
from modules.settings import (llmModel, ollamaHostName, rawLLMQuery,
llmUseWikiContext, useOpenWebUI, openWebUIURL, openWebUIAPIKey)
# Ollama Client
# https://github.com/ollama/ollama/blob/main/docs/faq.md#how-do-i-configure-ollama-server
@@ -17,6 +18,8 @@ if not rawLLMQuery:
# LLM System Variables
ollamaAPI = ollamaHostName + "/api/generate"
openWebUIChatAPI = openWebUIURL + "/api/chat/completions"
openWebUIOllamaProxy = openWebUIURL + "/ollama/api/generate"
tokens = 450 # max charcters for the LLM response, this is the max length of the response also in prompts
requestTruncation = True # if True, the LLM "will" truncate the response
@@ -177,6 +180,120 @@ def get_google_context(input, num_results):
googleResults = ['no other context provided']
return googleResults
def get_wiki_context(input):
"""
Get context from Wikipedia/Kiwix for RAG enhancement
:param input: The user query
:return: Wikipedia summary or empty string if not available
"""
try:
from modules.wiki import get_wikipedia_summary
# Extract potential search terms from the input
# Try to identify key topics/entities for Wikipedia search
search_terms = extract_search_terms(input)
wiki_context = []
for term in search_terms[:2]: # Limit to 2 searches to avoid excessive API calls
summary = get_wikipedia_summary(term)
if summary and "error" not in summary.lower():
wiki_context.append(f"Wikipedia context for '{term}': {summary}")
return '\n'.join(wiki_context) if wiki_context else ''
except Exception as e:
logger.debug(f"System: LLM Query: Wiki context gathering failed: {e}")
return ''
def extract_search_terms(input):
"""
Extract potential search terms from user input
Simple implementation: look for capitalized words, proper nouns, etc.
:param input: The user query
:return: List of potential search terms
"""
# Remove common command prefixes
for trap in trap_list_llm:
if input.lower().startswith(trap):
input = input[len(trap):].strip()
break
# Simple heuristic: extract capitalized words and phrases
words = input.split()
search_terms = []
# Look for multi-word capitalized phrases
temp_phrase = []
for word in words:
# Remove punctuation for checking
clean_word = word.strip('.,!?;:')
if clean_word and clean_word[0].isupper() and len(clean_word) > 2:
temp_phrase.append(clean_word)
elif temp_phrase:
search_terms.append(' '.join(temp_phrase))
temp_phrase = []
if temp_phrase:
search_terms.append(' '.join(temp_phrase))
# If no capitalized terms found, use the whole query
if not search_terms:
search_terms = [input.strip()]
return search_terms[:3] # Limit to 3 terms
def send_openwebui_query(prompt, model=None, max_tokens=450, context=''):
"""
Send query to OpenWebUI API for chat completion
:param prompt: The user prompt
:param model: Model name (optional, defaults to llmModel)
:param max_tokens: Max tokens for response
:param context: Additional context to include
:return: Response text or error message
"""
if model is None:
model = llmModel
headers = {
'Authorization': f'Bearer {openWebUIAPIKey}',
'Content-Type': 'application/json'
}
messages = []
if context:
messages.append({
"role": "system",
"content": f"Use the following context to help answer questions:\n{context}"
})
messages.append({
"role": "user",
"content": prompt
})
data = {
"model": model,
"messages": messages,
"max_tokens": max_tokens,
"stream": False
}
try:
result = requests.post(openWebUIChatAPI, headers=headers, json=data, timeout=10)
if result.status_code == 200:
result_json = result.json()
# OpenWebUI returns OpenAI-compatible format
if 'choices' in result_json and len(result_json['choices']) > 0:
response = result_json['choices'][0]['message']['content']
return response.strip()
else:
logger.warning(f"System: OpenWebUI API returned unexpected format")
return "⛔️ Response Error"
else:
logger.warning(f"System: OpenWebUI API returned status code {result.status_code}")
return f"⛔️ Request Error"
except requests.exceptions.RequestException as e:
logger.warning(f"System: OpenWebUI API request failed: {e}")
return f"⛔️ Request Error"
def send_ollama_query(llmQuery):
# Send the query to the Ollama API and return the response
try:
@@ -222,6 +339,7 @@ def send_ollama_tooling_query(prompt, functions, model=None, max_tokens=450):
def llm_query(input, nodeID=0, location_name=None):
global antiFloodLLM, llmChat_history
googleResults = []
wikiContext = ''
# if this is the first initialization of the LLM the query of " " should bring meshbotAIinit OTA shouldnt reach this?
# This is for LLM like gemma and others now?
@@ -251,13 +369,20 @@ def llm_query(input, nodeID=0, location_name=None):
else:
antiFloodLLM.append(nodeID)
# Get Wikipedia/Kiwix context if enabled (RAG)
if llmUseWikiContext and input != meshbotAIinit:
wikiContext = get_wiki_context(input)
if wikiContext:
logger.debug(f"System: Wiki-Enhanced LLM Query with context")
# Get Google context if enabled and not using raw query
if llmContext_fromGoogle and not rawLLMQuery:
googleResults = get_google_context(input, googleSearchResults)
history = llmChat_history.get(nodeID, ["", ""])
if googleResults:
logger.debug(f"System: Google-Enhanced LLM Query: {input} From:{nodeID}")
if googleResults or wikiContext:
logger.debug(f"System: Context-Enhanced LLM Query: {input} From:{nodeID}")
else:
logger.debug(f"System: LLM Query: {input} From:{nodeID}")
@@ -266,19 +391,64 @@ def llm_query(input, nodeID=0, location_name=None):
location_name += f" at the current time of {datetime.now().strftime('%Y-%m-%d %H:%M:%S %Z')}"
try:
if rawLLMQuery:
# sanitize the input to remove tool call syntax
if '```' in input:
logger.warning("System: LLM Query: Code markdown detected, removing for raw query")
input = input.replace('```bash', '').replace('```python', '').replace('```', '')
modelPrompt = input
else:
# Build the query from the template
modelPrompt = meshBotAI.format(input=input, context='\n'.join(googleResults), location_name=location_name, llmModel=llmModel, history=history)
# Use OpenWebUI if enabled
if useOpenWebUI and openWebUIAPIKey:
logger.debug("System: Using OpenWebUI API")
llmQuery = {"model": llmModel, "prompt": modelPrompt, "stream": False, "max_tokens": tokens}
# Query the model via Ollama web API
result = send_ollama_query(llmQuery)
# Combine all context sources
combined_context = []
if wikiContext:
combined_context.append(wikiContext)
if googleResults:
combined_context.append("Google search results: " + '\n'.join(googleResults))
context_str = '\n\n'.join(combined_context)
# For OpenWebUI, we send a cleaner prompt
if rawLLMQuery:
result = send_openwebui_query(input, context=context_str, max_tokens=tokens)
else:
# Use the template for non-raw queries
modelPrompt = meshBotAI.format(
input=input,
context=context_str if combined_context else 'no other context provided',
location_name=location_name,
llmModel=llmModel,
history=history
)
result = send_openwebui_query(modelPrompt, max_tokens=tokens)
else:
# Use standard Ollama API
if rawLLMQuery:
# sanitize the input to remove tool call syntax
if '```' in input:
logger.warning("System: LLM Query: Code markdown detected, removing for raw query")
input = input.replace('```bash', '').replace('```python', '').replace('```', '')
modelPrompt = input
# Add wiki context to raw queries if available
if wikiContext:
modelPrompt = f"Context:\n{wikiContext}\n\nQuestion: {input}"
else:
# Build the query from the template
all_context = []
if wikiContext:
all_context.append(wikiContext)
if googleResults:
all_context.extend(googleResults)
context_text = '\n'.join(all_context) if all_context else 'no other context provided'
modelPrompt = meshBotAI.format(
input=input,
context=context_text,
location_name=location_name,
llmModel=llmModel,
history=history
)
llmQuery = {"model": llmModel, "prompt": modelPrompt, "stream": False, "max_tokens": tokens}
# Query the model via Ollama web API
result = send_ollama_query(llmQuery)
#logger.debug(f"System: LLM Response: " + result.strip().replace('\n', ' '))
except Exception as e:
@@ -296,7 +466,7 @@ def llm_query(input, nodeID=0, location_name=None):
truncateResult = send_ollama_query(truncateQuery)
# cleanup for message output
response = result.strip().replace('\n', ' ')
response = truncateResult.strip().replace('\n', ' ')
# done with the query, remove the user from the anti flood list
antiFloodLLM.remove(nodeID)
+4
View File
@@ -256,6 +256,10 @@ try:
llmModel = config['general'].get('ollamaModel', 'gemma3:270m') # default gemma3:270m
rawLLMQuery = config['general'].getboolean('rawLLMQuery', True) #default True
llmReplyToNonCommands = config['general'].getboolean('llmReplyToNonCommands', True) # default True
llmUseWikiContext = config['general'].getboolean('llmUseWikiContext', False) # default False
useOpenWebUI = config['general'].getboolean('useOpenWebUI', False) # default False
openWebUIURL = config['general'].get('openWebUIURL', 'http://localhost:3000') # default localhost:3000
openWebUIAPIKey = config['general'].get('openWebUIAPIKey', '') # default empty
dont_retry_disconnect = config['general'].getboolean('dont_retry_disconnect', False) # default False, retry on disconnect
favoriteNodeList = config['general'].get('favoriteNodeList', '').split(',')
enableEcho = config['general'].getboolean('enableEcho', False) # default False
+18
View File
@@ -97,6 +97,24 @@ class TestBot(unittest.TestCase):
response = send_ollama_query("Hello, Ollama!")
self.assertIsInstance(response, str)
def test_extract_search_terms(self):
from llm import extract_search_terms
# Test with capitalized terms
terms = extract_search_terms("What is Python programming?")
self.assertIsInstance(terms, list)
self.assertTrue(len(terms) > 0)
# Test with multiple capitalized words
terms2 = extract_search_terms("Tell me about Albert Einstein and Marie Curie")
self.assertIsInstance(terms2, list)
self.assertTrue(len(terms2) > 0)
def test_get_wiki_context(self):
from llm import get_wiki_context
# Test with a well-known topic
context = get_wiki_context("Python programming language")
self.assertIsInstance(context, str)
# Context might be empty if wiki is disabled or fails, that's ok
def test_get_moon_phase(self):
from space import get_moon
phase = get_moon(lat, lon)