Add RAG support with Wikipedia/Kiwix and OpenWebUI integration

Co-authored-by: SpudGunMan <12676665+SpudGunMan@users.noreply.github.com>
2026-07-04 00:41:35 +02:00 · 2025-10-27 03:32:42 +00:00
parent 434fbc3eef
commit b0cf5914bf
4 changed files with 218 additions and 17 deletions
@@ -83,7 +83,16 @@ ollamaHostName = http://localhost:11434
 # If False, the LLM only replies to the "ask:" and "askai" commands.
 llmReplyToNonCommands = True
 # if True, the input is sent raw to the LLM, if False uses legacy template query
-rawLLMQuery = True 
+rawLLMQuery = True
+# Enable Wikipedia/Kiwix integration with LLM for RAG (Retrieval Augmented Generation)
+# When enabled, LLM will automatically search Wikipedia/Kiwix and include context in responses
+llmUseWikiContext = False
+# Use OpenWebUI instead of direct Ollama API (enables advanced RAG features)
+useOpenWebUI = False
+# OpenWebUI server URL (e.g., http://localhost:3000)
+openWebUIURL = http://localhost:3000
+# OpenWebUI API key/token (required when useOpenWebUI is True; if left empty the bot falls back to the direct Ollama API)
+openWebUIAPIKey = 

 # StoreForward Enabled and Limits
 StoreForward = True
@@ -3,7 +3,8 @@
 # This module is used to interact with LLM API to generate responses to user input
 # K7MHI Kelly Keeton 2024
 from modules.log import logger
-from modules.settings import llmModel, ollamaHostName, rawLLMQuery
+from modules.settings import (llmModel, ollamaHostName, rawLLMQuery, 
+                              llmUseWikiContext, useOpenWebUI, openWebUIURL, openWebUIAPIKey)

 # Ollama Client
 # https://github.com/ollama/ollama/blob/main/docs/faq.md#how-do-i-configure-ollama-server
@@ -17,6 +18,8 @@ if not rawLLMQuery:

 # LLM System Variables
 ollamaAPI = ollamaHostName + "/api/generate"
+# OpenWebUI endpoints. A trailing "/" on the configured URL is stripped so the joined path never contains "//".
+openWebUIChatAPI = openWebUIURL.rstrip("/") + "/api/chat/completions"
+# NOTE(review): openWebUIOllamaProxy is not referenced in the visible hunks -- confirm it is used elsewhere
+openWebUIOllamaProxy = openWebUIURL.rstrip("/") + "/ollama/api/generate"
 tokens = 450 # max charcters for the LLM response, this is the max length of the response also in prompts
 requestTruncation = True # if True, the LLM "will" truncate the response 

@@ -177,6 +180,120 @@ def get_google_context(input, num_results):
        googleResults = ['no other context provided']
    return googleResults

+def get_wiki_context(input):
+    """
+    Build a Wikipedia/Kiwix context string for a user query (RAG helper).
+
+    Search terms come from extract_search_terms(); only the first two are
+    looked up. A summary is kept when it is non-empty and does not contain
+    the word "error" (case-insensitive).
+    :param input: The user query
+    :return: Newline-joined context lines, or '' when nothing usable was found or any step raised
+    """
+    try:
+        from modules.wiki import get_wikipedia_summary
+
+        snippets = []
+        # Cap at two lookups to avoid excessive API calls
+        for topic in extract_search_terms(input)[:2]:
+            text = get_wikipedia_summary(topic)
+            if not text or "error" in text.lower():
+                continue
+            snippets.append(f"Wikipedia context for '{topic}': {text}")
+
+        if not snippets:
+            return ''
+        return '\n'.join(snippets)
+    except Exception as e:
+        logger.debug(f"System: LLM Query: Wiki context gathering failed: {e}")
+        return ''
+
+def extract_search_terms(input):
+    """
+    Extract likely Wikipedia search terms from a user query.
+
+    Heuristic: runs of consecutive capitalized words (longer than 2 characters)
+    are joined into phrases. A capitalized question/command word such as "What"
+    or "Tell" is not allowed to START a phrase, so sentence openers do not take
+    up the limited search slots; inside a phrase ("Doctor Who") it is kept.
+    :param input: The user query
+    :return: List of up to 3 unique search terms; the whole query when no
+             capitalized term is found; [] when the query is empty
+    """
+    # Capitalized sentence openers that are never useful as a topic on their own
+    filler_words = {
+        'what', 'who', 'whom', 'whose', 'where', 'when', 'why', 'how', 'which',
+        'tell', 'give', 'show', 'explain', 'describe', 'define', 'please',
+        'can', 'could', 'would', 'should', 'does', 'did', 'are', 'was', 'were',
+    }
+
+    # Remove common command prefixes
+    for trap in trap_list_llm:
+        if input.lower().startswith(trap):
+            input = input[len(trap):].strip()
+            break
+
+    search_terms = []
+    temp_phrase = []
+
+    def flush_phrase():
+        # Close the current run of capitalized words, skipping duplicate phrases
+        if temp_phrase:
+            phrase = ' '.join(temp_phrase)
+            if phrase not in search_terms:
+                search_terms.append(phrase)
+            temp_phrase.clear()
+
+    for word in input.split():
+        # Remove punctuation for checking
+        clean_word = word.strip('.,!?;:')
+        if len(clean_word) > 2 and clean_word[0].isupper():
+            # A filler word may continue a phrase but must not open one
+            if temp_phrase or clean_word.lower() not in filler_words:
+                temp_phrase.append(clean_word)
+        else:
+            flush_phrase()
+    flush_phrase()
+
+    # If no capitalized terms found, use the whole query (nothing to search for when it is empty)
+    if not search_terms:
+        whole_query = input.strip()
+        search_terms = [whole_query] if whole_query else []
+
+    return search_terms[:3]  # Limit to 3 terms
+
+def send_openwebui_query(prompt, model=None, max_tokens=450, context=''):
+    """
+    Send query to OpenWebUI API for chat completion
+    :param prompt: The user prompt
+    :param model: Model name (optional, defaults to llmModel)
+    :param max_tokens: Max tokens for response
+    :param context: Additional context to include; sent as a system message when non-empty
+    :return: Response text, "⛔️ Request Error" when the HTTP call fails or returns
+             a non-200 status, or "⛔️ Response Error" when the body is not in the
+             expected OpenAI-compatible shape
+    """
+    if model is None:
+        model = llmModel
+
+    headers = {
+        'Authorization': f'Bearer {openWebUIAPIKey}',
+        'Content-Type': 'application/json'
+    }
+
+    messages = []
+    if context:
+        messages.append({
+            "role": "system",
+            "content": f"Use the following context to help answer questions:\n{context}"
+        })
+    messages.append({
+        "role": "user",
+        "content": prompt
+    })
+
+    data = {
+        "model": model,
+        "messages": messages,
+        "max_tokens": max_tokens,
+        "stream": False
+    }
+
+    try:
+        result = requests.post(openWebUIChatAPI, headers=headers, json=data, timeout=10)
+    except requests.exceptions.RequestException as e:
+        logger.warning(f"System: OpenWebUI API request failed: {e}")
+        return "⛔️ Request Error"
+
+    if result.status_code != 200:
+        logger.warning(f"System: OpenWebUI API returned status code {result.status_code}")
+        return "⛔️ Request Error"
+
+    # OpenWebUI returns OpenAI-compatible format. Any missing key, empty list,
+    # non-JSON body or null content is reported as a response error instead of
+    # escaping as an unhandled exception.
+    try:
+        response = result.json()['choices'][0]['message']['content']
+    except (ValueError, KeyError, IndexError, TypeError):
+        response = None
+
+    if not isinstance(response, str):
+        logger.warning("System: OpenWebUI API returned unexpected format")
+        return "⛔️ Response Error"
+    return response.strip()
+
 def send_ollama_query(llmQuery):
    # Send the query to the Ollama API and return the response
    try:
@@ -222,6 +339,7 @@ def send_ollama_tooling_query(prompt, functions, model=None, max_tokens=450):
 def llm_query(input, nodeID=0, location_name=None):
    global antiFloodLLM, llmChat_history
    googleResults = []
+    wikiContext = ''

    # if this is the first initialization of the LLM the query of " " should bring meshbotAIinit OTA shouldnt reach this?
    # This is for LLM like gemma and others now?
@@ -251,13 +369,20 @@ def llm_query(input, nodeID=0, location_name=None):
    else:
        antiFloodLLM.append(nodeID)

+    # Get Wikipedia/Kiwix context if enabled (RAG)
+    if llmUseWikiContext and input != meshbotAIinit:
+        wikiContext = get_wiki_context(input)
+        if wikiContext:
+            logger.debug(f"System: Wiki-Enhanced LLM Query with context")
+
+    # Get Google context if enabled and not using raw query
    if llmContext_fromGoogle and not rawLLMQuery:
        googleResults = get_google_context(input, googleSearchResults)

    history = llmChat_history.get(nodeID, ["", ""])

-    if googleResults:
-        logger.debug(f"System: Google-Enhanced LLM Query: {input} From:{nodeID}")
+    if googleResults or wikiContext:
+        logger.debug(f"System: Context-Enhanced LLM Query: {input} From:{nodeID}")
    else:
        logger.debug(f"System: LLM Query: {input} From:{nodeID}")
    
@@ -266,19 +391,64 @@ def llm_query(input, nodeID=0, location_name=None):
    location_name += f" at the current time of {datetime.now().strftime('%Y-%m-%d %H:%M:%S %Z')}"

    try:
-        if rawLLMQuery:
-            # sanitize the input to remove tool call syntax
-            if '```' in input:
-                logger.warning("System: LLM Query: Code markdown detected, removing for raw query")
-            input = input.replace('```bash', '').replace('```python', '').replace('```', '')
-            modelPrompt = input
-        else:
-            # Build the query from the template
-            modelPrompt = meshBotAI.format(input=input, context='\n'.join(googleResults), location_name=location_name, llmModel=llmModel, history=history)
+        # Use OpenWebUI if enabled
+        if useOpenWebUI and openWebUIAPIKey:
+            logger.debug("System: Using OpenWebUI API")
            
-        llmQuery = {"model": llmModel, "prompt": modelPrompt, "stream": False, "max_tokens": tokens}
-        # Query the model via Ollama web API
-        result = send_ollama_query(llmQuery)
+            # Combine all context sources
+            combined_context = []
+            if wikiContext:
+                combined_context.append(wikiContext)
+            if googleResults:
+                combined_context.append("Google search results: " + '\n'.join(googleResults))
+            
+            context_str = '\n\n'.join(combined_context)
+            
+            # For OpenWebUI, we send a cleaner prompt
+            if rawLLMQuery:
+                result = send_openwebui_query(input, context=context_str, max_tokens=tokens)
+            else:
+                # Use the template for non-raw queries
+                modelPrompt = meshBotAI.format(
+                    input=input, 
+                    context=context_str if combined_context else 'no other context provided',
+                    location_name=location_name, 
+                    llmModel=llmModel, 
+                    history=history
+                )
+                result = send_openwebui_query(modelPrompt, max_tokens=tokens)
+        else:
+            # Use standard Ollama API
+            if rawLLMQuery:
+                # sanitize the input to remove tool call syntax
+                if '```' in input:
+                    logger.warning("System: LLM Query: Code markdown detected, removing for raw query")
+                input = input.replace('```bash', '').replace('```python', '').replace('```', '')
+                modelPrompt = input
+                
+                # Add wiki context to raw queries if available
+                if wikiContext:
+                    modelPrompt = f"Context:\n{wikiContext}\n\nQuestion: {input}"
+            else:
+                # Build the query from the template
+                all_context = []
+                if wikiContext:
+                    all_context.append(wikiContext)
+                if googleResults:
+                    all_context.extend(googleResults)
+                
+                context_text = '\n'.join(all_context) if all_context else 'no other context provided'
+                modelPrompt = meshBotAI.format(
+                    input=input, 
+                    context=context_text,
+                    location_name=location_name, 
+                    llmModel=llmModel, 
+                    history=history
+                )
+                
+            llmQuery = {"model": llmModel, "prompt": modelPrompt, "stream": False, "max_tokens": tokens}
+            # Query the model via Ollama web API
+            result = send_ollama_query(llmQuery)

        #logger.debug(f"System: LLM Response: " + result.strip().replace('\n', ' '))
    except Exception as e:
@@ -296,7 +466,7 @@ def llm_query(input, nodeID=0, location_name=None):
        truncateResult = send_ollama_query(truncateQuery)

        # cleanup for message output
-        response = result.strip().replace('\n', ' ')
+        response = truncateResult.strip().replace('\n', ' ')

    # done with the query, remove the user from the anti flood list
    antiFloodLLM.remove(nodeID)
@@ -256,6 +256,10 @@ try:
    llmModel = config['general'].get('ollamaModel', 'gemma3:270m') # default gemma3:270m
    rawLLMQuery = config['general'].getboolean('rawLLMQuery', True) #default True
    llmReplyToNonCommands = config['general'].getboolean('llmReplyToNonCommands', True) # default True
+    llmUseWikiContext = config['general'].getboolean('llmUseWikiContext', False) # default False
+    useOpenWebUI = config['general'].getboolean('useOpenWebUI', False) # default False
+    openWebUIURL = config['general'].get('openWebUIURL', 'http://localhost:3000') # default localhost:3000
+    openWebUIAPIKey = config['general'].get('openWebUIAPIKey', '') # default empty
    dont_retry_disconnect = config['general'].getboolean('dont_retry_disconnect', False) # default False, retry on disconnect
    favoriteNodeList = config['general'].get('favoriteNodeList', '').split(',')
    enableEcho = config['general'].getboolean('enableEcho', False) # default False
@@ -97,6 +97,24 @@ class TestBot(unittest.TestCase):
        response = send_ollama_query("Hello, Ollama!")
        self.assertIsInstance(response, str)

+    def test_extract_search_terms(self):
+        """extract_search_terms should surface the capitalized topics of a query."""
+        from llm import extract_search_terms
+        # Single capitalized topic
+        terms = extract_search_terms("What is Python programming?")
+        self.assertIsInstance(terms, list)
+        self.assertIn("Python", terms)
+        # Consecutive capitalized words are grouped into one phrase
+        terms2 = extract_search_terms("Tell me about Albert Einstein and Marie Curie")
+        self.assertIsInstance(terms2, list)
+        self.assertIn("Albert Einstein", terms2)
+        # The result is capped at 3 terms
+        self.assertLessEqual(len(terms2), 3)
+
+    def test_get_wiki_context(self):
+        """get_wiki_context must always hand back a string."""
+        from llm import get_wiki_context
+        # A well-known topic; the result may legitimately be '' when the wiki
+        # lookup is disabled or fails, so only the type is checked
+        wiki_text = get_wiki_context("Python programming language")
+        self.assertIsInstance(wiki_text, str)
+
    def test_get_moon_phase(self):
        from space import get_moon
        phase = get_moon(lat, lon)