mirror of
https://github.com/SpudGunMan/meshing-around.git
synced 2026-03-28 17:32:36 +01:00
Compare commits
31 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6f16fc6afb | ||
|
|
fd971d8cc5 | ||
|
|
96193a22e8 | ||
|
|
02b0cde1c8 | ||
|
|
40f4de02d9 | ||
|
|
0b1d626f09 | ||
|
|
964883cae9 | ||
|
|
6ab1102d07 | ||
|
|
c8d8880806 | ||
|
|
21c2f7df18 | ||
|
|
cb51cf921b | ||
|
|
908e84e155 | ||
|
|
b9eaf7deb0 | ||
|
|
128ac456eb | ||
|
|
1269214264 | ||
|
|
4daf087fa5 | ||
|
|
9282c63206 | ||
|
|
710342447f | ||
|
|
8e2c3a43fb | ||
|
|
8d82823ccc | ||
|
|
27789d7508 | ||
|
|
680ba98a1c | ||
|
|
4d71a64971 | ||
|
|
d608754b5e | ||
|
|
70ab741746 | ||
|
|
b0cf5914bf | ||
|
|
434fbc3eef | ||
|
|
1186801d7e | ||
|
|
902d764ca0 | ||
|
|
00fd29e679 | ||
|
|
163920b399 |
10
.github/workflows/docker-image.yml
vendored
10
.github/workflows/docker-image.yml
vendored
@@ -25,10 +25,10 @@ jobs:
|
||||
#
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@v5
|
||||
# Uses the `docker/login-action` action to log in to the Container registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here.
|
||||
- name: Log in to the Container registry
|
||||
uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
|
||||
uses: docker/login-action@28fdb31ff34708d19615a74d67103ddc2ea9725c
|
||||
with:
|
||||
registry: ${{ env.REGISTRY }}
|
||||
username: ${{ github.actor }}
|
||||
@@ -36,7 +36,7 @@ jobs:
|
||||
# This step uses [docker/metadata-action](https://github.com/docker/metadata-action#about) to extract tags and labels that will be applied to the specified image. The `id` "meta" allows the output of this step to be referenced in a subsequent step. The `images` value provides the base name for the tags and labels.
|
||||
- name: Extract metadata (tags, labels) for Docker
|
||||
id: meta
|
||||
uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
|
||||
uses: docker/metadata-action@032a4b3bda1b716928481836ac5bfe36e1feaad6
|
||||
with:
|
||||
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
||||
# This step uses the `docker/build-push-action` action to build the image, based on your repository's `Dockerfile`. If the build succeeds, it pushes the image to GitHub Packages.
|
||||
@@ -44,7 +44,7 @@ jobs:
|
||||
# It uses the `tags` and `labels` parameters to tag and label the image with the output from the "meta" step.
|
||||
- name: Build and push Docker image
|
||||
id: push
|
||||
uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
|
||||
uses: docker/build-push-action@9e436ba9f2d7bcd1d038c8e55d039d37896ddc5d
|
||||
with:
|
||||
context: .
|
||||
push: true
|
||||
@@ -53,7 +53,7 @@ jobs:
|
||||
|
||||
# This step generates an artifact attestation for the image, which is an unforgeable statement about where and how it was built. It increases supply chain security for people who consume the image. For more information, see [Using artifact attestations to establish provenance for builds](/actions/security-guides/using-artifact-attestations-to-establish-provenance-for-builds).
|
||||
- name: Generate artifact attestation
|
||||
uses: actions/attest-build-provenance@v2
|
||||
uses: actions/attest-build-provenance@v3
|
||||
with:
|
||||
subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME}}
|
||||
subject-digest: ${{ steps.push.outputs.digest }}
|
||||
|
||||
@@ -42,7 +42,7 @@ Mesh Bot is a feature-rich Python bot designed to enhance your [Meshtastic](http
|
||||
### Interactive AI and Data Lookup
|
||||
- **Weather, Earthquake, River, and Tide Data**: Get local alerts and info from NOAA/USGS; uses Open-Meteo for areas outside NOAA coverage.
|
||||
- **Wikipedia Search**: Retrieve summaries from Wikipedia.
|
||||
- **Ollama LLM Integration**: Query the [Ollama](https://github.com/ollama/ollama/tree/main/docs) AI for advanced responses.
|
||||
- **OpenWebUI, Ollama LLM Integration**: Query the [Ollama](https://github.com/ollama/ollama/tree/main/docs) AI for advanced responses. Supports RAG (Retrieval Augmented Generation) with Wikipedia/Kiwix context and [OpenWebUI](https://github.com/open-webui/open-webui) integration for enhanced AI capabilities. [LLM Readme](modules/llm.md)
|
||||
- **Satellite Passes**: Find upcoming satellite passes for your location.
|
||||
- **GeoMeasuring Tools**: Calculate distances and midpoints using collected GPS data; supports Fox & Hound direction finding.
|
||||
|
||||
|
||||
@@ -75,15 +75,28 @@ kiwixLibraryName = wikipedia_en_100_nopic_2025-09
|
||||
|
||||
# Enable ollama LLM see more at https://ollama.com
|
||||
ollama = False
|
||||
# Ollama model to use (defaults to gemma3:270m)
|
||||
# Ollama model to use (defaults to gemma3:270m); gemma2 is a good fit for the older SYSTEM prompt mode
|
||||
# ollamaModel = gemma3:latest
|
||||
# ollamaModel = gemma2:2b
|
||||
# server instance to use (defaults to local machine install)
|
||||
ollamaHostName = http://localhost:11434
|
||||
|
||||
# Produce LLM replies to messages that aren't commands?
|
||||
# If False, the LLM only replies to the "ask:" and "askai" commands.
|
||||
llmReplyToNonCommands = True
|
||||
# if True, the input is sent raw to the LLM, if False uses legacy template query
|
||||
rawLLMQuery = True
|
||||
# if True, the input is sent raw to the LLM, if False uses SYSTEM prompt
|
||||
rawLLMQuery = True
|
||||
|
||||
# Enable Wikipedia/Kiwix integration with LLM for RAG (Retrieval Augmented Generation)
|
||||
# When enabled, LLM will automatically search Wikipedia/Kiwix and include context in responses
|
||||
llmUseWikiContext = False
|
||||
|
||||
# Use OpenWebUI instead of direct Ollama API (enables advanced RAG features)
|
||||
useOpenWebUI = False
|
||||
# OpenWebUI server URL (e.g., http://localhost:3000)
|
||||
openWebUIURL = http://localhost:3000
|
||||
# OpenWebUI API key/token (required when useOpenWebUI is True)
|
||||
openWebUIAPIKey =
|
||||
|
||||
# StoreForward Enabled and Limits
|
||||
StoreForward = True
|
||||
|
||||
@@ -5,54 +5,65 @@ from modules.system import send_message
|
||||
|
||||
def setup_custom_schedules(send_message, tell_joke, welcome_message, handle_wxc, MOTD, schedulerChannel, schedulerInterface):
|
||||
"""
|
||||
Set up all custom schedules. Edit this function to add or remove scheduled tasks.
|
||||
Set up custom schedules. Edit the example schedules as needed.
|
||||
|
||||
1. in config.ini set "value" under [scheduler] to: value = custom
|
||||
2. edit this file to add/remove/modify schedules
|
||||
3. restart mesh bot
|
||||
4. verify schedules are working by checking the log file
|
||||
5. Make sure to uncomment the example schedules below to enable them
|
||||
"""
|
||||
try:
|
||||
# Example task functions, modify as needed the channel and interface parameters default to schedulerChannel and schedulerInterface
|
||||
def send_joke(channel, interface):
|
||||
# uses system.send_message to send the result of tell_joke()
|
||||
send_message(tell_joke(), channel, 0, interface)
|
||||
|
||||
### Example schedules
|
||||
# Send a joke every 2 minutes
|
||||
#schedule.every(2).minutes.do(send_joke, send_message, tell_joke, schedulerChannel, schedulerInterface)
|
||||
# Send a good morning message every day at 9 AM
|
||||
#schedule.every().day.at("09:00").do(send_good_morning, send_message, schedulerChannel, schedulerInterface)
|
||||
# Send weather update every day at 8 AM
|
||||
#schedule.every().day.at("08:00").do(send_wx, send_message, handle_wxc, schedulerChannel, schedulerInterface)
|
||||
# Send weather alerts every Wednesday at noon
|
||||
#schedule.every().wednesday.at("12:00").do(send_weather_alert, send_message, schedulerChannel, schedulerInterface)
|
||||
# Send configuration URL every 2 days at 10 AM
|
||||
#schedule.every(2).days.at("10:00").do(send_config_url, send_message, schedulerChannel, schedulerInterface)
|
||||
# Send net starting message every Wednesday at 7 PM
|
||||
#schedule.every().wednesday.at("19:00").do(send_net_starting, send_message, schedulerChannel, schedulerInterface)
|
||||
# Send welcome message every 2 days at 8 AM
|
||||
#schedule.every(2).days.at("08:00").do(send_welcome, send_message, schedulerChannel, schedulerInterface)
|
||||
# Send MOTD every day at 1 PM
|
||||
#schedule.every().day.at("13:00").do(send_motd, send_message, MOTD, schedulerChannel, schedulerInterface)
|
||||
# Send bbslink message every 2 days at 10 AM
|
||||
#schedule.every(2).days.at("10:00").do(send_message("bbslink MeshBot looking for peers", schedulerChannel, 0, schedulerInterface))
|
||||
def send_good_morning(channel, interface):
|
||||
# uses system.send_message to send "Good Morning"
|
||||
send_message("Good Morning", channel, 0, interface)
|
||||
|
||||
# Example task functions, modify as needed the channel and interface parameters default to schedulerChannel and schedulerInterface
|
||||
def send_wx(channel, interface):
|
||||
# uses system.send_message to send the result of handle_wxc(id,id,cmd,days_returned)
|
||||
send_message(handle_wxc(0, 1, 'wx', days=1), channel, 0, interface)
|
||||
|
||||
def send_joke(send_message, tell_joke, channel, interface):
|
||||
send_message(tell_joke(), channel, 0, interface)
|
||||
def send_weather_alert(channel, interface):
|
||||
# uses system.send_message to send string
|
||||
send_message("Weather alerts available on 'Alerts' channel with default 'AQ==' key.", channel, 0, interface)
|
||||
|
||||
def send_good_morning(send_message, channel, interface):
|
||||
send_message("Good Morning", channel, 0, interface)
|
||||
def send_config_url(channel, interface):
|
||||
# uses system.send_message to send string
|
||||
send_message("Join us on Medium Fast https://meshtastic.org/e/#CgcSAQE6AggNEg4IARAEOAFAA0gBUB5oAQ", channel, 0, interface)
|
||||
|
||||
def send_wx(send_message, handle_wxc, channel, interface):
|
||||
send_message(handle_wxc(0, 1, 'wx', days=1), channel, 0, interface)
|
||||
def send_net_starting(channel, interface):
|
||||
# uses system.send_message to send string, channel 2, interface 3
|
||||
send_message("Net Starting Now", 2, 0, 3)
|
||||
|
||||
def send_weather_alert(send_message, channel, interface):
|
||||
send_message("Weather alerts available on 'Alerts' channel with default 'AQ==' key.", channel, 0, interface)
|
||||
def send_welcome(channel, interface):
|
||||
# uses system.send_message to send string, channel 2, interface 1
|
||||
send_message("Welcome to the group", 2, 0, 1)
|
||||
|
||||
def send_config_url(send_message, channel, interface):
|
||||
send_message("Join us on Medium Fast https://meshtastic.org/e/#CgcSAQE6AggNEg4IARAEOAFAA0gBUB5oAQ", channel, 0, interface)
|
||||
def send_motd(channel, interface):
|
||||
send_message(MOTD, channel, 0, interface)
|
||||
|
||||
def send_net_starting(send_message, channel, interface):
|
||||
send_message("Net Starting Now", channel, 0, interface)
|
||||
### Send a joke every 2 minutes
|
||||
#schedule.every(2).minutes.do(lambda: send_joke(schedulerChannel, schedulerInterface))
|
||||
### Send a good morning message every day at 9 AM
|
||||
#schedule.every().day.at("09:00").do(lambda: send_good_morning(schedulerChannel, schedulerInterface))
|
||||
### Send weather update every day at 8 AM
|
||||
#schedule.every().day.at("08:00").do(lambda: send_wx(schedulerChannel, schedulerInterface))
|
||||
### Send weather alerts every Wednesday at noon
|
||||
#schedule.every().wednesday.at("12:00").do(lambda: send_weather_alert(schedulerChannel, schedulerInterface))
|
||||
### Send configuration URL every 2 days at 10 AM
|
||||
#schedule.every(2).days.at("10:00").do(lambda: send_config_url(schedulerChannel, schedulerInterface))
|
||||
### Send net starting message every Wednesday at 7 PM
|
||||
#schedule.every().wednesday.at("19:00").do(lambda: send_net_starting(schedulerChannel, schedulerInterface))
|
||||
### Send welcome message every 2 days at 8 AM
|
||||
#schedule.every(2).days.at("08:00").do(lambda: send_welcome(schedulerChannel, schedulerInterface))
|
||||
### Send MOTD every day at 1 PM
|
||||
#schedule.every().day.at("13:00").do(lambda: send_motd(schedulerChannel, schedulerInterface))
|
||||
### Send bbslink message every 2 days at 10 AM
|
||||
#schedule.every(2).days.at("10:00").do(lambda: send_message("bbslink MeshBot looking for peers", schedulerChannel, 0, schedulerInterface))
|
||||
|
||||
def send_welcome(send_message, channel, interface):
|
||||
send_message("Welcome to the group", channel, 0, interface)
|
||||
|
||||
def send_motd(send_message, MOTD, channel, interface):
|
||||
send_message(MOTD, channel, 0, interface)
|
||||
|
||||
def send_bbslink(send_message, channel, interface):
|
||||
send_message("bbslink MeshBot looking for peers", channel, 0, interface)
|
||||
except Exception as e:
|
||||
logger.error(f"Error setting up custom schedules: {e}")
|
||||
28
mesh_bot.py
28
mesh_bot.py
@@ -378,9 +378,14 @@ def handle_echo(message, message_from_id, deviceID, isDM, channel_number):
|
||||
#send_raw_bytes echo the data to the channel with synch word:
|
||||
port_num = 256
|
||||
synch_word = b"echo:"
|
||||
message = message.split("echo ")[1]
|
||||
raw_bytes = synch_word + message.encode('utf-8')
|
||||
send_raw_bytes(message_from_id, raw_bytes, nodeInt=deviceID, channel=channel_number, portnum=port_num)
|
||||
parts = message.split("echo ", 1)
|
||||
if len(parts) > 1 and parts[1].strip() != "":
|
||||
msg_to_echo = parts[1]
|
||||
raw_bytes = synch_word + msg_to_echo.encode('utf-8')
|
||||
send_raw_bytes(message_from_id, raw_bytes, nodeInt=deviceID, channel=channel_number, portnum=port_num)
|
||||
return f"Sent binary echo message to {message_from_id} to {port_num} on channel {channel_number} device {deviceID}"
|
||||
else:
|
||||
return "Please provide a message to echo back to you. Example:echo Hello World"
|
||||
except Exception as e:
|
||||
logger.error(f"System: Echo Exception {e}")
|
||||
return f"Sent binary echo message to {message_from_id} to {port_num} on channel {channel_number} device {deviceID}"
|
||||
@@ -1486,10 +1491,21 @@ def handle_boot(mesh=True):
|
||||
f"{get_name_from_number(myNodeNum, 'short', i)}. NodeID: {myNodeNum}, {decimal_to_hex(myNodeNum)}")
|
||||
|
||||
if llm_enabled:
|
||||
logger.debug(f"System: Ollama LLM Enabled, loading model {my_settings.llmModel} please wait")
|
||||
llmLoad = llm_query(" ")
|
||||
msg = f"System: LLM Enabled"
|
||||
llmLoad = llm_query(" ", init=True)
|
||||
if "trouble" not in llmLoad:
|
||||
logger.debug(f"System: LLM Model {my_settings.llmModel} loaded")
|
||||
if my_settings.llmReplyToNonCommands:
|
||||
msg += " | Reply to DM's Enabled"
|
||||
if my_settings.llmUseWikiContext:
|
||||
wiki_source = "Kiwixpedia" if my_settings.use_kiwix_server else "Wikipedia"
|
||||
msg += f" | {wiki_source} Context Enabled"
|
||||
if my_settings.useOpenWebUI:
|
||||
msg += " | OpenWebUI API Enabled"
|
||||
else:
|
||||
msg += f" | Ollama API Model {my_settings.llmModel} loaded. Use {'RAW' if my_settings.rawLLMQuery else 'SYSTEM'} prompt mode."
|
||||
logger.debug(msg)
|
||||
else:
|
||||
logger.debug(f"System: Bad response from LLM: {llmLoad}")
|
||||
|
||||
if my_settings.bbs_enabled:
|
||||
logger.debug(f"System: BBS Enabled, {bbsdb} has {len(bbs_messages)} messages. Direct Mail Messages waiting: {(len(bbs_dm) - 1)}")
|
||||
|
||||
@@ -254,6 +254,8 @@ Enable and configure VOX features in the `[vox]` section of `config.ini`.
|
||||
|
||||
Configure in `[ollama]` section of `config.ini`.
|
||||
|
||||
More at [LLM Readme](llm.md)
|
||||
|
||||
---
|
||||
|
||||
## Wikipedia Search
|
||||
@@ -762,29 +764,6 @@ enabled = True
|
||||
repeater_channels = [2, 3]
|
||||
```
|
||||
|
||||
### Ollama (LLM/AI) Settings
|
||||
For Ollama to work, the command line `ollama run 'model'` needs to work properly. Ensure you have enough RAM and your GPU is working as expected. The default model for this project is set to `gemma3:270m`. Ollama can be remote [Ollama Server](https://github.com/ollama/ollama/blob/main/docs/faq.md#how-do-i-configure-ollama-server) works on a pi58GB with 40 second or less response time.
|
||||
|
||||
```ini
|
||||
# Enable ollama LLM see more at https://ollama.com
|
||||
ollama = True # Ollama model to use (defaults to gemma2:2b)
|
||||
ollamaModel = gemma3:latest # Ollama model to use (defaults to gemma3:270m)
|
||||
ollamaHostName = http://localhost:11434 # server instance to use (defaults to local machine install)
|
||||
```
|
||||
|
||||
Also see `llm.py` for changing the defaults of:
|
||||
|
||||
```ini
|
||||
# LLM System Variables
|
||||
rawQuery = True # if True, the input is sent raw to the LLM if False, it is processed by the meshBotAI template
|
||||
|
||||
# Used in the meshBotAI template (legacy)
|
||||
llmEnableHistory = True # enable history for the LLM model to use in responses adds to compute time
|
||||
llmContext_fromGoogle = True # enable context from google search results helps with responses accuracy
|
||||
googleSearchResults = 3 # number of google search results to include in the context more results = more compute time
|
||||
```
|
||||
Note for LLM in docker with [NVIDIA](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/docker-specialized.html). Needed for the container with ollama running.
|
||||
|
||||
### Wikipedia Search Settings
|
||||
The Wikipedia search module can use either the online Wikipedia API or a local Kiwix server for offline wiki access. Kiwix is especially useful for mesh networks operating in remote or offline environments.
|
||||
|
||||
@@ -808,7 +787,7 @@ To set up a local Kiwix server:
|
||||
1. Install Kiwix tools: https://kiwix.org/en/ `sudo apt install kiwix-tools -y`
|
||||
2. Download a Wikipedia ZIM file to `data/`: https://library.kiwix.org/ `wget https://download.kiwix.org/zim/wikipedia/wikipedia_en_100_nopic_2025-09.zim`
|
||||
3. Run the server: `kiwix-serve --port 8080 wikipedia_en_100_nopic_2025-09.zim`
|
||||
4. Set `useKiwixServer = True` in your config.ini
|
||||
4. Set `useKiwixServer = True` in your config.ini with `wikipedia = True`
|
||||
|
||||
The bot will automatically extract and truncate content to fit Meshtastic's message size limits (~500 characters).
|
||||
|
||||
|
||||
@@ -10,8 +10,8 @@ import bs4 as bs # pip install beautifulsoup4
|
||||
from modules.log import logger
|
||||
from modules.settings import urlTimeoutSeconds, NO_ALERTS, myRegionalKeysDE
|
||||
|
||||
trap_list_location_eu = ("ukalert")
|
||||
trap_list_location_de = ("dealert")
|
||||
trap_list_location_eu = ("ukalert",)
|
||||
trap_list_location_de = ("dealert",)
|
||||
|
||||
def get_govUK_alerts(lat, lon):
|
||||
try:
|
||||
|
||||
64
modules/llm.md
Normal file
64
modules/llm.md
Normal file
@@ -0,0 +1,64 @@
|
||||
# How do I use this thing?
|
||||
This is not a full turnkey setup yet?
|
||||
|
||||
|
||||
For Ollama to work, the command line `ollama run 'model'` needs to work properly. Ensure you have enough RAM and your GPU is working as expected. The default model for this project is set to `gemma3:270m`. Ollama can also run on a remote [Ollama Server](https://github.com/ollama/ollama/blob/main/docs/faq.md#how-do-i-configure-ollama-server); it works on a Pi 5 (8 GB) with a response time of 40 seconds or less.
|
||||
|
||||
|
||||
# Ollama local
|
||||
```bash
|
||||
# bash
|
||||
curl -fsSL https://ollama.com/install.sh | sh
|
||||
# docker
|
||||
docker run -d -p 3000:8080 --add-host=host.docker.internal:host-gateway -e OLLAMA_API_BASE_URL=http://host.docker.internal:11434 open-webui/open-webui
|
||||
```
|
||||
|
||||
```ini
|
||||
#service file addition
|
||||
# https://github.com/ollama/ollama/issues/703
|
||||
[Service]
|
||||
Environment="OLLAMA_HOST=0.0.0.0:11434"
|
||||
```
|
||||
## validation
|
||||
http://IP:11434
|
||||
`Ollama is running`
|
||||
|
||||
## Docs
|
||||
Note for LLM in docker with [NVIDIA](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/docker-specialized.html). Needed for the container with ollama running?
|
||||
|
||||
---
|
||||
|
||||
# OpenWebUI (docker)
|
||||
```bash
|
||||
## ollama in docker
|
||||
docker run -d -p 3000:8080 --gpus all -v open-webui:/app/backend/data --name open-webui ghcr.io/open-webui/open-webui:cuda
|
||||
|
||||
## external ollama
|
||||
docker run -d -p 3000:8080 -e OLLAMA_BASE_URL=https://IP:11434 -v open-webui:/app/backend/data --name open-webui --restart always ghcr.io/open-webui/open-webui:main
|
||||
```
|
||||
Wait for the engine to build, then update the bot's `config.ini`:
|
||||
|
||||
```ini
|
||||
# Use OpenWebUI instead of direct Ollama API (enables advanced RAG features)
|
||||
useOpenWebUI = False
|
||||
# OpenWebUI server URL (e.g., http://localhost:3000)
|
||||
openWebUIURL = http://localhost:3000
|
||||
# OpenWebUI API key/token (required when useOpenWebUI is True)
|
||||
openWebUIAPIKey = sk-xxxx (see below for help)
|
||||
```
|
||||
|
||||
## Validation
|
||||
http://IP:3000
|
||||
make a new admin user.
|
||||
validate you have models imported or that the system is working for query.
|
||||
make a new user for the bot
|
||||
|
||||
## API Key
|
||||
- upper right settings for the user
|
||||
- settings -> account
|
||||
- get/create the API key for the user
|
||||
|
||||
## Docs
|
||||
set api endpoint [OpenWebUI API](https://docs.openwebui.com/getting-started/api-endpoints)
|
||||
|
||||
---
|
||||
349
modules/llm.py
349
modules/llm.py
@@ -3,30 +3,28 @@
|
||||
# This module is used to interact with LLM API to generate responses to user input
|
||||
# K7MHI Kelly Keeton 2024
|
||||
from modules.log import logger
|
||||
from modules.settings import llmModel, ollamaHostName, rawLLMQuery
|
||||
from modules.settings import (llmModel, ollamaHostName, rawLLMQuery,
|
||||
llmUseWikiContext, useOpenWebUI, openWebUIURL, openWebUIAPIKey, cmdBang, urlTimeoutSeconds, use_kiwix_server)
|
||||
|
||||
# Ollama Client
|
||||
# https://github.com/ollama/ollama/blob/main/docs/faq.md#how-do-i-configure-ollama-server
|
||||
import requests
|
||||
import json
|
||||
from datetime import datetime
|
||||
|
||||
if not rawLLMQuery:
|
||||
# this may be removed in the future
|
||||
from googlesearch import search # pip install googlesearch-python
|
||||
if llmUseWikiContext or use_kiwix_server:
|
||||
from modules.wiki import get_wikipedia_summary, get_kiwix_summary
|
||||
|
||||
# LLM System Variables
|
||||
ollamaAPI = ollamaHostName + "/api/generate"
|
||||
openWebUIChatAPI = openWebUIURL + "/api/chat/completions"
|
||||
openWebUIOllamaProxy = openWebUIURL + "/ollama/api/generate"
|
||||
tokens = 450 # max charcters for the LLM response, this is the max length of the response also in prompts
|
||||
requestTruncation = True # if True, the LLM "will" truncate the response
|
||||
|
||||
openaiAPI = "https://api.openai.com/v1/completions" # not used, if you do push a enhancement!
|
||||
requestTruncation = True # if True, the LLM "will" truncate the response
|
||||
DEBUG_LLM = False # enable debug logging for LLM queries
|
||||
|
||||
# Used in the meshBotAI template
|
||||
llmEnableHistory = True # enable last message history for the LLM model
|
||||
llmContext_fromGoogle = True # enable context from google search results adds to compute time but really helps with responses accuracy
|
||||
|
||||
googleSearchResults = 3 # number of google search results to include in the context more results = more compute time
|
||||
antiFloodLLM = []
|
||||
llmChat_history = {}
|
||||
trap_list_llm = ("ask:", "askai")
|
||||
@@ -52,24 +50,6 @@ meshBotAI = """
|
||||
|
||||
"""
|
||||
|
||||
if llmContext_fromGoogle:
|
||||
meshBotAI = meshBotAI + """
|
||||
CONTEXT
|
||||
The following is the location of the user
|
||||
{location_name}
|
||||
|
||||
The following is for context around the prompt to help guide your response.
|
||||
{context}
|
||||
|
||||
"""
|
||||
else:
|
||||
meshBotAI = meshBotAI + """
|
||||
CONTEXT
|
||||
The following is the location of the user
|
||||
{location_name}
|
||||
|
||||
"""
|
||||
|
||||
if llmEnableHistory:
|
||||
meshBotAI = meshBotAI + """
|
||||
HISTORY
|
||||
@@ -101,22 +81,6 @@ def llmTool_math_calculator(expression):
|
||||
except Exception as e:
|
||||
return f"Error in calculation: {e}"
|
||||
|
||||
def llmTool_get_google(query, num_results=3):
|
||||
"""
|
||||
Example tool function to perform a Google search and return results.
|
||||
:param query: The search query string.
|
||||
:param num_results: Number of search results to return.
|
||||
:return: A list of search result titles and descriptions.
|
||||
"""
|
||||
results = []
|
||||
try:
|
||||
googleSearch = search(query, advanced=True, num_results=num_results)
|
||||
for result in googleSearch:
|
||||
results.append(f"{result.title}: {result.description}")
|
||||
return results
|
||||
except Exception as e:
|
||||
return [f"Error in Google search: {e}"]
|
||||
|
||||
llmFunctions = [
|
||||
|
||||
{
|
||||
@@ -141,46 +105,163 @@ llmFunctions = [
|
||||
"required": ["expression"]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "llmTool_get_google",
|
||||
"description": "Perform a Google search and return results.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {
|
||||
"type": "string",
|
||||
"description": "The search query string."
|
||||
},
|
||||
"num_results": {
|
||||
"type": "integer",
|
||||
"description": "Number of search results to return.",
|
||||
"default": 3
|
||||
}
|
||||
},
|
||||
"required": ["query"]
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
def get_google_context(input, num_results):
|
||||
# Get context from Google search results
|
||||
googleResults = []
|
||||
def get_wiki_context(input):
    """
    Get context from Wikipedia/Kiwix for RAG enhancement.

    Search terms are derived from the query with extract_search_terms(); at
    most the first two are looked up, using the Kiwix server when
    use_kiwix_server is set and the online Wikipedia API otherwise.

    :param input: The user query
    :return: Newline-joined summaries, or an empty string when nothing usable
             was found or the lookup failed
    """
    try:
        # Try to identify key topics/entities for the Wikipedia search
        search_terms = extract_search_terms(input)

        wiki_context = []
        for term in search_terms[:2]:  # Limit to 2 searches to avoid excessive API calls
            if use_kiwix_server:
                summary = get_kiwix_summary(term, truncate=False)
            else:
                summary = get_wikipedia_summary(term, truncate=False)

            # An empty/None result carries no context; skip it instead of
            # letting a later membership test raise and abort the whole loop.
            if not summary:
                continue

            # Every marker must be absent for the summary to be usable. The
            # checks were previously chained with `or`, which made the filter
            # pass almost everything, including error replies.
            # NOTE(review): "html://" is kept verbatim; it may have been meant
            # as "http://" - confirm against the wiki helpers' error output.
            lowered = summary.lower()
            if "error" in lowered or "html://" in summary or "ambiguous" in lowered:
                continue

            wiki_context.append(f"Wikipedia context for '{term}': {summary}")

        return '\n'.join(wiki_context) if wiki_context else ''
    except Exception as e:
        # Best effort: context is optional, so log and continue without it
        logger.debug(f"System: LLM Query: Wiki context gathering failed: {e}")
        return ''
|
||||
|
||||
def llm_extract_topic(input):
    """
    Use LLM to extract the main topic as a single word or short phrase.

    Always uses raw mode and supports both Ollama and OpenWebUI. OpenWebUI is
    used only when it is enabled and an API key is configured.

    :param input: The user query
    :return: List with one topic string, or empty list on failure
    """
    prompt = (
        "Summarize the following query into a single word or short phrase that best represents the main topic, "
        "for use as a Wikipedia search term. Only return the word or phrase, nothing else:\n"
        f"{input}"
    )
    try:
        if useOpenWebUI and openWebUIAPIKey:
            result = send_openwebui_query(prompt, max_tokens=10)
        else:
            # Ollama's /api/generate takes its generation limit via
            # options.num_predict; a top-level "max_tokens" key is not honoured.
            llmQuery = {
                "model": llmModel,
                "prompt": prompt,
                "stream": False,
                "options": {"num_predict": 10},
            }
            result = send_ollama_query(llmQuery)

        # Keep only the first line and trim quotes/punctuation around it
        topic = result.strip().split('\n')[0]
        topic = topic.strip(' "\'.,!?;:')

        # send_openwebui_query reports failures as strings that start with the
        # no-entry emoji (U+26D4); those must not be used as a search term.
        # Presumably send_ollama_query follows the same convention - verify.
        if topic and not topic.startswith("\u26d4"):
            return [topic]
    except Exception as e:
        logger.debug(f"LLM topic extraction failed: {e}")
    return []
|
||||
|
||||
def extract_search_terms(input):
    """
    Extract potential search terms from user input.

    Tries LLM-based topic extraction first and falls back to a heuristic that
    groups consecutive capitalized words (longer than two characters) into
    phrases. If the heuristic finds nothing, the whole query is used.

    :param input: The user query
    :return: List of potential search terms (at most 3); empty when the query
             is blank once a command prefix has been removed
    """
    # Remove common command prefixes (only the first one that matches)
    for trap in trap_list_llm:
        if input.lower().startswith(trap):
            input = input[len(trap):].strip()
            break

    # Nothing left to search for: skip the LLM round trip and do not hand an
    # empty string to the wiki lookup as a search term.
    if not input.strip():
        return []

    # Try LLM-based extraction first
    terms = llm_extract_topic(input)
    if terms:
        return terms

    # Fallback: collect runs of capitalized words as candidate phrases
    search_terms = []
    temp_phrase = []
    for word in input.split():
        clean_word = word.strip('.,!?;:')
        if clean_word and clean_word[0].isupper() and len(clean_word) > 2:
            temp_phrase.append(clean_word)
        elif temp_phrase:
            # A non-matching word ends the current phrase
            search_terms.append(' '.join(temp_phrase))
            temp_phrase = []
    if temp_phrase:
        search_terms.append(' '.join(temp_phrase))

    # No capitalized phrase found: fall back to the whole query
    if not search_terms:
        search_terms = [input.strip()]

    if DEBUG_LLM:
        logger.debug(f"Extracted search terms: {search_terms}")
    return search_terms[:3]  # Limit to 3 terms
|
||||
|
||||
def send_openwebui_query(prompt, model=None, max_tokens=450, context=''):
    """
    Send query to OpenWebUI API for chat completion.

    Posts an OpenAI-style chat payload to openWebUIChatAPI using the configured
    bearer token. Failures are reported through the returned string and are
    never raised to the caller.

    :param prompt: The user prompt
    :param model: Model name (optional, defaults to llmModel)
    :param max_tokens: Max tokens for response
    :param context: Additional context to include as a system message
    :return: Response text, "⛔️ Request Error" when the request fails or the
             status is not 200, or "⛔️ Response Error" when the reply does not
             have the expected shape
    """
    if model is None:
        model = llmModel

    headers = {
        'Authorization': f'Bearer {openWebUIAPIKey}',
        'Content-Type': 'application/json'
    }

    messages = []
    if context:
        messages.append({
            "role": "system",
            "content": f"Use the following context to help answer questions:\n{context}"
        })
    messages.append({
        "role": "user",
        "content": prompt
    })

    data = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,
        "stream": False
    }

    # Debug logging
    if DEBUG_LLM:
        logger.debug(f"OpenWebUI payload: {json.dumps(data)}")
        logger.debug(f"OpenWebUI endpoint: {openWebUIChatAPI}")

    try:
        result = requests.post(openWebUIChatAPI, headers=headers, json=data, timeout=urlTimeoutSeconds * 5)
        if DEBUG_LLM:
            logger.debug(f"OpenWebUI response status: {result.status_code}")
            logger.debug(f"OpenWebUI response text: {result.text}")
        if result.status_code != 200:
            logger.warning(f"System: OpenWebUI API returned status code {result.status_code}")
            return "⛔️ Request Error"
        result_json = result.json()
    except requests.exceptions.RequestException as e:
        logger.warning(f"System: OpenWebUI API request failed: {e}")
        return "⛔️ Request Error"
    except ValueError:
        # Body was not valid JSON and the installed requests version raised a
        # plain ValueError rather than a RequestException subclass
        logger.warning("System: OpenWebUI API returned unexpected format")
        return "⛔️ Response Error"

    # OpenWebUI returns OpenAI-compatible format; a 200 reply with a missing
    # or malformed first choice must not raise out of this function.
    try:
        response = result_json['choices'][0]['message']['content']
    except (KeyError, IndexError, TypeError):
        response = None

    if not isinstance(response, str):
        logger.warning("System: OpenWebUI API returned unexpected format")
        return "⛔️ Response Error"
    return response.strip()
|
||||
|
||||
def send_ollama_query(llmQuery):
|
||||
# Send the query to the Ollama API and return the response
|
||||
try:
|
||||
result = requests.post(ollamaAPI, data=json.dumps(llmQuery), timeout=5)
|
||||
result = requests.post(ollamaAPI, data=json.dumps(llmQuery), timeout= urlTimeoutSeconds * 5)
|
||||
if result.status_code == 200:
|
||||
result_json = result.json()
|
||||
result = result_json.get("response", "")
|
||||
@@ -219,24 +300,28 @@ def send_ollama_tooling_query(prompt, functions, model=None, max_tokens=450):
|
||||
else:
|
||||
raise Exception(f"HTTP Error: {result.status_code} - {result.text}")
|
||||
|
||||
def llm_query(input, nodeID=0, location_name=None):
|
||||
def llm_query(input, nodeID=0, location_name=None, init=False):
|
||||
global antiFloodLLM, llmChat_history
|
||||
googleResults = []
|
||||
wikiContext = ''
|
||||
|
||||
# If this is the first initialization of the LLM, the query of " " should bring in meshbotAIinit; OTA shouldn't reach this?
|
||||
# This is for LLM like gemma and others now?
|
||||
if input == " " and rawLLMQuery:
|
||||
if init and rawLLMQuery:
|
||||
logger.warning("System: These LLM models lack a traditional system prompt, they can be verbose and not very helpful be advised.")
|
||||
input = meshbotAIinit
|
||||
else:
|
||||
elif init:
|
||||
input = input.strip()
|
||||
# classic model for gemma2, deepseek-r1, etc
|
||||
logger.debug(f"System: Using classic LLM model framework, ideally for gemma2, deepseek-r1, etc")
|
||||
logger.debug(f"System: Using SYSTEM model framework, ideally for gemma2, deepseek-r1, etc")
|
||||
|
||||
if not location_name:
|
||||
location_name = "no location provided "
|
||||
|
||||
# Remove command bang if present
|
||||
if cmdBang and input.startswith('!'):
|
||||
input = input.strip('!').strip()
|
||||
|
||||
# remove askai: and ask: from the input
|
||||
# Remove any trap words from the start of the input
|
||||
for trap in trap_list_llm:
|
||||
if input.lower().startswith(trap):
|
||||
input = input[len(trap):].strip()
|
||||
@@ -251,34 +336,84 @@ def llm_query(input, nodeID=0, location_name=None):
|
||||
else:
|
||||
antiFloodLLM.append(nodeID)
|
||||
|
||||
if llmContext_fromGoogle and not rawLLMQuery:
|
||||
googleResults = get_google_context(input, googleSearchResults)
|
||||
# Get Wikipedia/Kiwix context if enabled (RAG)
|
||||
if llmUseWikiContext and input != meshbotAIinit:
|
||||
# get_wiki_context returns a string, but we want to count the items before joining
|
||||
search_terms = extract_search_terms(input)
|
||||
wiki_context_list = []
|
||||
for term in search_terms[:2]:
|
||||
if not use_kiwix_server:
|
||||
summary = get_wiki_context(term)
|
||||
else:
|
||||
summary = get_wiki_context(term)
|
||||
if summary and "error" not in summary.lower():
|
||||
wiki_context_list.append(f"Wikipedia context for '{term}': {summary}")
|
||||
wikiContext = '\n'.join(wiki_context_list) if wiki_context_list else ''
|
||||
if wikiContext:
|
||||
logger.debug(f"System: using Wikipedia/Kiwix context for LLM query got {len(wiki_context_list)} results")
|
||||
|
||||
history = llmChat_history.get(nodeID, ["", ""])
|
||||
|
||||
if googleResults:
|
||||
logger.debug(f"System: Google-Enhanced LLM Query: {input} From:{nodeID}")
|
||||
else:
|
||||
logger.debug(f"System: LLM Query: {input} From:{nodeID}")
|
||||
|
||||
response = ""
|
||||
result = ""
|
||||
location_name += f" at the current time of {datetime.now().strftime('%Y-%m-%d %H:%M:%S %Z')}"
|
||||
|
||||
try:
|
||||
if rawLLMQuery:
|
||||
# sanitize the input to remove tool call syntax
|
||||
if '```' in input:
|
||||
logger.warning("System: LLM Query: Code markdown detected, removing for raw query")
|
||||
input = input.replace('```bash', '').replace('```python', '').replace('```', '')
|
||||
modelPrompt = input
|
||||
else:
|
||||
# Build the query from the template
|
||||
modelPrompt = meshBotAI.format(input=input, context='\n'.join(googleResults), location_name=location_name, llmModel=llmModel, history=history)
|
||||
# Use OpenWebUI if enabled
|
||||
if useOpenWebUI and openWebUIAPIKey:
|
||||
logger.debug(f"System: LLM Query: Using OpenWebUI API for LLM query {input} From:{nodeID}")
|
||||
|
||||
llmQuery = {"model": llmModel, "prompt": modelPrompt, "stream": False, "max_tokens": tokens}
|
||||
# Query the model via Ollama web API
|
||||
result = send_ollama_query(llmQuery)
|
||||
# Combine all context sources
|
||||
combined_context = []
|
||||
if wikiContext:
|
||||
combined_context.append(wikiContext)
|
||||
|
||||
context_str = '\n\n'.join(combined_context)
|
||||
|
||||
# For OpenWebUI, we send a cleaner prompt
|
||||
if rawLLMQuery:
|
||||
result = send_openwebui_query(input, context=context_str, max_tokens=tokens)
|
||||
else:
|
||||
# Use the template for non-raw queries
|
||||
modelPrompt = meshBotAI.format(
|
||||
input=input,
|
||||
context=context_str if combined_context else 'no other context provided',
|
||||
location_name=location_name,
|
||||
llmModel=llmModel,
|
||||
history=history
|
||||
)
|
||||
result = send_openwebui_query(modelPrompt, max_tokens=tokens)
|
||||
else:
|
||||
logger.debug(f"System: LLM Query: Using Ollama API for LLM query {input} From:{nodeID}")
|
||||
# Use standard Ollama API
|
||||
if rawLLMQuery:
|
||||
# sanitize the input to remove tool call syntax
|
||||
if '```' in input:
|
||||
logger.warning("System: LLM Query: Code markdown detected, removing for raw query")
|
||||
input = input.replace('```bash', '').replace('```python', '').replace('```', '')
|
||||
modelPrompt = input
|
||||
|
||||
# Add wiki context to raw queries if available
|
||||
if wikiContext:
|
||||
modelPrompt = f"Context:\n{wikiContext}\n\nQuestion: {input}"
|
||||
else:
|
||||
# Build the query from the template
|
||||
all_context = []
|
||||
if wikiContext:
|
||||
all_context.append(wikiContext)
|
||||
|
||||
context_text = '\n'.join(all_context) if all_context else 'no other context provided'
|
||||
modelPrompt = meshBotAI.format(
|
||||
input=input,
|
||||
context=context_text,
|
||||
location_name=location_name,
|
||||
llmModel=llmModel,
|
||||
history=history
|
||||
)
|
||||
|
||||
llmQuery = {"model": llmModel, "prompt": modelPrompt, "stream": False, "max_tokens": tokens}
|
||||
# Query the model via Ollama web API
|
||||
result = send_ollama_query(llmQuery)
|
||||
|
||||
#logger.debug(f"System: LLM Response: " + result.strip().replace('\n', ' '))
|
||||
except Exception as e:
|
||||
@@ -290,13 +425,17 @@ def llm_query(input, nodeID=0, location_name=None):
|
||||
response = result.strip().replace('\n', ' ')
|
||||
|
||||
if rawLLMQuery and requestTruncation and len(response) > 450:
|
||||
#retryy loop to truncate the response
|
||||
# retry loop to truncate the response
|
||||
logger.warning(f"System: LLM Query: Response exceeded {tokens} characters, requesting truncation")
|
||||
truncateQuery = {"model": llmModel, "prompt": truncatePrompt + response, "stream": False, "max_tokens": tokens}
|
||||
truncateResult = send_ollama_query(truncateQuery)
|
||||
truncate_prompt_full = truncatePrompt + response
|
||||
if useOpenWebUI and openWebUIAPIKey:
|
||||
truncateResult = send_openwebui_query(truncate_prompt_full, max_tokens=tokens)
|
||||
else:
|
||||
truncateQuery = {"model": llmModel, "prompt": truncate_prompt_full, "stream": False, "max_tokens": tokens}
|
||||
truncateResult = send_ollama_query(truncateQuery)
|
||||
|
||||
# cleanup for message output
|
||||
response = result.strip().replace('\n', ' ')
|
||||
response = truncateResult.strip().replace('\n', ' ')
|
||||
|
||||
# done with the query, remove the user from the anti flood list
|
||||
antiFloodLLM.remove(nodeID)
|
||||
|
||||
@@ -77,6 +77,7 @@ def get_rss_feed(msg):
|
||||
return "No RSS or Atom feed entries found."
|
||||
|
||||
formatted_entries = []
|
||||
seen_first3 = set() # Track first 3 words (lowercased) to avoid duplicates
|
||||
for item in items:
|
||||
# Helper to try multiple tag names
|
||||
def find_any(item, tags):
|
||||
@@ -122,9 +123,16 @@ def get_rss_feed(msg):
|
||||
if len(description) > RSS_TRIM_LENGTH:
|
||||
description = description[:RSS_TRIM_LENGTH - 3] + "..."
|
||||
|
||||
# Duplicate check: use first 3 words of description (or title if description is empty)
|
||||
text_for_dupe = description if description else (title or "")
|
||||
first3 = " ".join(text_for_dupe.lower().split()[:3])
|
||||
if first3 in seen_first3:
|
||||
continue
|
||||
seen_first3.add(first3)
|
||||
|
||||
formatted_entries.append(f"{title}\n{description}\n")
|
||||
return "\n".join(formatted_entries)
|
||||
except Exception as e:
|
||||
logger.error(f"Error fetching RSS feed from {feed_url}: {e}")
|
||||
return ERROR_FETCHING_DATA
|
||||
|
||||
|
||||
|
||||
@@ -256,6 +256,10 @@ try:
|
||||
llmModel = config['general'].get('ollamaModel', 'gemma3:270m') # default gemma3:270m
|
||||
rawLLMQuery = config['general'].getboolean('rawLLMQuery', True) #default True
|
||||
llmReplyToNonCommands = config['general'].getboolean('llmReplyToNonCommands', True) # default True
|
||||
llmUseWikiContext = config['general'].getboolean('llmUseWikiContext', False) # default False
|
||||
useOpenWebUI = config['general'].getboolean('useOpenWebUI', False) # default False
|
||||
openWebUIURL = config['general'].get('openWebUIURL', 'http://localhost:3000') # default localhost:3000
|
||||
openWebUIAPIKey = config['general'].get('openWebUIAPIKey', '') # default empty
|
||||
dont_retry_disconnect = config['general'].getboolean('dont_retry_disconnect', False) # default False, retry on disconnect
|
||||
favoriteNodeList = config['general'].get('favoriteNodeList', '').split(',')
|
||||
enableEcho = config['general'].getboolean('enableEcho', False) # default False
|
||||
|
||||
@@ -147,8 +147,8 @@ if dxspotter_enabled:
|
||||
help_message = help_message + ", dx"
|
||||
|
||||
# Wikipedia Search Configuration
|
||||
if wikipedia_enabled:
|
||||
from modules.wiki import * # from the spudgunman/meshing-around repo
|
||||
# Register the "wiki" command when either the Wikipedia lookup or the Kiwix
# server option is enabled.
if wikipedia_enabled or use_kiwix_server:
    # Import each helper exactly once; a repeated name in the import list is
    # redundant and is flagged by linters.
    from modules.wiki import get_wikipedia_summary, get_kiwix_summary
    trap_list = trap_list + ("wiki",)
    help_message = help_message + ", wiki"
|
||||
|
||||
|
||||
@@ -97,6 +97,24 @@ class TestBot(unittest.TestCase):
|
||||
response = send_ollama_query("Hello, Ollama!")
|
||||
self.assertIsInstance(response, str)
|
||||
|
||||
def test_extract_search_terms(self):
    """Check that extract_search_terms yields a non-empty list for sample prompts.

    Each prompt runs inside its own subTest, so a failure on one prompt does
    not prevent the other prompt from being checked and reported.
    """
    from llm import extract_search_terms
    prompts = (
        "What is Python programming?",  # capitalized terms
        "Tell me about Albert Einstein and Marie Curie",  # multiple capitalized words
    )
    for prompt in prompts:
        with self.subTest(prompt=prompt):
            terms = extract_search_terms(prompt)
            self.assertIsInstance(terms, list)
            # assertGreater reports the actual length when the check fails.
            self.assertGreater(len(terms), 0)
|
||||
|
||||
def test_get_wiki_context(self):
    """get_wiki_context should hand back a string for a well-known topic."""
    from llm import get_wiki_context
    topic = "Python programming language"
    wiki_text = get_wiki_context(topic)
    # Only the type is checked: an empty result is acceptable, since the
    # wiki lookup may be disabled or may fail.
    self.assertIsInstance(wiki_text, str)
|
||||
|
||||
def test_get_moon_phase(self):
|
||||
from space import get_moon
|
||||
phase = get_moon(lat, lon)
|
||||
|
||||
117
modules/wiki.py
117
modules/wiki.py
@@ -2,7 +2,7 @@
|
||||
|
||||
from modules.log import logger
|
||||
from modules.settings import (use_kiwix_server, kiwix_url, kiwix_library_name,
|
||||
urlTimeoutSeconds, wiki_return_limit, ERROR_FETCHING_DATA)
|
||||
urlTimeoutSeconds, wiki_return_limit, ERROR_FETCHING_DATA, wikipedia_enabled)
|
||||
#import wikipedia # pip install wikipedia
|
||||
import requests
|
||||
import bs4 as bs
|
||||
@@ -17,77 +17,63 @@ def tag_visible(element):
|
||||
return True
|
||||
|
||||
def text_from_html(body):
|
||||
"""Extract visible text from HTML content"""
|
||||
"""Extract main article text from HTML content"""
|
||||
soup = bs.BeautifulSoup(body, 'html.parser')
|
||||
texts = soup.find_all(string=True)
|
||||
# Try to find the main content div (works for both Kiwix and Wikipedia HTML)
|
||||
main = soup.find('div', class_='mw-parser-output')
|
||||
if not main:
|
||||
# Fallback: just use the body if main content div not found
|
||||
main = soup.body
|
||||
if not main:
|
||||
return ""
|
||||
texts = main.find_all(string=True)
|
||||
visible_texts = filter(tag_visible, texts)
|
||||
return " ".join(t.strip() for t in visible_texts if t.strip())
|
||||
|
||||
def get_kiwix_summary(search_term):
|
||||
"""Query local Kiwix server for Wikipedia article"""
|
||||
def get_kiwix_summary(search_term, truncate=True):
|
||||
"""Query local Kiwix server for Wikipedia article using only search results."""
|
||||
if search_term is None or search_term.strip() == "":
|
||||
return ERROR_FETCHING_DATA
|
||||
try:
|
||||
search_encoded = quote(search_term)
|
||||
# Try direct article access first
|
||||
wiki_article = search_encoded.capitalize().replace("%20", "_")
|
||||
exact_url = f"{kiwix_url}/raw/{kiwix_library_name}/content/A/{wiki_article}"
|
||||
|
||||
response = requests.get(exact_url, timeout=urlTimeoutSeconds)
|
||||
if response.status_code == 200:
|
||||
# Extract and clean text
|
||||
text = text_from_html(response.text)
|
||||
# Remove common Wikipedia metadata prefixes
|
||||
text = text.split("Jump to navigation", 1)[-1]
|
||||
text = text.split("Jump to search", 1)[-1]
|
||||
# Truncate to reasonable length (first few sentences)
|
||||
sentences = text.split('. ')
|
||||
summary = '. '.join(sentences[:wiki_return_limit])
|
||||
if summary and not summary.endswith('.'):
|
||||
summary += '.'
|
||||
return summary.strip()[:500] # Hard limit at 500 chars
|
||||
|
||||
# If direct access fails, try search
|
||||
logger.debug(f"System: Kiwix direct article not found for:{search_term} Status Code:{response.status_code}")
|
||||
search_url = f"{kiwix_url}/search?content={kiwix_library_name}&pattern={search_encoded}"
|
||||
response = requests.get(search_url, timeout=urlTimeoutSeconds)
|
||||
|
||||
|
||||
if response.status_code == 200 and "No results were found" not in response.text:
|
||||
soup = bs.BeautifulSoup(response.text, 'html.parser')
|
||||
links = [a['href'] for a in soup.find_all('a', href=True) if "start=" not in a['href']]
|
||||
|
||||
for link in links[:3]: # Check first 3 results
|
||||
article_name = link.split("/")[-1]
|
||||
if not article_name or article_name[0].islower():
|
||||
results = soup.select('div.results ul li')
|
||||
logger.debug(f"Kiwix: Found {len(results)} results in search results for:{search_term}")
|
||||
for li in results[:3]:
|
||||
a = li.find('a', href=True)
|
||||
if not a:
|
||||
continue
|
||||
|
||||
article_url = f"{kiwix_url}{link}"
|
||||
article_url = f"{kiwix_url}{a['href']}"
|
||||
article_response = requests.get(article_url, timeout=urlTimeoutSeconds)
|
||||
if article_response.status_code == 200:
|
||||
text = text_from_html(article_response.text)
|
||||
text = text.split("Jump to navigation", 1)[-1]
|
||||
text = text.split("Jump to search", 1)[-1]
|
||||
# Remove navigation and search jump text
|
||||
# text = text.split("Jump to navigation", 1)[-1]
|
||||
# text = text.split("Jump to search", 1)[-1]
|
||||
sentences = text.split('. ')
|
||||
summary = '. '.join(sentences[:wiki_return_limit])
|
||||
if summary and not summary.endswith('.'):
|
||||
summary += '.'
|
||||
return summary.strip()[:500]
|
||||
|
||||
logger.warning(f"System: No Kiwix Results for:{search_term}")
|
||||
# try to fall back to online Wikipedia if available
|
||||
return get_wikipedia_summary(search_term, force=True)
|
||||
if truncate:
|
||||
return summary.strip()[:500]
|
||||
else:
|
||||
return summary.strip()
|
||||
|
||||
|
||||
except requests.RequestException as e:
|
||||
logger.warning(f"System: Kiwix connection error: {e}")
|
||||
return "Unable to connect to local wiki server"
|
||||
# Fallback to online Wikipedia
|
||||
return get_wikipedia_summary(search_term, force=True)
|
||||
except Exception as e:
|
||||
logger.warning(f"System: Error with Kiwix for:{search_term} {e}")
|
||||
logger.debug(f"System: No Kiwix Results for:{search_term}")
|
||||
if wikipedia_enabled:
|
||||
logger.debug("Kiwix: Falling back to Wikipedia API.")
|
||||
return get_wikipedia_summary(search_term, force=True)
|
||||
return ERROR_FETCHING_DATA
|
||||
|
||||
def get_wikipedia_summary(search_term, location=None, force=False):
|
||||
except Exception as e:
|
||||
logger.warning(f"System: Error with Kiwix for:{search_term} URL:{search_url} {e}")
|
||||
return ERROR_FETCHING_DATA
|
||||
|
||||
def get_wikipedia_summary(search_term, location=None, force=False, truncate=True):
|
||||
if use_kiwix_server and not force:
|
||||
return get_kiwix_summary(search_term)
|
||||
|
||||
@@ -105,22 +91,45 @@ def get_wikipedia_summary(search_term, location=None, force=False):
|
||||
return ERROR_FETCHING_DATA
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
# Check for error response from Wikipedia API
|
||||
logger.debug(f"Wikipedia API response for '{search_term}': {len(data)} keys")
|
||||
if "extract" not in data or not data.get("extract"):
|
||||
logger.warning(f"System: Wikipedia API returned no extract for:{search_term} (data: {data})")
|
||||
#logger.debug(f"System: Wikipedia API returned no extract for:{search_term} (data: {data})")
|
||||
return ERROR_FETCHING_DATA
|
||||
if data.get("type") == "disambiguation" or "may refer to:" in data.get("extract", ""):
|
||||
#logger.warning(f"System: Disambiguation page for:{search_term} (data: {data})")
|
||||
# Fetch and parse the HTML disambiguation page
|
||||
html_url = f"https://en.wikipedia.org/wiki/{requests.utils.quote(search_term)}"
|
||||
html_resp = requests.get(html_url, timeout=5, headers=headers)
|
||||
if html_resp.status_code == 200:
|
||||
soup = bs.BeautifulSoup(html_resp.text, 'html.parser')
|
||||
items = soup.select('div.mw-parser-output ul li a[href^="/wiki/"]')
|
||||
choices = []
|
||||
for a in items:
|
||||
title = a.get('title')
|
||||
href = a.get('href')
|
||||
# Filter out non-article links
|
||||
if title and href and ':' not in href:
|
||||
choices.append(f"{title} (https://en.wikipedia.org{href})")
|
||||
if len(choices) >= 5:
|
||||
break
|
||||
if choices:
|
||||
return f"'{search_term}' is ambiguous. Did you mean:\n- " + "\n- ".join(choices)
|
||||
return f"'{search_term}' is ambiguous. Please be more specific. See: {html_url}"
|
||||
summary = data.get("extract")
|
||||
if not summary or not isinstance(summary, str) or not summary.strip():
|
||||
logger.warning(f"System: No summary found for:{search_term}")
|
||||
#logger.debug(f"System: No summary found for:{search_term} (data: {data})")
|
||||
return ERROR_FETCHING_DATA
|
||||
sentences = [s for s in summary.split('. ') if s.strip()]
|
||||
if not sentences:
|
||||
logger.warning(f"System: Wikipedia summary split produced no sentences for:{search_term}")
|
||||
return ERROR_FETCHING_DATA
|
||||
summary = '. '.join(sentences[:wiki_return_limit])
|
||||
if summary and not summary.endswith('.'):
|
||||
summary += '.'
|
||||
return summary.strip()[:500]
|
||||
if truncate:
|
||||
# Truncate to 500 characters
|
||||
return summary.strip()[:500]
|
||||
else:
|
||||
return summary.strip()
|
||||
except Exception as e:
|
||||
logger.warning(f"System: Wikipedia API error for:{search_term} {e}")
|
||||
return ERROR_FETCHING_DATA
|
||||
|
||||
@@ -7,5 +7,4 @@ maidenhead
|
||||
beautifulsoup4
|
||||
dadjokes
|
||||
geopy
|
||||
schedule
|
||||
googlesearch-python
|
||||
schedule
|
||||
Reference in New Issue
Block a user