Files
meshing-around/modules/wiki.py
SpudGunMan fa76a76203 BIG OLD PATCH 🍠
pz days ... haha. I hope this works.
fancy potato
2025-10-24 19:54:46 -07:00

161 lines
7.6 KiB
Python

# meshbot wiki module
from modules.log import logger
from modules.settings import (use_kiwix_server, kiwix_url, kiwix_library_name,
urlTimeoutSeconds, wiki_return_limit, ERROR_FETCHING_DATA)
#import wikipedia # pip install wikipedia
import requests
import bs4 as bs
from urllib.parse import quote
def tag_visible(element):
    """Return True when *element* is renderable page text.

    Rejects text nodes whose parent is non-visible markup (scripts,
    styles, document metadata) and HTML comments; everything else is
    considered visible.
    """
    hidden_parents = ('style', 'script', 'head', 'title', 'meta', '[document]')
    if element.parent.name in hidden_parents:
        return False
    return not isinstance(element, bs.element.Comment)
def text_from_html(body):
    """Collapse an HTML document into a single space-joined string of visible text.

    Parses *body* with BeautifulSoup, keeps only text nodes that pass
    tag_visible(), strips each fragment, and drops the empty ones.
    """
    soup = bs.BeautifulSoup(body, 'html.parser')
    pieces = []
    for node in soup.find_all(string=True):
        if not tag_visible(node):
            continue
        stripped = node.strip()
        if stripped:
            pieces.append(stripped)
    return " ".join(pieces)
def get_kiwix_summary(search_term):
    """Fetch a short article summary from a local Kiwix (offline wiki) server.

    Strategy: try the direct article URL first, then the Kiwix search
    endpoint (checking the first few result links), and finally fall back
    to the online Wikipedia API. Returns at most ``wiki_return_limit``
    sentences, hard-capped at 500 characters, or ERROR_FETCHING_DATA /
    a connection-error message on failure.
    """
    if search_term is None or search_term.strip() == "":
        return ERROR_FETCHING_DATA

    def _trim_summary(raw_text):
        """Strip Wikipedia navigation prefixes and truncate to a few sentences."""
        text = raw_text.split("Jump to navigation", 1)[-1]
        text = text.split("Jump to search", 1)[-1]
        sentences = text.split('. ')
        summary = '. '.join(sentences[:wiki_return_limit])
        if summary and not summary.endswith('.'):
            summary += '.'
        return summary.strip()[:500]  # Hard limit at 500 chars

    try:
        search_encoded = quote(search_term)
        # Try direct article access first. Kiwix/Wikipedia article names are
        # capitalized and use underscores in place of spaces.
        wiki_article = search_encoded.capitalize().replace("%20", "_")
        exact_url = f"{kiwix_url}/raw/{kiwix_library_name}/content/A/{wiki_article}"
        response = requests.get(exact_url, timeout=urlTimeoutSeconds)
        if response.status_code == 200:
            return _trim_summary(text_from_html(response.text))

        # Direct access failed; fall back to the Kiwix search endpoint.
        logger.debug(f"System: Kiwix direct article not found for:{search_term} Status Code:{response.status_code}")
        search_url = f"{kiwix_url}/search?content={kiwix_library_name}&pattern={search_encoded}"
        response = requests.get(search_url, timeout=urlTimeoutSeconds)
        if response.status_code == 200 and "No results were found" not in response.text:
            soup = bs.BeautifulSoup(response.text, 'html.parser')
            # Drop pagination links ("start=") so only article hits remain.
            links = [a['href'] for a in soup.find_all('a', href=True) if "start=" not in a['href']]
            for link in links[:3]:  # Check first 3 results
                article_name = link.split("/")[-1]
                # Article titles start uppercase; lowercase names are chrome/links.
                if not article_name or article_name[0].islower():
                    continue
                article_url = f"{kiwix_url}{link}"
                article_response = requests.get(article_url, timeout=urlTimeoutSeconds)
                if article_response.status_code == 200:
                    return _trim_summary(text_from_html(article_response.text))

        logger.warning(f"System: No Kiwix Results for:{search_term}")
        # Fall back to online Wikipedia; force=True prevents recursing into Kiwix.
        return get_wikipedia_summary(search_term, force=True)
    except requests.RequestException as e:
        # Connection-level failure talking to the local server.
        # (Removed an unreachable second return that followed this one.)
        logger.warning(f"System: Kiwix connection error: {e}")
        return "Unable to connect to local wiki server"
    except Exception as e:
        logger.warning(f"System: Error with Kiwix for:{search_term} {e}")
        return ERROR_FETCHING_DATA
def get_wikipedia_summary(search_term, location=None, force=False):
    """Return a short article summary from the Wikipedia REST API.

    When ``use_kiwix_server`` is set (and ``force`` is False) the query is
    delegated to the local Kiwix server instead. ``location`` is accepted
    for interface compatibility but is currently unused. Returns at most
    ``wiki_return_limit`` sentences, capped at 500 characters, or
    ERROR_FETCHING_DATA on any failure.
    """
    if use_kiwix_server and not force:
        return get_kiwix_summary(search_term)
    if not search_term or not search_term.strip():
        return ERROR_FETCHING_DATA
    api_url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{requests.utils.quote(search_term)}"
    # Wikipedia's REST API requires a descriptive User-Agent.
    headers = {
        "User-Agent": "MeshBot/1.0 (https://github.com/kkeeton/meshing-around; contact: youremail@example.com)"
    }
    try:
        response = requests.get(api_url, timeout=5, headers=headers)
        if response.status_code == 404:
            logger.warning(f"System: No Wikipedia Results for:{search_term}")
            return ERROR_FETCHING_DATA
        response.raise_for_status()
        data = response.json()
        summary = data.get("extract")
        # Single consolidated guard (replaces three overlapping checks that
        # all verified the extract was a non-empty string).
        if not isinstance(summary, str) or not summary.strip():
            logger.warning(f"System: Wikipedia API returned no extract for:{search_term} (data: {data})")
            return ERROR_FETCHING_DATA
        sentences = [s for s in summary.split('. ') if s.strip()]
        if not sentences:
            logger.warning(f"System: Wikipedia summary split produced no sentences for:{search_term}")
            return ERROR_FETCHING_DATA
        summary = '. '.join(sentences[:wiki_return_limit])
        if summary and not summary.endswith('.'):
            summary += '.'
        return summary.strip()[:500]
    except Exception as e:
        logger.warning(f"System: Wikipedia API error for:{search_term} {e}")
        return ERROR_FETCHING_DATA
# def get_wikipedia_summary(search_term, location=None, force=False):
# lat, lon = location if location else (None, None)
# # Use Kiwix if configured
# if use_kiwix_server and not force:
# return get_kiwix_summary(search_term)
# try:
# # Otherwise use online Wikipedia
# wikipedia_search = wikipedia.search(search_term, results=3)
# wikipedia_suggest = wikipedia.suggest(search_term)
# #wikipedia_aroundme = wikipedia.geosearch(lat,lon, results=3)
# #logger.debug(f"System: Wikipedia Nearby:{wikipedia_aroundme}")
# except Exception as e:
# logger.debug(f"System: Wikipedia search error for:{search_term} {e}")
# return ERROR_FETCHING_DATA
# if len(wikipedia_search) == 0:
# logger.warning(f"System: No Wikipedia Results for:{search_term}")
# return ERROR_FETCHING_DATA
# try:
# logger.debug(f"System: Searching Wikipedia for:{search_term}, First Result:{wikipedia_search[0]}, Suggest Word:{wikipedia_suggest}")
# summary = wikipedia.summary(search_term, sentences=wiki_return_limit, auto_suggest=False, redirect=True)
# except wikipedia.DisambiguationError as e:
# logger.warning(f"System: Disambiguation Error for:{search_term} trying {wikipedia_search[0]}")
# summary = wikipedia.summary(wikipedia_search[0], sentences=wiki_return_limit, auto_suggest=True, redirect=True)
# except wikipedia.PageError as e:
# logger.warning(f"System: Wikipedia Page Error for:{search_term} {e} trying {wikipedia_search[0]}")
# summary = wikipedia.summary(wikipedia_search[0], sentences=wiki_return_limit, auto_suggest=True, redirect=True)
# except Exception as e:
# logger.warning(f"System: Error with Wikipedia for:{search_term} {e}")
# return ERROR_FETCHING_DATA
# return summary