Update wiki.py

This commit is contained in:
SpudGunMan
2025-10-27 21:25:12 -07:00
parent 21c2f7df18
commit c8d8880806
+23 -4
View File
@@ -115,17 +115,36 @@ def get_wikipedia_summary(search_term, location=None, force=False, truncate=True
return ERROR_FETCHING_DATA
response.raise_for_status()
data = response.json()
# Check for error response from Wikipedia API
logger.debug(f"Wikipedia API response for '{search_term}': {len(data)} keys")
if "extract" not in data or not data.get("extract"):
logger.warning(f"System: Wikipedia API returned no extract for:{search_term} (data: {data})")
#logger.debug(f"System: Wikipedia API returned no extract for:{search_term} (data: {data})")
return ERROR_FETCHING_DATA
if data.get("type") == "disambiguation" or "may refer to:" in data.get("extract", ""):
#logger.warning(f"System: Disambiguation page for:{search_term} (data: {data})")
# Fetch and parse the HTML disambiguation page
html_url = f"https://en.wikipedia.org/wiki/{requests.utils.quote(search_term)}"
html_resp = requests.get(html_url, timeout=5, headers=headers)
if html_resp.status_code == 200:
soup = bs.BeautifulSoup(html_resp.text, 'html.parser')
items = soup.select('div.mw-parser-output ul li a[href^="/wiki/"]')
choices = []
for a in items:
title = a.get('title')
href = a.get('href')
# Filter out non-article links
if title and href and ':' not in href:
choices.append(f"{title} (https://en.wikipedia.org{href})")
if len(choices) >= 5:
break
if choices:
return f"'{search_term}' is ambiguous. Did you mean:\n- " + "\n- ".join(choices)
return f"'{search_term}' is ambiguous. Please be more specific. See: {html_url}"
summary = data.get("extract")
if not summary or not isinstance(summary, str) or not summary.strip():
logger.warning(f"System: No summary found for:{search_term}")
#logger.debug(f"System: No summary found for:{search_term} (data: {data})")
return ERROR_FETCHING_DATA
sentences = [s for s in summary.split('. ') if s.strip()]
if not sentences:
logger.warning(f"System: Wikipedia summary split produced no sentences for:{search_term}")
return ERROR_FETCHING_DATA
summary = '. '.join(sentences[:wiki_return_limit])
if summary and not summary.endswith('.'):