cleanup

2026-05-18 06:55:38 +02:00 · 2025-10-24 10:32:28 -07:00
parent 57ce15de4e
commit f33da848cd
1 changed files with 75 additions and 77 deletions
@@ -2,88 +2,86 @@

 from modules.log import *
 import wikipedia # pip install wikipedia
+import requests
+import bs4 as bs
+from urllib.parse import quote
+# Kiwix support for local wiki

-# Kiwix support for local wiki
-if use_kiwix_server:
-    import requests
-    import bs4 as bs
-    from urllib.parse import quote

-# Kiwix helper functions (only loaded if use_kiwix_server is True)
-if wikipedia_enabled and use_kiwix_server:
-    def tag_visible(element):
-        """Filter visible text from HTML elements for Kiwix"""
-        if element.parent.name in ['style', 'script', 'head', 'title', 'meta', '[document]']:
-            return False
-        if isinstance(element, bs.element.Comment):
-            return False
-        return True
+def tag_visible(element):
+    """Return False for text nodes inside style/script/head/title/meta/[document] or for HTML comments, True otherwise (used for Kiwix pages)"""
+    if element.parent.name in ['style', 'script', 'head', 'title', 'meta', '[document]']:
+        return False
+    if isinstance(element, bs.element.Comment):
+        return False
+    return True

-    def text_from_html(body):
-        """Extract visible text from HTML content"""
-        soup = bs.BeautifulSoup(body, 'html.parser')
-        texts = soup.find_all(string=True)
-        visible_texts = filter(tag_visible, texts)
-        return " ".join(t.strip() for t in visible_texts if t.strip())
+def text_from_html(body):
+    """Parse HTML and return its visible text nodes, stripped and joined with single spaces"""
+    soup = bs.BeautifulSoup(body, 'html.parser')
+    texts = soup.find_all(string=True)  # every text node in the parsed document
+    visible_texts = filter(tag_visible, texts)
+    return " ".join(t.strip() for t in visible_texts if t.strip())  # drop whitespace-only nodes

-    def get_kiwix_summary(search_term):
-        """Query local Kiwix server for Wikipedia article"""
-        try:
-            search_encoded = quote(search_term)
-            # Try direct article access first
-            wiki_article = search_encoded.capitalize().replace("%20", "_")
-            exact_url = f"{kiwix_url}/raw/{kiwix_library_name}/content/A/{wiki_article}"
+def get_kiwix_summary(search_term):
+    """Query local Kiwix server for Wikipedia article and return a short plain-text summary"""
+    try:
+        search_encoded = quote(search_term)
+        # Try direct article access first
+        wiki_article = search_encoded.capitalize().replace("%20", "_")
+        exact_url = f"{kiwix_url}/raw/{kiwix_library_name}/content/A/{wiki_article}"
+        
+        response = requests.get(exact_url, timeout=urlTimeoutSeconds)
+        if response.status_code == 200:
+            # Extract and clean text
+            text = text_from_html(response.text)
+            # Remove common Wikipedia metadata prefixes
+            text = text.split("Jump to navigation", 1)[-1]
+            text = text.split("Jump to search", 1)[-1]
+            # Truncate to reasonable length (first few sentences)
+            sentences = text.split('. ')
+            summary = '. '.join(sentences[:wiki_return_limit])
+            if summary and not summary.endswith('.'):
+                summary += '.'
+            return summary.strip()[:500]  # Hard limit at 500 chars
+        
+        # If direct access fails, try search
+        search_url = f"{kiwix_url}/search?content={kiwix_library_name}&pattern={search_encoded}"
+        response = requests.get(search_url, timeout=urlTimeoutSeconds)
+        
+        if response.status_code == 200 and "No results were found" not in response.text:
+            soup = bs.BeautifulSoup(response.text, 'html.parser')  # bs4 is imported as `bs`; a bare BeautifulSoup name is not defined here
+            links = [a['href'] for a in soup.find_all('a', href=True) if "start=" not in a['href']]
            
-            response = requests.get(exact_url, timeout=urlTimeoutSeconds)
-            if response.status_code == 200:
-                # Extract and clean text
-                text = text_from_html(response.text)
-                # Remove common Wikipedia metadata prefixes
-                text = text.split("Jump to navigation", 1)[-1]
-                text = text.split("Jump to search", 1)[-1]
-                # Truncate to reasonable length (first few sentences)
-                sentences = text.split('. ')
-                summary = '. '.join(sentences[:wiki_return_limit])
-                if summary and not summary.endswith('.'):
-                    summary += '.'
-                return summary.strip()[:500]  # Hard limit at 500 chars
-            
-            # If direct access fails, try search
-            search_url = f"{kiwix_url}/search?content={kiwix_library_name}&pattern={search_encoded}"
-            response = requests.get(search_url, timeout=urlTimeoutSeconds)
-            
-            if response.status_code == 200 and "No results were found" not in response.text:
-                soup = BeautifulSoup(response.text, 'html.parser')
-                links = [a['href'] for a in soup.find_all('a', href=True) if "start=" not in a['href']]
-                
-                for link in links[:3]:  # Check first 3 results
-                    article_name = link.split("/")[-1]
-                    if not article_name or article_name[0].islower():
-                        continue
-                        
-                    article_url = f"{kiwix_url}{link}"
-                    article_response = requests.get(article_url, timeout=urlTimeoutSeconds)
-                    if article_response.status_code == 200:
-                        text = text_from_html(article_response.text)
-                        text = text.split("Jump to navigation", 1)[-1]
-                        text = text.split("Jump to search", 1)[-1]
-                        sentences = text.split('. ')
-                        summary = '. '.join(sentences[:wiki_return_limit])
-                        if summary and not summary.endswith('.'):
-                            summary += '.'
-                        return summary.strip()[:500]
-            
-            logger.warning(f"System: No Kiwix Results for:{search_term}")
-            # try to fall back to online Wikipedia if available
-            return get_wikipedia_summary(search_term, force=True)
-            
-            
-        except requests.RequestException as e:
-            logger.warning(f"System: Kiwix connection error: {e}")
-            return "Unable to connect to local wiki server"
-        except Exception as e:
-            logger.warning(f"System: Error with Kiwix for:{search_term} {e}")
-            return ERROR_FETCHING_DATA
+            for link in links[:3]:  # Only try the first 3 search hits
+                article_name = link.split("/")[-1]  # last path segment of the result link
+                if not article_name or article_name[0].islower():  # skip empty names and names starting with a lowercase letter
+                    continue
+                    
+                article_url = f"{kiwix_url}{link}"
+                article_response = requests.get(article_url, timeout=urlTimeoutSeconds)
+                if article_response.status_code == 200:
+                    text = text_from_html(article_response.text)
+                    text = text.split("Jump to navigation", 1)[-1]  # keep only the text after the marker, if present
+                    text = text.split("Jump to search", 1)[-1]
+                    sentences = text.split('. ')
+                    summary = '. '.join(sentences[:wiki_return_limit])  # keep at most wiki_return_limit sentences
+                    if summary and not summary.endswith('.'):
+                        summary += '.'
+                    return summary.strip()[:500]  # Hard limit at 500 chars
+        
+        logger.warning(f"System: No Kiwix Results for:{search_term}")
+        # Nothing usable from Kiwix: try to fall back to online Wikipedia if available
+        return get_wikipedia_summary(search_term, force=True)
+
+        
+    except requests.RequestException as e:
+        logger.warning(f"System: Kiwix connection error: {e}")
+        return "Unable to connect to local wiki server"
+    except Exception as e:
+        logger.warning(f"System: Error with Kiwix for:{search_term} {e}")
+        return ERROR_FETCHING_DATA
+    

 def get_wikipedia_summary(search_term, location=None, force=False):
    lat, lon = location if location else (None, None)