Refactor codebase: consolidate utils, improve thread safety, and standardize node data handling

This commit is contained in:
eddieoz
2025-11-27 16:27:42 +02:00
parent 1cb71b2ea4
commit 34066691b2
6 changed files with 219 additions and 238 deletions

4
.gitignore vendored
View File

@@ -17,4 +17,6 @@ venv.bak/
# Local Config
config.yaml
report-*.md
report-*.md
GEMINI.md
CLAUDE.md

View File

@@ -1,6 +1,8 @@
import logging
import time
import threading
import meshtastic.util
from .utils import get_val, haversine
logger = logging.getLogger(__name__)
@@ -22,6 +24,9 @@ class ActiveTester:
self.test_results = [] # List of dicts: {node_id, status, rtt, hops, snr, timestamp}
self.completed_cycles = 0
self.nodes_tested_in_cycle = set()
# Thread safety
self.lock = threading.Lock()
def run_next_test(self):
"""
@@ -51,7 +56,6 @@ class ActiveTester:
# Round-robin through priority nodes
# Safety check if list changed or index out of bounds
# Safety check if list changed or index out of bounds
if self.current_priority_index >= len(self.priority_nodes):
self.current_priority_index = 0
@@ -69,12 +73,6 @@ class ActiveTester:
candidates = []
nodes = self.interface.nodes
# Helper to get attribute or dict key (same as in analyzer)
def get_val(obj, key, default=None):
if isinstance(obj, dict):
return obj.get(key, default)
return getattr(obj, key, default)
# Get local position
my_lat = None
my_lon = None
@@ -173,7 +171,7 @@ class ActiveTester:
lon = lon_i / 1e7
if my_lat is not None and my_lon is not None and lat is not None and lon is not None:
dist = self._haversine(my_lat, my_lon, lat, lon)
dist = haversine(my_lat, my_lon, lat, lon)
# Add to bucket
nodes_by_role[role].append({
@@ -198,11 +196,6 @@ class ActiveTester:
continue
# Sort by lastHeard (Descending - Most Recent) then Distance (Descending - Furthest)
# Tuple sort: (lastHeard desc, dist desc)
# To sort desc, we can use reverse=True.
# But if we want different directions?
# User said: "sort by last_heard, then by distance"
# Assuming both descending (most recent, furthest).
candidates_for_role.sort(key=lambda x: (x['lastHeard'], x['dist']), reverse=True)
# Add to final list
@@ -225,19 +218,6 @@ class ActiveTester:
selected_ids = [c['id'] for c in final_candidates]
return selected_ids
def _haversine(self, lat1, lon1, lat2, lon2):
import math
try:
lon1, lat1, lon2, lat2 = map(math.radians, [float(lon1), float(lat1), float(lon2), float(lat2)])
dlon = lon2 - lon1
dlat = lat2 - lat1
a = math.sin(dlat/2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon/2)**2
c = 2 * math.asin(math.sqrt(a))
r = 6371000 # Meters
return c * r
except:
return 0
def send_traceroute(self, dest_node_id):
"""
Sends a traceroute request to the destination node.
@@ -253,11 +233,11 @@ class ActiveTester:
logger.error(f"Failed to send traceroute to {dest_node_id}: {e}")
# Update state immediately so main loop knows we are busy
self.last_test_time = time.time()
self.pending_traceroute = dest_node_id
with self.lock:
self.last_test_time = time.time()
self.pending_traceroute = dest_node_id
# Start background thread
import threading
t = threading.Thread(target=_send_task, daemon=True)
t.start()
@@ -310,8 +290,6 @@ class ActiveTester:
route_back = decoded.get('routeBack', [])
# Count hops (number of nodes in route - 1, excluding source)
# Route includes: source -> hop1 -> hop2 -> destination
# So hops = len(route) - 1 (we don't count the source)
hops_to = len(route) - 1 if route and len(route) > 0 else 0
hops_back = len(route_back) - 1 if route_back and len(route_back) > 0 else 0
@@ -322,47 +300,47 @@ class ActiveTester:
logger.info(f"Route to {node_id}: {' -> '.join(route_ids)} ({hops_to} hops)")
logger.info(f"Route back: {' -> '.join(route_back_ids)} ({hops_back} hops)")
self.test_results.append({
'node_id': node_id,
'status': 'success',
'rtt': rtt,
'hops_to': hops_to,
'hops_back': hops_back,
'route': route_ids,
'route_back': route_back_ids,
'snr': packet.get('rxSnr', 0),
'timestamp': time.time()
})
self._check_cycle_completion(node_id)
self._check_cycle_completion(node_id)
if self.pending_traceroute == node_id:
self.pending_traceroute = None # Clear pending if this was the node we were waiting for
self.last_test_time = time.time() # Start cooldown
with self.lock:
self.test_results.append({
'node_id': node_id,
'status': 'success',
'rtt': rtt,
'hops_to': hops_to,
'hops_back': hops_back,
'route': route_ids,
'route_back': route_back_ids,
'snr': packet.get('rxSnr', 0),
'timestamp': time.time()
})
self._check_cycle_completion(node_id)
if self.pending_traceroute == node_id:
self.pending_traceroute = None # Clear pending if this was the node we were waiting for
self.last_test_time = time.time() # Start cooldown
def record_timeout(self, node_id):
"""
Records a failed test result (timeout).
"""
logger.info(f"Recording timeout for {node_id}")
self.test_results.append({
'node_id': node_id,
'status': 'timeout',
'timestamp': time.time()
})
self._check_cycle_completion(node_id)
if self.pending_traceroute == node_id:
self.pending_traceroute = None # Clear pending if this was the node we were waiting for
self.last_test_time = time.time() # Start cooldown
with self.lock:
self.test_results.append({
'node_id': node_id,
'status': 'timeout',
'timestamp': time.time()
})
self._check_cycle_completion(node_id)
if self.pending_traceroute == node_id:
self.pending_traceroute = None # Clear pending if this was the node we were waiting for
self.last_test_time = time.time() # Start cooldown
def _check_cycle_completion(self, node_id):
"""
Tracks which nodes have been tested in the current cycle.
Must be called within a lock.
"""
self.nodes_tested_in_cycle.add(node_id)
# Check if we have tested all priority nodes
# Note: priority_nodes might change if auto-discovery re-runs,
# but usually it's stable for a cycle.
if self.priority_nodes:
all_tested = all(n in self.nodes_tested_in_cycle for n in self.priority_nodes)
logger.debug(f"Cycle Progress: {len(self.nodes_tested_in_cycle)}/{len(self.priority_nodes)} nodes tested.")

View File

@@ -1,5 +1,6 @@
import logging
import time
from .utils import get_val, haversine, get_node_name
logger = logging.getLogger(__name__)
@@ -19,52 +20,11 @@ class NetworkHealthAnalyzer:
# --- Node DB Analysis ---
for node_id, node in nodes.items():
# Handle both dictionary and Node object
if hasattr(node, 'user'):
# It's a Node object (or similar), but we need dictionary access for existing logic
# or we update logic to use attributes.
# However, the error 'Node object has no attribute get' confirms it's an object.
# The 'nodes' dict usually contains dictionaries in some contexts, but objects in others.
# Let's try to convert to dict if possible, or access attributes safely.
# If it's a Node object, it might not have a .get() method.
# We can try to access attributes directly.
user = getattr(node, 'user', {})
metrics = getattr(node, 'deviceMetrics', {})
position = getattr(node, 'position', {})
# Note: user/metrics/position might be objects too!
# If they are objects, we need to handle them.
# But usually in the python API, these inner attributes are often dictionaries or protobuf messages.
# If protobuf messages, they act like objects but might not have .get().
# Let's assume for a moment that if we access them, we might need to treat them as objects.
# But to be safe and minimal change, let's try to see if we can just use getattr with default.
# Actually, if 'user' is a protobuf, we can't use .get() on it either.
# Let's define a helper to safely get values.
pass
else:
# It's likely a dict
user = node.get('user', {})
metrics = node.get('deviceMetrics', {})
position = node.get('position', {})
# Helper to get attribute or dict key
def get_val(obj, key, default=None):
if isinstance(obj, dict):
return obj.get(key, default)
return getattr(obj, key, default)
# Re-fetch using helper
user = get_val(node, 'user', {})
metrics = get_val(node, 'deviceMetrics', {})
position = get_val(node, 'position', {})
# Now user/metrics/position might be objects or dicts.
# We need to access fields inside them.
# e.g. user.get('longName') vs user.longName
node_name = get_val(user, 'longName', node_id)
node_name = get_node_name(node, node_id)
# 1. Check Channel Utilization
ch_util = get_val(metrics, 'channelUtilization', 0)
@@ -78,61 +38,19 @@ class NetworkHealthAnalyzer:
# 3. Check Roles
role = get_val(user, 'role', 'CLIENT')
# Role might be an enum int if it's an object, or string if dict?
# In dicts from JSON, it's often string 'ROUTER'.
# In protobuf objects, it's an int.
# We need to handle both.
is_router_client = False
# Handle role enum conversion if needed
if isinstance(role, int):
# We need to check against the enum value for ROUTER_CLIENT
# Or convert to string.
# Hardcoding enum values is risky but 3 is usually ROUTER_CLIENT?
# Let's try to handle string comparison if possible.
# If it's an int, we can't compare to 'ROUTER_CLIENT'.
pass
elif role == 'ROUTER_CLIENT':
is_router_client = True
if is_router_client:
issues.append(f"Config: Node '{node_name}' is using deprecated role 'ROUTER_CLIENT'.")
# ... (rest of logic needs similar updates) ...
# This is getting complicated to support both.
# Let's try to force conversion to dict if possible?
# The Node object doesn't seem to have a to_dict() method easily documented.
# Alternative: The 'nodes' property in Interface returns a dict of Node objects.
# But maybe we can use `interface.nodesByNum`? No.
# Let's just implement the helper fully and use it.
# 3. Check Roles (Robust)
# If role is int, we might skip the string check or assume it's fine for now?
# Actually, we really want to catch ROUTER_CLIENT.
# If we can't import the enum here easily, maybe we skip.
# But wait, if 'user' is a dict, role is 'ROUTER_CLIENT'.
# If 'user' is an object, role is an int.
# Let's assume for now we are dealing with the dict case primarily,
# BUT the error says we have an object.
# So we MUST handle the object case.
# If it's an object, we can try to access the name of the enum?
# user.role is an int.
# We need to convert it.
# Let's try to import config_pb2 here too?
try:
from meshtastic.protobuf import config_pb2
# If role is int
if isinstance(role, int):
try:
from meshtastic.protobuf import config_pb2
role_name = config_pb2.Config.DeviceConfig.Role.Name(role)
if role_name == 'ROUTER_CLIENT':
issues.append(f"Config: Node '{node_name}' is using deprecated role 'ROUTER_CLIENT'.")
role = role_name # Normalize to string for later checks
except ImportError:
pass
except ImportError:
pass
elif role == 'ROUTER_CLIENT':
issues.append(f"Config: Node '{node_name}' is using deprecated role 'ROUTER_CLIENT'.")
# 4. Check for 'Router' role without GPS/Position
if not self.ignore_no_position and (role == 'ROUTER' or role == 'REPEATER'):
@@ -185,12 +103,6 @@ class NetworkHealthAnalyzer:
"""
issues = []
# Helper to get attribute or dict key
def get_val(obj, key, default=None):
if isinstance(obj, dict):
return obj.get(key, default)
return getattr(obj, key, default)
for pkt in history:
sender_id = pkt.get('fromId')
if sender_id:
@@ -198,31 +110,10 @@ class NetworkHealthAnalyzer:
if node:
hops_away = get_val(node, 'hopsAway', 0)
if hops_away > 3:
user = get_val(node, 'user', {})
node_name = get_val(user, 'longName', sender_id)
node_name = get_node_name(node, sender_id)
issues.append(f"Topology: Node '{node_name}' is {hops_away} hops away. (Ideally <= 3)")
return list(set(issues))
def _haversine(self, lat1, lon1, lat2, lon2):
"""
Calculate the great circle distance between two points
on the earth (specified in decimal degrees)
"""
import math
try:
# convert decimal degrees to radians
lon1, lat1, lon2, lat2 = map(math.radians, [float(lon1), float(lat1), float(lon2), float(lat2)])
# haversine formula
dlon = lon2 - lon1
dlat = lat2 - lat1
a = math.sin(dlat/2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon/2)**2
c = 2 * math.asin(math.sqrt(a))
r = 6371 # Radius of earth in kilometers. Use 3956 for miles
return c * r * 1000 # Return in meters
except Exception:
return 0
def check_router_density(self, nodes):
"""
Checks if ROUTER nodes are too close to each other (< 500m).
@@ -230,26 +121,14 @@ class NetworkHealthAnalyzer:
issues = []
routers = []
# Helper to get attribute or dict key
def get_val(obj, key, default=None):
if isinstance(obj, dict):
return obj.get(key, default)
return getattr(obj, key, default)
# Filter for routers with valid position
for node_id, node in nodes.items():
user = get_val(node, 'user', {})
role = get_val(user, 'role')
# Handle role enum if needed (simplified check for now, assuming string or int handled elsewhere or here)
# If role is int, we might miss it here unless we convert.
# But let's assume if it's an object, we might need to check int.
# For simplicity, let's skip strict role check here or assume string if dict.
# If object, role is int.
# 2 = ROUTER, 3 = ROUTER_CLIENT, 4 = REPEATER
is_router = False
if isinstance(role, int):
if role in [2, 3, 4]:
if role in [2, 3, 4]: # ROUTER, ROUTER_CLIENT, REPEATER
is_router = True
elif role in ['ROUTER', 'REPEATER', 'ROUTER_CLIENT']:
is_router = True
@@ -261,7 +140,7 @@ class NetworkHealthAnalyzer:
if is_router and lat is not None and lon is not None:
routers.append({
'id': node_id,
'name': get_val(user, 'longName', node_id),
'name': get_node_name(node, node_id),
'lat': lat,
'lon': lon
})
@@ -271,7 +150,7 @@ class NetworkHealthAnalyzer:
for j in range(i + 1, len(routers)):
r1 = routers[i]
r2 = routers[j]
dist = self._haversine(r1['lat'], r1['lon'], r2['lat'], r2['lon'])
dist = haversine(r1['lat'], r1['lon'], r2['lat'], r2['lon'])
if dist > 0 and dist < 500: # 500 meters threshold
issues.append(f"Topology: High Density! Routers '{r1['name']}' and '{r2['name']}' are only {dist:.0f}m apart. Consider changing one to CLIENT.")
@@ -284,12 +163,6 @@ class NetworkHealthAnalyzer:
"""
issues = []
# Helper to get attribute or dict key
def get_val(obj, key, default=None):
if isinstance(obj, dict):
return obj.get(key, default)
return getattr(obj, key, default)
my_pos = get_val(my_node, 'position', {})
my_lat = get_val(my_pos, 'latitude')
my_lon = get_val(my_pos, 'longitude')
@@ -311,17 +184,16 @@ class NetworkHealthAnalyzer:
continue
# Calculate distance
dist = self._haversine(my_lat, my_lon, lat, lon)
dist = haversine(my_lat, my_lon, lat, lon)
# Check SNR (if available in snr field or similar)
# Note: 'snr' is often in the node DB if we've heard from them recently
snr = get_val(node, 'snr')
if snr is not None:
# Heuristic: If < 1km and SNR < 0, that's suspicious for LoRa (unless heavy obstruction)
# Ideally, close nodes should have high SNR (> 5-10)
if dist < 1000 and snr < -5:
node_name = get_val(user, 'longName', node_id)
node_name = get_node_name(node, node_id)
issues.append(f"Performance: Node '{node_name}' is close ({dist:.0f}m) but has poor SNR ({snr:.1f}dB). Check antenna/LOS.")
return issues

View File

@@ -2,6 +2,7 @@ import logging
import time
import os
from datetime import datetime
from .utils import get_val, haversine, get_node_name
from mesh_monitor.route_analyzer import RouteAnalyzer
@@ -168,19 +169,8 @@ class NetworkReporter:
f.write("| Node ID | Name | Status | Distance (km) | RTT (s) | Hops (To/Back) | SNR |\n")
f.write("|---|---|---|---|---|---|---|\n")
def get_node_name(node_id):
node = nodes.get(node_id)
if node:
user = node.get('user', {}) if isinstance(node, dict) else getattr(node, 'user', {})
# Handle nested object/dict for user
if hasattr(user, 'longName'): return user.longName
if isinstance(user, dict): return user.get('longName', node_id)
return node_id
def get_distance(node_id):
"""Calculate distance from local node to target node in km."""
import math
if not local_node:
return '-'
@@ -200,13 +190,9 @@ class NetworkReporter:
return '-'
# Get local position from nodes dict
local_pos = local_node_data.get('position', {}) if isinstance(local_node_data, dict) else getattr(local_node_data, 'position', {})
if isinstance(local_pos, dict):
my_lat = local_pos.get('latitude')
my_lon = local_pos.get('longitude')
else:
my_lat = getattr(local_pos, 'latitude', None)
my_lon = getattr(local_pos, 'longitude', None)
local_pos = get_val(local_node_data, 'position', {})
my_lat = get_val(local_pos, 'latitude')
my_lon = get_val(local_pos, 'longitude')
if my_lat is None or my_lon is None:
return '-'
@@ -216,32 +202,23 @@ class NetworkReporter:
if not node:
return '-'
target_pos = node.get('position', {}) if isinstance(node, dict) else getattr(node, 'position', {})
if isinstance(target_pos, dict):
target_lat = target_pos.get('latitude')
target_lon = target_pos.get('longitude')
else:
target_lat = getattr(target_pos, 'latitude', None)
target_lon = getattr(target_pos, 'longitude', None)
target_pos = get_val(node, 'position', {})
target_lat = get_val(target_pos, 'latitude')
target_lon = get_val(target_pos, 'longitude')
if target_lat is None or target_lon is None:
return '-'
# Haversine formula
try:
lon1, lat1, lon2, lat2 = map(math.radians, [float(my_lon), float(my_lat), float(target_lon), float(target_lat)])
dlon = lon2 - lon1
dlat = lat2 - lat1
a = math.sin(dlat/2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon/2)**2
c = 2 * math.asin(math.sqrt(a))
km = c * 6371 # Earth radius in kilometers
return f"{km:.2f}"
except:
return '-'
dist_meters = haversine(my_lat, my_lon, target_lat, target_lon)
if dist_meters > 0:
return f"{dist_meters/1000:.2f}"
return '-'
for res in test_results:
node_id = res.get('node_id')
name = get_node_name(node_id)
node = nodes.get(node_id, {})
name = get_node_name(node, node_id)
status = res.get('status', 'unknown')
distance = get_distance(node_id)
rtt = res.get('rtt', '-')

75
mesh_monitor/utils.py Normal file
View File

@@ -0,0 +1,75 @@
import math
import logging
logger = logging.getLogger(__name__)
def haversine(lat1, lon1, lat2, lon2):
"""
Calculate the great circle distance between two points
on the earth (specified in decimal degrees).
Returns distance in meters.
"""
try:
# Check for None values
if lat1 is None or lon1 is None or lat2 is None or lon2 is None:
return 0
# convert decimal degrees to radians
lon1, lat1, lon2, lat2 = map(math.radians, [float(lon1), float(lat1), float(lon2), float(lat2)])
# haversine formula
dlon = lon2 - lon1
dlat = lat2 - lat1
a = math.sin(dlat/2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon/2)**2
c = 2 * math.asin(math.sqrt(a))
r = 6371000 # Radius of earth in meters
return c * r
except Exception as e:
logger.debug(f"Error calculating haversine distance: {e}")
return 0
def get_val(obj, key, default=None):
"""
Safely retrieves a value from an object or dictionary.
Handles nested attribute access if key contains dots (e.g. 'user.id').
"""
try:
# Handle dot notation for nested access
if '.' in key:
parts = key.split('.')
current = obj
for part in parts:
current = get_val(current, part)
if current is None:
return default
return current
if isinstance(obj, dict):
return obj.get(key, default)
return getattr(obj, key, default)
except Exception:
return default
def get_node_name(node, node_id=None):
"""
Helper to get a human-readable name for a node.
"""
user = get_val(node, 'user', {})
long_name = get_val(user, 'longName')
short_name = get_val(user, 'shortName')
if long_name:
return long_name
if short_name:
return short_name
# If no name found, return ID
if node_id:
return node_id
# Try to find ID in user object
user_id = get_val(user, 'id')
if user_id:
return user_id
return "Unknown"

77
tests/verify_refactor.py Normal file
View File

@@ -0,0 +1,77 @@
import sys
import os
import logging
import threading
# Add project root to path
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("Verification")
def verify_utils():
logger.info("Verifying utils.py...")
from mesh_monitor.utils import haversine, get_val, get_node_name
# Test haversine
dist = haversine(0, 0, 1, 1)
assert dist > 0, "Haversine calculation failed"
logger.info(f"Haversine check passed: {dist:.2f}m")
# Test get_val
obj = {'a': {'b': 1}}
val = get_val(obj, 'a.b')
assert val == 1, "get_val nested dict failed"
class TestObj:
def __init__(self):
self.x = 10
self.y = {'z': 20}
obj2 = TestObj()
val2 = get_val(obj2, 'x')
assert val2 == 10, "get_val object attr failed"
val3 = get_val(obj2, 'y.z')
assert val3 == 20, "get_val object nested dict failed"
logger.info("Utils check passed.")
def verify_modules():
logger.info("Verifying module imports and instantiation...")
# Mock Interface
class MockInterface:
def __init__(self):
self.nodes = {}
self.localNode = None
interface = MockInterface()
# Test Analyzer
from mesh_monitor.analyzer import NetworkHealthAnalyzer
analyzer = NetworkHealthAnalyzer()
issues = analyzer.analyze({})
assert isinstance(issues, list), "Analyzer did not return list"
logger.info("Analyzer instantiated and ran.")
# Test ActiveTester
from mesh_monitor.active_tests import ActiveTester
tester = ActiveTester(interface)
assert hasattr(tester, 'lock'), "ActiveTester missing lock"
assert isinstance(tester.lock, type(threading.Lock())), "ActiveTester lock is not a Lock"
logger.info("ActiveTester instantiated and has lock.")
# Test Reporter
from mesh_monitor.reporter import NetworkReporter
reporter = NetworkReporter()
logger.info("Reporter instantiated.")
if __name__ == "__main__":
try:
verify_utils()
verify_modules()
print("\nSUCCESS: All verification checks passed!")
except Exception as e:
print(f"\nFAILURE: Verification failed: {e}")
sys.exit(1)