mirror of
https://github.com/ipnet-mesh/meshcore-hub.git
synced 2026-03-28 17:42:56 +01:00
Merge pull request #127 from ipnet-mesh/chore/fix-metrics-labels
Add role label to node last seen metric and filter alerts by role
This commit is contained in:
@@ -7,10 +7,10 @@ groups:
|
||||
- name: meshcore
|
||||
rules:
|
||||
- alert: NodeNotSeen
|
||||
expr: time() - meshcore_node_last_seen_timestamp_seconds > 48 * 3600
|
||||
expr: time() - meshcore_node_last_seen_timestamp_seconds{role="infra"} > 48 * 3600
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Node {{ $labels.node_name }} not seen for 48+ hours"
|
||||
description: "Node {{ $labels.public_key }} ({{ $labels.adv_type }}) last seen {{ $value | humanizeDuration }} ago."
|
||||
summary: "Node {{ $labels.node_name }} ({{ $labels.role }}) not seen for 48+ hours"
|
||||
description: "Node {{ $labels.public_key }} ({{ $labels.adv_type }}, role={{ $labels.role }}) last seen {{ $value | humanizeDuration }} ago."
|
||||
|
||||
@@ -16,6 +16,7 @@ from meshcore_hub.common.models import (
|
||||
Member,
|
||||
Message,
|
||||
Node,
|
||||
NodeTag,
|
||||
Telemetry,
|
||||
TracePath,
|
||||
)
|
||||
@@ -144,19 +145,31 @@ def collect_metrics(session: Any) -> bytes:
|
||||
node_last_seen = Gauge(
|
||||
"meshcore_node_last_seen_timestamp_seconds",
|
||||
"Unix timestamp of when the node was last seen",
|
||||
["public_key", "node_name", "adv_type"],
|
||||
["public_key", "node_name", "adv_type", "role"],
|
||||
registry=registry,
|
||||
)
|
||||
role_subq = (
|
||||
select(NodeTag.node_id, NodeTag.value.label("role"))
|
||||
.where(NodeTag.key == "role")
|
||||
.subquery()
|
||||
)
|
||||
nodes_with_last_seen = session.execute(
|
||||
select(Node.public_key, Node.name, Node.adv_type, Node.last_seen).where(
|
||||
Node.last_seen.isnot(None)
|
||||
select(
|
||||
Node.public_key,
|
||||
Node.name,
|
||||
Node.adv_type,
|
||||
Node.last_seen,
|
||||
role_subq.c.role,
|
||||
)
|
||||
.outerjoin(role_subq, Node.id == role_subq.c.node_id)
|
||||
.where(Node.last_seen.isnot(None))
|
||||
).all()
|
||||
for public_key, name, adv_type, last_seen in nodes_with_last_seen:
|
||||
for public_key, name, adv_type, last_seen, role in nodes_with_last_seen:
|
||||
node_last_seen.labels(
|
||||
public_key=public_key,
|
||||
node_name=name or "",
|
||||
adv_type=adv_type or "unknown",
|
||||
role=role or "",
|
||||
).set(last_seen.timestamp())
|
||||
|
||||
# -- Messages total by type --
|
||||
|
||||
@@ -13,7 +13,7 @@ from meshcore_hub.api.dependencies import (
|
||||
get_db_session,
|
||||
get_mqtt_client,
|
||||
)
|
||||
from meshcore_hub.common.models import Node
|
||||
from meshcore_hub.common.models import Node, NodeTag
|
||||
|
||||
|
||||
def _make_basic_auth(username: str, password: str) -> str:
|
||||
@@ -198,7 +198,36 @@ class TestMetricsData:
|
||||
"meshcore_node_last_seen_timestamp_seconds"
|
||||
'{adv_type="REPEATER",'
|
||||
'node_name="Seen Node",'
|
||||
'public_key="lastseen1234lastseen1234lastseen"}'
|
||||
'public_key="lastseen1234lastseen1234lastseen",'
|
||||
'role=""}'
|
||||
) in response.text
|
||||
|
||||
def test_node_last_seen_timestamp_with_role(self, api_db_session, client_no_auth):
|
||||
"""Test that node_last_seen_timestamp includes role label from node tags."""
|
||||
seen_at = datetime(2025, 6, 15, 12, 0, 0, tzinfo=timezone.utc)
|
||||
node = Node(
|
||||
public_key="rolenode1234rolenode1234rolenode",
|
||||
name="Infra Node",
|
||||
adv_type="REPEATER",
|
||||
first_seen=seen_at,
|
||||
last_seen=seen_at,
|
||||
)
|
||||
api_db_session.add(node)
|
||||
api_db_session.flush()
|
||||
|
||||
tag = NodeTag(node_id=node.id, key="role", value="infra")
|
||||
api_db_session.add(tag)
|
||||
api_db_session.commit()
|
||||
|
||||
_clear_metrics_cache()
|
||||
response = client_no_auth.get("/metrics")
|
||||
assert response.status_code == 200
|
||||
assert (
|
||||
"meshcore_node_last_seen_timestamp_seconds"
|
||||
'{adv_type="REPEATER",'
|
||||
'node_name="Infra Node",'
|
||||
'public_key="rolenode1234rolenode1234rolenode",'
|
||||
'role="infra"}'
|
||||
) in response.text
|
||||
|
||||
def test_node_last_seen_timestamp_skips_null(self, api_db_session, client_no_auth):
|
||||
|
||||
Reference in New Issue
Block a user