mirror of
https://github.com/ipnet-mesh/meshcore-hub.git
synced 2026-03-28 17:42:56 +01:00
Add role label to node last seen metric and filter alerts by role
Joins NodeTag (key='role') to the node last-seen Prometheus metric so that alert rules can target infrastructure nodes only (role="infra").

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -7,10 +7,10 @@ groups:
|
|||||||
- name: meshcore
|
- name: meshcore
|
||||||
rules:
|
rules:
|
||||||
- alert: NodeNotSeen
|
- alert: NodeNotSeen
|
||||||
expr: time() - meshcore_node_last_seen_timestamp_seconds > 48 * 3600
|
expr: time() - meshcore_node_last_seen_timestamp_seconds{role="infra"} > 48 * 3600
|
||||||
for: 5m
|
for: 5m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
annotations:
|
annotations:
|
||||||
summary: "Node {{ $labels.node_name }} not seen for 48+ hours"
|
summary: "Node {{ $labels.node_name }} ({{ $labels.role }}) not seen for 48+ hours"
|
||||||
description: "Node {{ $labels.public_key }} ({{ $labels.adv_type }}) last seen {{ $value | humanizeDuration }} ago."
|
description: "Node {{ $labels.public_key }} ({{ $labels.adv_type }}, role={{ $labels.role }}) last seen {{ $value | humanizeDuration }} ago."
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ from meshcore_hub.common.models import (
|
|||||||
Member,
|
Member,
|
||||||
Message,
|
Message,
|
||||||
Node,
|
Node,
|
||||||
|
NodeTag,
|
||||||
Telemetry,
|
Telemetry,
|
||||||
TracePath,
|
TracePath,
|
||||||
)
|
)
|
||||||
@@ -144,19 +145,31 @@ def collect_metrics(session: Any) -> bytes:
|
|||||||
node_last_seen = Gauge(
|
node_last_seen = Gauge(
|
||||||
"meshcore_node_last_seen_timestamp_seconds",
|
"meshcore_node_last_seen_timestamp_seconds",
|
||||||
"Unix timestamp of when the node was last seen",
|
"Unix timestamp of when the node was last seen",
|
||||||
["public_key", "node_name", "adv_type"],
|
["public_key", "node_name", "adv_type", "role"],
|
||||||
registry=registry,
|
registry=registry,
|
||||||
)
|
)
|
||||||
|
role_subq = (
|
||||||
|
select(NodeTag.node_id, NodeTag.value.label("role"))
|
||||||
|
.where(NodeTag.key == "role")
|
||||||
|
.subquery()
|
||||||
|
)
|
||||||
nodes_with_last_seen = session.execute(
|
nodes_with_last_seen = session.execute(
|
||||||
select(Node.public_key, Node.name, Node.adv_type, Node.last_seen).where(
|
select(
|
||||||
Node.last_seen.isnot(None)
|
Node.public_key,
|
||||||
|
Node.name,
|
||||||
|
Node.adv_type,
|
||||||
|
Node.last_seen,
|
||||||
|
role_subq.c.role,
|
||||||
)
|
)
|
||||||
|
.outerjoin(role_subq, Node.id == role_subq.c.node_id)
|
||||||
|
.where(Node.last_seen.isnot(None))
|
||||||
).all()
|
).all()
|
||||||
for public_key, name, adv_type, last_seen in nodes_with_last_seen:
|
for public_key, name, adv_type, last_seen, role in nodes_with_last_seen:
|
||||||
node_last_seen.labels(
|
node_last_seen.labels(
|
||||||
public_key=public_key,
|
public_key=public_key,
|
||||||
node_name=name or "",
|
node_name=name or "",
|
||||||
adv_type=adv_type or "unknown",
|
adv_type=adv_type or "unknown",
|
||||||
|
role=role or "",
|
||||||
).set(last_seen.timestamp())
|
).set(last_seen.timestamp())
|
||||||
|
|
||||||
# -- Messages total by type --
|
# -- Messages total by type --
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ from meshcore_hub.api.dependencies import (
|
|||||||
get_db_session,
|
get_db_session,
|
||||||
get_mqtt_client,
|
get_mqtt_client,
|
||||||
)
|
)
|
||||||
from meshcore_hub.common.models import Node
|
from meshcore_hub.common.models import Node, NodeTag
|
||||||
|
|
||||||
|
|
||||||
def _make_basic_auth(username: str, password: str) -> str:
|
def _make_basic_auth(username: str, password: str) -> str:
|
||||||
@@ -198,7 +198,36 @@ class TestMetricsData:
|
|||||||
"meshcore_node_last_seen_timestamp_seconds"
|
"meshcore_node_last_seen_timestamp_seconds"
|
||||||
'{adv_type="REPEATER",'
|
'{adv_type="REPEATER",'
|
||||||
'node_name="Seen Node",'
|
'node_name="Seen Node",'
|
||||||
'public_key="lastseen1234lastseen1234lastseen"}'
|
'public_key="lastseen1234lastseen1234lastseen",'
|
||||||
|
'role=""}'
|
||||||
|
) in response.text
|
||||||
|
|
||||||
|
def test_node_last_seen_timestamp_with_role(self, api_db_session, client_no_auth):
|
||||||
|
"""Test that node_last_seen_timestamp includes role label from node tags."""
|
||||||
|
seen_at = datetime(2025, 6, 15, 12, 0, 0, tzinfo=timezone.utc)
|
||||||
|
node = Node(
|
||||||
|
public_key="rolenode1234rolenode1234rolenode",
|
||||||
|
name="Infra Node",
|
||||||
|
adv_type="REPEATER",
|
||||||
|
first_seen=seen_at,
|
||||||
|
last_seen=seen_at,
|
||||||
|
)
|
||||||
|
api_db_session.add(node)
|
||||||
|
api_db_session.flush()
|
||||||
|
|
||||||
|
tag = NodeTag(node_id=node.id, key="role", value="infra")
|
||||||
|
api_db_session.add(tag)
|
||||||
|
api_db_session.commit()
|
||||||
|
|
||||||
|
_clear_metrics_cache()
|
||||||
|
response = client_no_auth.get("/metrics")
|
||||||
|
assert response.status_code == 200
|
||||||
|
assert (
|
||||||
|
"meshcore_node_last_seen_timestamp_seconds"
|
||||||
|
'{adv_type="REPEATER",'
|
||||||
|
'node_name="Infra Node",'
|
||||||
|
'public_key="rolenode1234rolenode1234rolenode",'
|
||||||
|
'role="infra"}'
|
||||||
) in response.text
|
) in response.text
|
||||||
|
|
||||||
def test_node_last_seen_timestamp_skips_null(self, api_db_session, client_no_auth):
|
def test_node_last_seen_timestamp_skips_null(self, api_db_session, client_no_auth):
|
||||||
|
|||||||
Reference in New Issue
Block a user