Files
meshcore-hub/tests/conftest.py
T
Louis King c48db03afb feat(spam): score messages at ingest and hide likely spam
Add an optional, off-by-default spam-detection feature that scores each
message's spam likelihood at ingest, stores the score on the row, and lets
the display layer hide likely-spam by default behind a "show potential spam"
toggle. Nothing is ever dropped at ingest, so the threshold can be retuned
without reprocessing.

Scoring (collector/spam.py): windowed COUNT(*) over new
(path_prefix, received_at) and (sender_normalized, received_at) indexes —
joint path+sender signal plus a sender-name signal (trailing-digit suffix
stripped so bob1/bob2 collapse to bob). When the path is short/zero-hop or
absent, the name signal stands alone at full weight so local spam is still
flaggable. A background sweep re-scores recent rows with hindsight to catch
the leading edge of bursts. The collector logs each score (WARNING at/above
the threshold).

Display: the messages API gains include_spam and a master-switch-aware
hide-filter; the SPA shows the toggle + a badge only when the feature is on.

Config: FEATURE_SPAM_DETECTION is the single operator switch, bridged in
Compose to the backend SPAM_DETECTION_ENABLED for collector + api (mirrors
the FEATURE_PACKETS / RAW_PACKET_CAPTURE_ENABLED pattern). Both default off.

Works on SQLite and Postgres: DB-agnostic queries, an Alembic batch migration
for the three new columns + two indexes, and backend-aware collector test
fixtures (lifted db_backend/db_url into the shared conftest).

Also: move the meshcore-hub image pull_policy out of the base compose file.
It lived in docker-compose.yml as pull_policy: daily and made `make up` pull
the published image over a freshly built local one. Base is now policy-neutral
(default missing); dev sets pull_policy: build on the hub services so it only
ever uses local builds. Prod refreshes images via a manual `docker compose
... pull`.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-23 00:11:39 +01:00

222 lines
7.2 KiB
Python

"""Shared pytest fixtures for all tests."""
import os
import tempfile
from typing import Generator
import dotenv
import pytest
from sqlalchemy import create_engine, text
from sqlalchemy.engine.url import make_url
from sqlalchemy.orm import sessionmaker
from meshcore_hub.common import config as config_module
from meshcore_hub.common.models import Base
# The CLI entrypoint (meshcore_hub.__main__) calls load_dotenv() at import time so
# deployments can drop a .env in place. Importing it during collection (e.g. from
# test_main.py) would otherwise leak a developer's repo-root .env straight into
# os.environ for the whole session — bypassing _ignore_dotenv, which only stops
# pydantic-settings from reading the file. conftest.py is imported before any test
# module is collected, so neutralising load_dotenv here binds first.
dotenv.load_dotenv = lambda *args, **kwargs: False
def _settings_classes():
"""CommonSettings and every subclass (recursively)."""
seen: set[type] = set()
stack = [config_module.CommonSettings]
while stack:
cls = stack.pop()
if cls in seen:
continue
seen.add(cls)
stack.extend(cls.__subclasses__())
return seen
def _cli_envvars() -> set[str]:
"""Collect Click envvar names from CLI commands (best-effort).
CLI options read env vars via ``envvar=`` independently of pydantic
Settings, so ``_settings_classes`` alone misses them (e.g. ``API_WORKERS``).
"""
import importlib
import click
envvars: set[str] = set()
def _collect(cmd: click.BaseCommand) -> None:
if isinstance(cmd, click.Group):
for subcmd in cmd.commands.values():
_collect(subcmd)
if isinstance(cmd, click.Command):
for param in cmd.params:
if isinstance(param, click.Option) and param.envvar:
ev = param.envvar
if isinstance(ev, str):
envvars.add(ev)
else:
envvars.update(ev)
for module_path in (
"meshcore_hub.api.cli",
"meshcore_hub.collector.cli",
"meshcore_hub.web.cli",
):
try:
mod = importlib.import_module(module_path)
for attr in vars(mod).values():
if isinstance(attr, click.BaseCommand):
_collect(attr)
except Exception:
pass
return envvars
@pytest.fixture(autouse=True)
def _ignore_dotenv(monkeypatch):
"""Stop pydantic-settings and Click from reading ``.env`` or leaked env vars.
Three-pronged defence:
1. Disable ``env_file`` on every settings subclass so pydantic-settings
won't read the ``.env`` file itself.
2. Delete any env vars matching a settings field name from ``os.environ``
for the duration of the test.
3. Delete any env vars matching a Click CLI ``envvar=`` name (e.g.
``API_WORKERS``) that aren't settings fields.
This catches vars exported into the shell via direnv, Makefile, CI, etc.
before pytest started. Tests must depend only on defaults and explicit
env overrides (``monkeypatch.setenv``).
"""
for cls in _settings_classes():
cfg = dict(cls.model_config)
cfg["env_file"] = None
monkeypatch.setattr(cls, "model_config", cfg)
for field_name in cls.model_fields:
monkeypatch.delenv(field_name.upper(), raising=False)
for ev in _cli_envvars():
monkeypatch.delenv(ev, raising=False)
@pytest.fixture
def db_engine():
"""Create an in-memory SQLite database engine for testing."""
engine = create_engine(
"sqlite:///:memory:",
connect_args={"check_same_thread": False},
)
Base.metadata.create_all(engine)
yield engine
Base.metadata.drop_all(engine)
engine.dispose()
@pytest.fixture
def db_session(db_engine):
"""Create a database session for testing."""
Session = sessionmaker(bind=db_engine)
session = Session()
yield session
session.close()
@pytest.fixture(scope="session")
def test_db_path():
"""Session-scoped temporary SQLite database file path.
One file per pytest session; engines below build schema on it once.
"""
fd, path = tempfile.mkstemp(suffix=".db")
os.close(fd)
yield path
if os.path.exists(path):
os.unlink(path)
@pytest.fixture(scope="session")
def db_backend() -> str:
"""Active test database backend (``sqlite`` or ``postgres``).
Controlled by ``TEST_DATABASE_BACKEND`` env var (default: ``sqlite``).
When ``postgres``, ``TEST_POSTGRES_URL`` must also be set.
"""
backend = os.environ.get("TEST_DATABASE_BACKEND", "sqlite").lower()
if backend not in ("sqlite", "postgres"):
raise ValueError(
f"TEST_DATABASE_BACKEND must be 'sqlite' or 'postgres', got: {backend}"
)
return backend
@pytest.fixture(scope="session")
def db_url(db_backend: str, test_db_path: str, request) -> Generator[str, None, None]:
"""Database URL for the active backend.
For Postgres, each pytest-xdist worker gets its own database (e.g.
``test_gw0``) to avoid truncation races between parallel workers. Shared by
the API and collector suites so both exercise the same backend.
"""
if db_backend == "postgres":
env_url = os.environ.get("TEST_POSTGRES_URL")
if not env_url:
pytest.skip(
"TEST_DATABASE_BACKEND=postgres but TEST_POSTGRES_URL is not set; "
"e.g. TEST_POSTGRES_URL=postgresql+psycopg2://postgres:postgres@localhost:55432/test"
)
assert env_url is not None
worker_id = "master"
if hasattr(request.config, "workerinput"):
worker_id = request.config.workerinput["workerid"]
base_url = make_url(env_url)
worker_db = f"{base_url.database}_{worker_id}"
worker_url = base_url.set(database=worker_db).render_as_string(
hide_password=False
)
admin_url = base_url.set(database="postgres")
admin_engine = create_engine(
admin_url.render_as_string(hide_password=False),
isolation_level="AUTOCOMMIT",
)
try:
with admin_engine.connect() as conn:
exists = conn.execute(
text("SELECT 1 FROM pg_database WHERE datname = :name"),
{"name": worker_db},
).scalar()
if not exists:
conn.execute(text(f'CREATE DATABASE "{worker_db}"'))
finally:
admin_engine.dispose()
yield worker_url
admin_engine = create_engine(
admin_url.render_as_string(hide_password=False),
isolation_level="AUTOCOMMIT",
)
try:
with admin_engine.connect() as conn:
conn.execute(
text(
"SELECT pg_terminate_backend(pid) "
"FROM pg_stat_activity "
"WHERE datname = :name AND pid <> pg_backend_pid()"
),
{"name": worker_db},
)
conn.execute(text(f'DROP DATABASE IF EXISTS "{worker_db}"'))
finally:
admin_engine.dispose()
else:
yield f"sqlite:///{test_db_path}"