Phase 2: DATABASE_BACKEND switch, component config, schema scoping

- config: add DatabaseBackend enum + DATABASE_* component vars on
  CommonSettings (host/port/name/schema/user/password, defaults
  'meshcorehub'); single effective_database_url resolver with precedence
  DATABASE_URL > postgres (assembled, fail-fast on missing vars) > SQLite
  default. effective_database_schema returns the schema only for Postgres.
  Collapses the duplicated resolver/field out of Collector/API settings.
- database: create_database_engine/DatabaseManager accept a schema arg and
  scope Postgres connections via search_path (psycopg2 -c options; asyncpg
  server_settings). No-op for SQLite.
- alembic/env: migrate into the instance schema (version_table_schema +
  CREATE SCHEMA + search_path) for Postgres; fix online render_as_batch to
  be SQLite-only (matching the offline path).
- .env.example: document the DATABASE_* block.

SQLite behaviour unchanged: default no-env path resolves to the same URL
(new regression tests), full suite green (1051 passed), fresh db upgrade OK.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Louis King
2026-06-13 22:01:00 +01:00
parent 9eab07d244
commit f342f5bc70
5 changed files with 240 additions and 52 deletions
+23
View File
@@ -66,6 +66,29 @@ LOG_LEVEL=INFO
# └── meshcore.db # SQLite database
DATA_HOME=./data
# -----------------------------------------------------------------------------
# Database
# -----------------------------------------------------------------------------
# SQLite is the zero-config default and needs nothing here — it lives at
# ${DATA_HOME}/collector/meshcore.db.
#
# To use PostgreSQL instead, set DATABASE_BACKEND=postgres and fill in the
# DATABASE_* values below (the bundled postgres container derives its
# POSTGRES_USER/PASSWORD/DB from DATABASE_USER/PASSWORD/NAME). You must also
# activate the compose 'postgres' profile, e.g. `docker compose --profile postgres up`.
#
# DATABASE_BACKEND=postgres
# DATABASE_HOST=postgres
# DATABASE_PORT=5432
# DATABASE_NAME=meshcorehub
# DATABASE_SCHEMA=meshcorehub # override per instance (e.g. prod, stg) on a shared cluster
# DATABASE_USER=meshcorehub
# DATABASE_PASSWORD= # required for postgres; e.g. `openssl rand -base64 32`
#
# Advanced: set DATABASE_URL to a full SQLAlchemy URL to override all of the above
# (e.g. a managed/external Postgres). Takes precedence over DATABASE_BACKEND.
# DATABASE_URL=postgresql+psycopg2://user:pass@host:5432/dbname
# Directory containing seed data files for import
# Default: ./seed (relative to docker-compose.yml location)
# Inside containers this is mapped to /seed
+30 -3
View File
@@ -4,7 +4,7 @@ import os
from logging.config import fileConfig
from alembic import context
from sqlalchemy import engine_from_config, pool
from sqlalchemy import engine_from_config, pool, text
from meshcore_hub.common.models import Base
@@ -41,6 +41,18 @@ def get_database_url() -> str:
return config.get_main_option("sqlalchemy.url", "sqlite:///./meshcore.db")
def get_schema(url: str) -> str | None:
"""Postgres schema to migrate into, or None for SQLite.
Each Hub instance keeps its tables and alembic_version in its own schema so
multiple instances (prod, stg, ...) can share one Postgres database with
independent migration state.
"""
if url.startswith(("postgresql", "postgres")):
return os.environ.get("DATABASE_SCHEMA", "meshcorehub")
return None
def run_migrations_offline() -> None:
"""Run migrations in 'offline' mode.
@@ -53,6 +65,7 @@ def run_migrations_offline() -> None:
script output.
"""
url = get_database_url()
schema = get_schema(url)
context.configure(
url=url,
target_metadata=target_metadata,
@@ -61,6 +74,8 @@ def run_migrations_offline() -> None:
# Batch mode is a SQLite-only workaround for its limited ALTER TABLE;
# Postgres performs ALTERs directly.
render_as_batch=url.startswith("sqlite"),
version_table_schema=schema,
include_schemas=schema is not None,
)
with context.begin_transaction():
@@ -74,7 +89,9 @@ def run_migrations_online() -> None:
and associate a connection with the context.
"""
configuration = config.get_section(config.config_ini_section, {})
configuration["sqlalchemy.url"] = get_database_url()
url = get_database_url()
configuration["sqlalchemy.url"] = url
schema = get_schema(url)
connectable = engine_from_config(
configuration,
@@ -83,10 +100,20 @@ def run_migrations_online() -> None:
)
with connectable.connect() as connection:
# Ensure the instance's schema exists and scope this connection to it so
# tables (and alembic_version) are created there. No-op for SQLite.
if schema is not None:
connection.execute(text(f'CREATE SCHEMA IF NOT EXISTS "{schema}"'))
connection.execute(text(f'SET search_path TO "{schema}"'))
connection.commit()
context.configure(
connection=connection,
target_metadata=target_metadata,
render_as_batch=True, # SQLite batch mode for ALTER TABLE
# Batch mode is a SQLite-only workaround for its limited ALTER TABLE.
render_as_batch=url.startswith("sqlite"),
version_table_schema=schema,
include_schemas=schema is not None,
)
with context.begin_transaction():
+92 -45
View File
@@ -3,7 +3,7 @@
from enum import Enum
from typing import Optional
from pydantic import Field, field_validator
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
@@ -24,6 +24,13 @@ class MQTTTransport(str, Enum):
WEBSOCKETS = "websockets"
class DatabaseBackend(str, Enum):
    """Supported database backends.

    Members are plain strings, so a raw value such as ``"postgres"`` converts
    directly with ``DatabaseBackend("postgres")`` and compares equal to it.
    """

    # SQLite backend.
    SQLITE = "sqlite"
    # PostgreSQL backend.
    POSTGRES = "postgres"
class CommonSettings(BaseSettings):
"""Common settings shared by all components."""
@@ -39,6 +46,88 @@ class CommonSettings(BaseSettings):
description="Base directory for service data (e.g., ./data or /data)",
)
# Database backend selection and connection components.
# SQLite is the zero-config default; set DATABASE_BACKEND=postgres (plus the
# DATABASE_* component vars) to use Postgres. An explicit DATABASE_URL overrides
# everything (managed/external Postgres, tests).
database_backend: DatabaseBackend = Field(
default=DatabaseBackend.SQLITE,
description="Database backend: 'sqlite' (default) or 'postgres'",
)
database_url: Optional[str] = Field(
default=None,
description=(
"Explicit SQLAlchemy database URL; overrides DATABASE_BACKEND/component vars. "
"Default: sqlite:///{data_home}/collector/meshcore.db"
),
)
database_host: Optional[str] = Field(
default=None,
description="Postgres host (required when DATABASE_BACKEND=postgres)",
)
database_port: int = Field(default=5432, description="Postgres port")
database_name: str = Field(
default="meshcorehub", description="Postgres database name"
)
database_schema: str = Field(
default="meshcorehub",
description="Postgres schema (namespace); override per instance on a shared cluster",
)
database_user: str = Field(default="meshcorehub", description="Postgres role/user")
database_password: Optional[str] = Field(
default=None,
description="Postgres password (required when DATABASE_BACKEND=postgres)",
)
@property
def effective_database_url(self) -> str:
    """Resolve the SQLAlchemy database URL.

    Precedence:
        1. An explicit ``database_url`` is returned unchanged.
        2. With ``database_backend`` set to Postgres, a
           ``postgresql+psycopg2://`` URL is assembled from the component
           settings (user, password, host, port, name).
        3. Otherwise the SQLite file ``<data_home>/collector/meshcore.db``.

    Returns:
        The SQLAlchemy URL string to connect with.

    Raises:
        ValueError: If the Postgres backend is selected but any of host,
            name, user or password is empty. Failing fast avoids silently
            falling back to SQLite on a misconfigured Postgres deployment.
    """
    if self.database_url:
        return self.database_url

    if self.database_backend == DatabaseBackend.POSTGRES:
        required = (
            ("DATABASE_HOST", self.database_host),
            ("DATABASE_NAME", self.database_name),
            ("DATABASE_USER", self.database_user),
            ("DATABASE_PASSWORD", self.database_password),
        )
        missing = [name for name, value in required if not value]
        if missing:
            raise ValueError(
                "DATABASE_BACKEND=postgres requires: " + ", ".join(missing)
            )

        from urllib.parse import quote

        # Percent-encode every reserved character in the credentials.
        # quote_plus() is form encoding: it would turn a space into "+", and
        # "+" is only an alias for a space in query strings, not in the
        # userinfo part of a URL, so the credential would be altered.
        # quote(..., safe="") emits "%20" instead and still encodes "/" and "@".
        user = quote(self.database_user, safe="")
        password = quote(self.database_password or "", safe="")
        return (
            f"postgresql+psycopg2://{user}:{password}"
            f"@{self.database_host}:{self.database_port}/{self.database_name}"
        )

    from pathlib import Path

    db_path = Path(self.data_home) / "collector" / "meshcore.db"
    return f"sqlite:///{db_path}"
@property
def effective_database_schema(self) -> Optional[str]:
    """Return the Postgres schema to scope connections to, if any.

    The configured ``database_schema`` is returned only when the resolved
    ``effective_database_url`` points at Postgres. For any other URL the
    result is ``None``, because SQLite has no schema concept and callers
    should leave ``search_path`` untouched.
    """
    resolved_url = self.effective_database_url
    # "postgresql..." URLs also begin with "postgres", so one prefix covers both.
    if not resolved_url.startswith("postgres"):
        return None
    return self.database_schema
# Logging
log_level: LogLevel = Field(default=LogLevel.INFO, description="Logging level")
@@ -68,11 +157,7 @@ class CommonSettings(BaseSettings):
class CollectorSettings(CommonSettings):
"""Settings for the Collector component."""
# Database - default uses data_home/collector/meshcore.db
database_url: Optional[str] = Field(
default=None,
description="SQLAlchemy database URL (default: sqlite:///{data_home}/collector/meshcore.db)",
)
# Database config (backend selector + connection) is inherited from CommonSettings.
# Seed home directory - contains initial data files (node_tags.yaml)
seed_home: str = Field(
@@ -171,16 +256,6 @@ class CollectorSettings(CommonSettings):
return str(Path(self.data_home) / "collector")
@property
def effective_database_url(self) -> str:
"""Get the effective database URL, using default if not set."""
if self.database_url:
return self.database_url
from pathlib import Path
db_path = Path(self.data_home) / "collector" / "meshcore.db"
return f"sqlite:///{db_path}"
@property
def effective_seed_home(self) -> str:
"""Get the effective seed home directory."""
@@ -202,13 +277,6 @@ class CollectorSettings(CommonSettings):
return str(Path(self.effective_seed_home) / "channels.yaml")
@field_validator("database_url")
@classmethod
def validate_database_url(cls, v: Optional[str]) -> Optional[str]:
"""Validate database URL format."""
# None is allowed - will use default
return v
class APISettings(CommonSettings):
"""Settings for the API component."""
@@ -217,11 +285,7 @@ class APISettings(CommonSettings):
api_host: str = Field(default="0.0.0.0", description="API server host")
api_port: int = Field(default=8000, description="API server port")
# Database - default uses data_home/collector/meshcore.db (same as collector)
database_url: Optional[str] = Field(
default=None,
description="SQLAlchemy database URL (default: sqlite:///{data_home}/collector/meshcore.db)",
)
# Database config (backend selector + connection) is inherited from CommonSettings.
# Authentication
api_read_key: Optional[str] = Field(default=None, description="Read-only API key")
@@ -252,23 +316,6 @@ class APISettings(CommonSettings):
description="Cache TTL for dashboard endpoints (seconds)",
)
@property
def effective_database_url(self) -> str:
"""Get the effective database URL, using default if not set."""
if self.database_url:
return self.database_url
from pathlib import Path
db_path = Path(self.data_home) / "collector" / "meshcore.db"
return f"sqlite:///{db_path}"
@field_validator("database_url")
@classmethod
def validate_database_url(cls, v: Optional[str]) -> Optional[str]:
"""Validate database URL format."""
# None is allowed - will use default
return v
class WebSettings(CommonSettings):
"""Settings for the Web Dashboard component."""
+25 -4
View File
@@ -30,23 +30,32 @@ def _to_async_url(database_url: str) -> str:
def create_database_engine(
database_url: str,
echo: bool = False,
schema: str | None = None,
) -> Engine:
"""Create a SQLAlchemy database engine.
Args:
database_url: SQLAlchemy database URL
echo: Enable SQL query logging
schema: Postgres schema to scope connections to via search_path. Ignored for
SQLite (which has no schema concept).
Returns:
SQLAlchemy Engine instance
"""
connect_args = {}
connect_args: dict[str, Any] = {}
engine_kwargs: dict[str, Any] = {}
# SQLite-specific configuration
if database_url.startswith("sqlite"):
connect_args["check_same_thread"] = False
# Scope Postgres connections to the configured schema via search_path. This keeps
# the models schema-agnostic (no hardcoded schema=) so the same code serves SQLite,
# single-instance Postgres, and multiple schema-isolated instances on one cluster.
if schema and database_url.startswith(("postgresql", "postgres")):
connect_args["options"] = f"-csearch_path={schema}"
# Size the pool above the default Starlette threadpool (~40 threads) so
# concurrent request handlers don't block waiting for a connection. Applies
# to file-based SQLite and networked backends (e.g. a future Postgres).
@@ -126,15 +135,20 @@ class DatabaseManager:
to avoid leaking connections when only sync operations are needed.
"""
def __init__(self, database_url: str, echo: bool = False):
def __init__(
self, database_url: str, echo: bool = False, schema: str | None = None
):
"""Initialize the database manager.
Args:
database_url: SQLAlchemy database URL
echo: Enable SQL query logging
schema: Postgres schema to scope connections to (search_path); ignored for
SQLite
"""
self.database_url = database_url
self._echo = echo
self._schema = schema
# Ensure parent directory exists for SQLite databases
if database_url.startswith("sqlite:///"):
@@ -144,7 +158,7 @@ class DatabaseManager:
db_path = Path(database_url.replace("sqlite:///", ""))
db_path.parent.mkdir(parents=True, exist_ok=True)
self.engine = create_database_engine(database_url, echo=echo)
self.engine = create_database_engine(database_url, echo=echo, schema=schema)
self.session_factory = create_session_factory(self.engine)
# Lazy-initialized async engine (created on first async_session call)
@@ -159,7 +173,14 @@ class DatabaseManager:
from sqlalchemy.ext.asyncio import async_sessionmaker
async_url = _to_async_url(self.database_url)
self._async_engine = create_async_engine(async_url, echo=self._echo)
async_connect_args: dict[str, Any] = {}
# asyncpg sets search_path via server_settings (not the libpq -c options
# string the sync psycopg2 engine uses).
if self._schema and self.database_url.startswith(("postgresql", "postgres")):
async_connect_args["server_settings"] = {"search_path": self._schema}
self._async_engine = create_async_engine(
async_url, echo=self._echo, connect_args=async_connect_args
)
# Apply the same SQLite pragmas as the sync engine (see
# create_database_engine) for the async engine's connections.
+70
View File
@@ -1,5 +1,7 @@
"""Tests for configuration settings."""
import pytest
from meshcore_hub.common.config import (
CommonSettings,
CollectorSettings,
@@ -246,3 +248,71 @@ class TestWebSettings:
assert settings.feature_radio_config is False
assert settings.features["radio_config"] is False
class TestDatabaseBackendResolution:
    """Tests for DATABASE_BACKEND selection and URL/schema resolution."""

    @pytest.fixture(autouse=True)
    def _clear_database_env(self, monkeypatch: pytest.MonkeyPatch) -> None:
        """Remove ambient DATABASE_* variables so each test is hermetic.

        ``_env_file=None`` only disables the .env file; variables already set
        in the process environment (a developer shell, CI) would still be
        picked up by the settings and change the resolved URL or schema.
        """
        for name in (
            "DATABASE_BACKEND",
            "DATABASE_URL",
            "DATABASE_HOST",
            "DATABASE_PORT",
            "DATABASE_NAME",
            "DATABASE_SCHEMA",
            "DATABASE_USER",
            "DATABASE_PASSWORD",
        ):
            monkeypatch.delenv(name, raising=False)

    def test_default_backend_is_sqlite_unchanged(self) -> None:
        """No DB env vars -> the same SQLite path and no schema as before."""
        settings = CollectorSettings(_env_file=None, data_home="/data")
        assert settings.database_backend.value == "sqlite"
        assert (
            settings.effective_database_url == "sqlite:////data/collector/meshcore.db"
        )
        assert settings.effective_database_schema is None

    def test_postgres_backend_assembles_url_and_schema(self) -> None:
        """Postgres backend assembles a URL from components and exposes the schema."""
        settings = APISettings(
            _env_file=None,
            database_backend="postgres",
            database_host="pg",
            database_password="pw",
        )
        assert settings.effective_database_url == (
            "postgresql+psycopg2://meshcorehub:pw@pg:5432/meshcorehub"
        )
        assert settings.effective_database_schema == "meshcorehub"

    def test_postgres_password_is_url_encoded(self) -> None:
        """Special characters in the password are percent-encoded."""
        settings = APISettings(
            _env_file=None,
            database_backend="postgres",
            database_host="pg",
            database_password="s3cr3t/p@ss",
        )
        assert "s3cr3t%2Fp%40ss" in settings.effective_database_url

    def test_postgres_schema_override_per_instance(self) -> None:
        """DATABASE_SCHEMA isolates instances sharing one database."""
        settings = APISettings(
            _env_file=None,
            database_backend="postgres",
            database_host="pg",
            database_password="pw",
            database_schema="stg",
        )
        assert settings.effective_database_schema == "stg"

    def test_postgres_missing_required_vars_fails_fast(self) -> None:
        """Misconfigured postgres backend raises rather than silently using SQLite."""
        settings = APISettings(_env_file=None, database_backend="postgres")
        # Name and user have defaults, so only host and password are missing.
        with pytest.raises(
            ValueError,
            match="DATABASE_BACKEND=postgres requires: DATABASE_HOST, DATABASE_PASSWORD",
        ):
            _ = settings.effective_database_url

    def test_explicit_url_overrides_backend(self) -> None:
        """An explicit DATABASE_URL wins even when a backend is selected."""
        settings = CollectorSettings(
            _env_file=None,
            database_backend="postgres",
            database_url="postgresql+psycopg2://u:p@h/db",
        )
        assert settings.effective_database_url == "postgresql+psycopg2://u:p@h/db"