Phase 2: DATABASE_BACKEND switch, component config, schema scoping

- config: add DatabaseBackend enum + DATABASE_* component vars on CommonSettings
  (host/port/name/schema/user/password; name/schema/user default to 'meshcorehub');
  single effective_database_url resolver with precedence DATABASE_URL > postgres
  (assembled, fail-fast on missing vars) > SQLite default.
  effective_database_schema returns the schema only for Postgres. Collapses the
  duplicated resolver/field out of Collector/API settings.
- database: create_database_engine/DatabaseManager accept a schema arg and scope
  Postgres connections via search_path (psycopg2 -c options; asyncpg
  server_settings). No-op for SQLite.
- alembic/env: migrate into the instance schema (version_table_schema + CREATE
  SCHEMA + search_path) for Postgres; fix online render_as_batch to be
  SQLite-only (matching the offline path).
- .env.example: document the DATABASE_* block.

SQLite behaviour unchanged: default no-env path resolves to the same URL (new
regression tests), full suite green (1051 passed), fresh db upgrade OK.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-07-06 01:41:27 +02:00 · 2026-06-13 22:01:00 +01:00
parent 9eab07d244
commit f342f5bc70
5 changed files with 240 additions and 52 deletions
@@ -66,6 +66,29 @@ LOG_LEVEL=INFO
 #       └── meshcore.db    # SQLite database
 DATA_HOME=./data

+# -----------------------------------------------------------------------------
+# Database
+# -----------------------------------------------------------------------------
+# SQLite is the zero-config default and needs nothing here — it lives at
+# ${DATA_HOME}/collector/meshcore.db.
+#
+# To use PostgreSQL instead, set DATABASE_BACKEND=postgres and fill in the
+# DATABASE_* values below (the bundled postgres container derives its
+# POSTGRES_USER/PASSWORD/DB from DATABASE_USER/PASSWORD/NAME). You must also
+# activate the compose 'postgres' profile, e.g. `docker compose --profile postgres up`.
+#
+# DATABASE_BACKEND=postgres
+# DATABASE_HOST=postgres
+# DATABASE_PORT=5432
+# DATABASE_NAME=meshcorehub
+# DATABASE_SCHEMA=meshcorehub   # override per instance (e.g. prod, stg) on a shared cluster
+# DATABASE_USER=meshcorehub
+# DATABASE_PASSWORD=            # required for postgres; e.g. `openssl rand -base64 32`
+#
+# Advanced: set DATABASE_URL to a full SQLAlchemy URL (e.g. a managed/external
+# Postgres). It takes precedence over DATABASE_BACKEND and the connection values
+# above; DATABASE_SCHEMA still applies when the URL points at Postgres.
+# DATABASE_URL=postgresql+psycopg2://user:pass@host:5432/dbname
+
 # Directory containing seed data files for import
 # Default: ./seed (relative to docker-compose.yml location)
 # Inside containers this is mapped to /seed
@@ -4,7 +4,7 @@ import os
 from logging.config import fileConfig

 from alembic import context
-from sqlalchemy import engine_from_config, pool
+from sqlalchemy import engine_from_config, pool, text

 from meshcore_hub.common.models import Base

@@ -41,6 +41,18 @@ def get_database_url() -> str:
    return config.get_main_option("sqlalchemy.url", "sqlite:///./meshcore.db")


+def get_schema(url: str) -> str | None:
+    """Return the Postgres schema to migrate into, or None for other backends.
+
+    The name comes from the DATABASE_SCHEMA environment variable (default
+    "meshcorehub"). Each Hub instance keeps its tables and alembic_version in
+    its own schema so multiple instances (prod, stg, ...) can share one Postgres
+    database with independent migration state.
+
+    Args:
+        url: Database URL the migrations will run against.
+
+    Returns:
+        The schema name when ``url`` starts with "postgres"/"postgresql",
+        otherwise None.
+    """
+    if url.startswith(("postgresql", "postgres")):
+        return os.environ.get("DATABASE_SCHEMA", "meshcorehub")
+    return None
+
+
 def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

@@ -53,6 +65,7 @@ def run_migrations_offline() -> None:
    script output.
    """
    url = get_database_url()
+    schema = get_schema(url)
    context.configure(
        url=url,
        target_metadata=target_metadata,
@@ -61,6 +74,8 @@ def run_migrations_offline() -> None:
        # Batch mode is a SQLite-only workaround for its limited ALTER TABLE;
        # Postgres performs ALTERs directly.
        render_as_batch=url.startswith("sqlite"),
+        version_table_schema=schema,
+        include_schemas=schema is not None,
    )

    with context.begin_transaction():
@@ -74,7 +89,9 @@ def run_migrations_online() -> None:
    and associate a connection with the context.
    """
    configuration = config.get_section(config.config_ini_section, {})
-    configuration["sqlalchemy.url"] = get_database_url()
+    url = get_database_url()
+    configuration["sqlalchemy.url"] = url
+    schema = get_schema(url)

    connectable = engine_from_config(
        configuration,
@@ -83,10 +100,20 @@ def run_migrations_online() -> None:
    )

    with connectable.connect() as connection:
+        # Ensure the instance's schema exists and scope this connection to it so
+        # tables (and alembic_version) are created there. No-op for SQLite.
+        if schema is not None:
+            connection.execute(text(f'CREATE SCHEMA IF NOT EXISTS "{schema}"'))
+            connection.execute(text(f'SET search_path TO "{schema}"'))
+            connection.commit()
+
        context.configure(
            connection=connection,
            target_metadata=target_metadata,
-            render_as_batch=True,  # SQLite batch mode for ALTER TABLE
+            # Batch mode is a SQLite-only workaround for its limited ALTER TABLE.
+            render_as_batch=url.startswith("sqlite"),
+            version_table_schema=schema,
+            include_schemas=schema is not None,
        )

        with context.begin_transaction():
@@ -3,7 +3,7 @@
 from enum import Enum
 from typing import Optional

-from pydantic import Field, field_validator
+from pydantic import Field
 from pydantic_settings import BaseSettings, SettingsConfigDict


@@ -24,6 +24,13 @@ class MQTTTransport(str, Enum):
    WEBSOCKETS = "websockets"


+class DatabaseBackend(str, Enum):
+    """Database backends selectable via the DATABASE_BACKEND setting."""
+
+    SQLITE = "sqlite"  # zero-config default
+    POSTGRES = "postgres"  # URL assembled from the DATABASE_* component vars
+
+
 class CommonSettings(BaseSettings):
    """Common settings shared by all components."""

@@ -39,6 +46,88 @@ class CommonSettings(BaseSettings):
        description="Base directory for service data (e.g., ./data or /data)",
    )

+    # Database backend selection and connection components.
+    # SQLite is the zero-config default; set DATABASE_BACKEND=postgres (plus the
+    # DATABASE_* component vars) to use Postgres. An explicit DATABASE_URL overrides
+    # everything (managed/external Postgres, tests).
+    database_backend: DatabaseBackend = Field(
+        default=DatabaseBackend.SQLITE,
+        description="Database backend: 'sqlite' (default) or 'postgres'",
+    )
+    database_url: Optional[str] = Field(
+        default=None,
+        description=(
+            "Explicit SQLAlchemy database URL; overrides DATABASE_BACKEND/component vars. "
+            "Default: sqlite:///{data_home}/collector/meshcore.db"
+        ),
+    )
+    database_host: Optional[str] = Field(
+        default=None,
+        description="Postgres host (required when DATABASE_BACKEND=postgres)",
+    )
+    database_port: int = Field(default=5432, description="Postgres port")
+    database_name: str = Field(
+        default="meshcorehub", description="Postgres database name"
+    )
+    database_schema: str = Field(
+        default="meshcorehub",
+        description="Postgres schema (namespace); override per instance on a shared cluster",
+    )
+    database_user: str = Field(default="meshcorehub", description="Postgres role/user")
+    database_password: Optional[str] = Field(
+        default=None,
+        description="Postgres password (required when DATABASE_BACKEND=postgres)",
+    )
+
+    @property
+    def effective_database_url(self) -> str:
+        """Resolve the SQLAlchemy database URL.
+
+        Precedence: explicit DATABASE_URL > postgres (assembled from components) >
+        SQLite default under DATA_HOME. Fails fast for a misconfigured postgres backend
+        rather than silently falling back to SQLite.
+
+        Returns:
+            The SQLAlchemy URL string to connect with.
+
+        Raises:
+            ValueError: DATABASE_BACKEND=postgres but one or more of DATABASE_HOST,
+                DATABASE_NAME, DATABASE_USER or DATABASE_PASSWORD is unset/empty.
+        """
+        if self.database_url:
+            return self.database_url
+        if self.database_backend == DatabaseBackend.POSTGRES:
+            missing = [
+                name
+                for name, value in (
+                    ("DATABASE_HOST", self.database_host),
+                    ("DATABASE_NAME", self.database_name),
+                    ("DATABASE_USER", self.database_user),
+                    ("DATABASE_PASSWORD", self.database_password),
+                )
+                if not value
+            ]
+            if missing:
+                raise ValueError(
+                    "DATABASE_BACKEND=postgres requires: " + ", ".join(missing)
+                )
+            from urllib.parse import quote
+
+            # Percent-encode credentials with safe="" rather than quote_plus():
+            # a space becomes %20, which both unquote() and unquote_plus() decode,
+            # whereas the "+" emitted by quote_plus() is only decoded by the latter.
+            user = quote(self.database_user, safe="")
+            password = quote(self.database_password or "", safe="")
+            return (
+                f"postgresql+psycopg2://{user}:{password}"
+                f"@{self.database_host}:{self.database_port}/{self.database_name}"
+            )
+        from pathlib import Path
+
+        db_path = Path(self.data_home) / "collector" / "meshcore.db"
+        return f"sqlite:///{db_path}"
+
+    @property
+    def effective_database_schema(self) -> Optional[str]:
+        """Schema for the Postgres search_path, or None on any other backend.
+
+        Derived from the effective URL, so DATABASE_SCHEMA also applies to an
+        explicit Postgres DATABASE_URL; SQLite has no schemas and yields None.
+        """
+        url = self.effective_database_url
+        is_postgres = url.startswith(("postgresql", "postgres"))
+        return self.database_schema if is_postgres else None
+
    # Logging
    log_level: LogLevel = Field(default=LogLevel.INFO, description="Logging level")

@@ -68,11 +157,7 @@ class CommonSettings(BaseSettings):
 class CollectorSettings(CommonSettings):
    """Settings for the Collector component."""

-    # Database - default uses data_home/collector/meshcore.db
-    database_url: Optional[str] = Field(
-        default=None,
-        description="SQLAlchemy database URL (default: sqlite:///{data_home}/collector/meshcore.db)",
-    )
+    # Database config (backend selector + connection) is inherited from CommonSettings.

    # Seed home directory - contains initial data files (node_tags.yaml)
    seed_home: str = Field(
@@ -171,16 +256,6 @@ class CollectorSettings(CommonSettings):

        return str(Path(self.data_home) / "collector")

-    @property
-    def effective_database_url(self) -> str:
-        """Get the effective database URL, using default if not set."""
-        if self.database_url:
-            return self.database_url
-        from pathlib import Path
-
-        db_path = Path(self.data_home) / "collector" / "meshcore.db"
-        return f"sqlite:///{db_path}"
-
    @property
    def effective_seed_home(self) -> str:
        """Get the effective seed home directory."""
@@ -202,13 +277,6 @@ class CollectorSettings(CommonSettings):

        return str(Path(self.effective_seed_home) / "channels.yaml")

-    @field_validator("database_url")
-    @classmethod
-    def validate_database_url(cls, v: Optional[str]) -> Optional[str]:
-        """Validate database URL format."""
-        # None is allowed - will use default
-        return v
-

 class APISettings(CommonSettings):
    """Settings for the API component."""
@@ -217,11 +285,7 @@ class APISettings(CommonSettings):
    api_host: str = Field(default="0.0.0.0", description="API server host")
    api_port: int = Field(default=8000, description="API server port")

-    # Database - default uses data_home/collector/meshcore.db (same as collector)
-    database_url: Optional[str] = Field(
-        default=None,
-        description="SQLAlchemy database URL (default: sqlite:///{data_home}/collector/meshcore.db)",
-    )
+    # Database config (backend selector + connection) is inherited from CommonSettings.

    # Authentication
    api_read_key: Optional[str] = Field(default=None, description="Read-only API key")
@@ -252,23 +316,6 @@ class APISettings(CommonSettings):
        description="Cache TTL for dashboard endpoints (seconds)",
    )

-    @property
-    def effective_database_url(self) -> str:
-        """Get the effective database URL, using default if not set."""
-        if self.database_url:
-            return self.database_url
-        from pathlib import Path
-
-        db_path = Path(self.data_home) / "collector" / "meshcore.db"
-        return f"sqlite:///{db_path}"
-
-    @field_validator("database_url")
-    @classmethod
-    def validate_database_url(cls, v: Optional[str]) -> Optional[str]:
-        """Validate database URL format."""
-        # None is allowed - will use default
-        return v
-

 class WebSettings(CommonSettings):
    """Settings for the Web Dashboard component."""
@@ -30,23 +30,32 @@ def _to_async_url(database_url: str) -> str:
 def create_database_engine(
    database_url: str,
    echo: bool = False,
+    schema: str | None = None,
 ) -> Engine:
    """Create a SQLAlchemy database engine.

    Args:
        database_url: SQLAlchemy database URL
        echo: Enable SQL query logging
+        schema: Postgres schema to scope connections to via search_path. Ignored for
+            SQLite (which has no schema concept).

    Returns:
        SQLAlchemy Engine instance
    """
-    connect_args = {}
+    connect_args: dict[str, Any] = {}
    engine_kwargs: dict[str, Any] = {}

    # SQLite-specific configuration
    if database_url.startswith("sqlite"):
        connect_args["check_same_thread"] = False

+    # Scope Postgres connections to the configured schema via search_path. This keeps
+    # the models schema-agnostic (no hardcoded schema=) so the same code serves SQLite,
+    # single-instance Postgres, and multiple schema-isolated instances on one cluster.
+    if schema and database_url.startswith(("postgresql", "postgres")):
+        connect_args["options"] = f"-csearch_path={schema}"
+
    # Size the pool above the default Starlette threadpool (~40 threads) so
    # concurrent request handlers don't block waiting for a connection. Applies
     # to file-based SQLite and networked backends (e.g. Postgres).
@@ -126,15 +135,20 @@ class DatabaseManager:
    to avoid leaking connections when only sync operations are needed.
    """

-    def __init__(self, database_url: str, echo: bool = False):
+    def __init__(
+        self, database_url: str, echo: bool = False, schema: str | None = None
+    ):
        """Initialize the database manager.

        Args:
            database_url: SQLAlchemy database URL
            echo: Enable SQL query logging
+            schema: Postgres schema to scope connections to (search_path); ignored for
+                SQLite
        """
        self.database_url = database_url
        self._echo = echo
+        self._schema = schema

        # Ensure parent directory exists for SQLite databases
        if database_url.startswith("sqlite:///"):
@@ -144,7 +158,7 @@ class DatabaseManager:
            db_path = Path(database_url.replace("sqlite:///", ""))
            db_path.parent.mkdir(parents=True, exist_ok=True)

-        self.engine = create_database_engine(database_url, echo=echo)
+        self.engine = create_database_engine(database_url, echo=echo, schema=schema)
        self.session_factory = create_session_factory(self.engine)

        # Lazy-initialized async engine (created on first async_session call)
@@ -159,7 +173,14 @@ class DatabaseManager:
        from sqlalchemy.ext.asyncio import async_sessionmaker

        async_url = _to_async_url(self.database_url)
-        self._async_engine = create_async_engine(async_url, echo=self._echo)
+        async_connect_args: dict[str, Any] = {}
+        # asyncpg sets search_path via server_settings (not the libpq -c options
+        # string the sync psycopg2 engine uses).
+        if self._schema and self.database_url.startswith(("postgresql", "postgres")):
+            async_connect_args["server_settings"] = {"search_path": self._schema}
+        self._async_engine = create_async_engine(
+            async_url, echo=self._echo, connect_args=async_connect_args
+        )

        # Apply the same SQLite pragmas as the sync engine (see
        # create_database_engine) for the async engine's connections.
@@ -1,5 +1,7 @@
 """Tests for configuration settings."""

+import pytest
+
 from meshcore_hub.common.config import (
    CommonSettings,
    CollectorSettings,
@@ -246,3 +248,71 @@ class TestWebSettings:

        assert settings.feature_radio_config is False
        assert settings.features["radio_config"] is False
+
+
+class TestDatabaseBackendResolution:
+    """Tests for DATABASE_BACKEND selection and URL/schema resolution."""
+
+    @pytest.fixture(autouse=True)
+    def _isolate_database_env(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """Clear ambient DATABASE_* vars so only explicit kwargs drive resolution.
+
+        ``_env_file=None`` disables the .env file but not the process environment,
+        so a shell exporting e.g. DATABASE_BACKEND=postgres or DATABASE_URL would
+        otherwise leak into fields the tests leave unset.
+        NOTE(review): assumes the settings use unprefixed DATABASE_* env names, as
+        documented in .env.example — confirm against model_config.
+        """
+        suffixes = "BACKEND URL HOST PORT NAME SCHEMA USER PASSWORD"
+        for suffix in suffixes.split():
+            monkeypatch.delenv(f"DATABASE_{suffix}", raising=False)
+
+    def test_default_backend_is_sqlite_unchanged(self) -> None:
+        """No DB env vars -> the same SQLite path and no schema as before."""
+        settings = CollectorSettings(_env_file=None, data_home="/data")
+
+        assert settings.database_backend.value == "sqlite"
+        assert (
+            settings.effective_database_url == "sqlite:////data/collector/meshcore.db"
+        )
+        assert settings.effective_database_schema is None
+
+    def test_postgres_backend_assembles_url_and_schema(self) -> None:
+        """Postgres backend assembles a URL from components and exposes the schema."""
+        settings = APISettings(
+            _env_file=None,
+            database_backend="postgres",
+            database_host="pg",
+            database_password="pw",
+        )
+
+        assert settings.effective_database_url == (
+            "postgresql+psycopg2://meshcorehub:pw@pg:5432/meshcorehub"
+        )
+        assert settings.effective_database_schema == "meshcorehub"
+
+    def test_postgres_password_is_url_encoded(self) -> None:
+        """Special characters in the password are percent-encoded."""
+        settings = APISettings(
+            _env_file=None,
+            database_backend="postgres",
+            database_host="pg",
+            database_password="s3cr3t/p@ss",
+        )
+
+        assert "s3cr3t%2Fp%40ss" in settings.effective_database_url
+
+    def test_postgres_schema_override_per_instance(self) -> None:
+        """DATABASE_SCHEMA isolates instances sharing one database."""
+        settings = APISettings(
+            _env_file=None,
+            database_backend="postgres",
+            database_host="pg",
+            database_password="pw",
+            database_schema="stg",
+        )
+
+        assert settings.effective_database_schema == "stg"
+
+    def test_postgres_missing_required_vars_fails_fast(self) -> None:
+        """Misconfigured postgres backend raises rather than silently using SQLite."""
+        settings = APISettings(_env_file=None, database_backend="postgres")
+
+        with pytest.raises(ValueError, match="DATABASE_BACKEND=postgres requires"):
+            _ = settings.effective_database_url
+
+    def test_explicit_url_overrides_backend(self) -> None:
+        """An explicit DATABASE_URL wins even when a backend is selected."""
+        settings = CollectorSettings(
+            _env_file=None,
+            database_backend="postgres",
+            database_url="postgresql+psycopg2://u:p@h/db",
+        )
+
+        assert settings.effective_database_url == "postgresql+psycopg2://u:p@h/db"