Source code for sqldbagent.core.config

"""Pydantic settings and datasource configuration."""

from __future__ import annotations

from functools import lru_cache
from typing import Literal

from pydantic import (
    AliasChoices,
    BaseModel,
    ConfigDict,
    Field,
    field_validator,
    model_validator,
)
from pydantic_settings import BaseSettings, SettingsConfigDict
from sqlalchemy.engine import URL

from sqldbagent.core.enums import Dialect
from sqldbagent.core.errors import ConfigurationError



[docs]
class PoolSettings(BaseModel):
    """Connection-pool tuning for a datasource.

    Unknown keys are rejected (``extra="forbid"``).

    Attributes:
        size: Base pool size for sync engine usage; must be at least 1.
        max_overflow: Extra connections permitted beyond `size`; must not be
            negative.
        timeout_seconds: How long to wait for a pooled connection; must be
            strictly positive.
    """

    model_config = ConfigDict(extra="forbid")

    size: int = Field(5, ge=1)
    max_overflow: int = Field(5, ge=0)
    timeout_seconds: float = Field(30.0, gt=0)




[docs]
class SafetySettings(BaseModel):
    """Baseline SQL safety policy.

    Unknown keys are rejected (``extra="forbid"``).

    Attributes:
        read_only: Whether execution paths default to read-only behavior.
        statement_timeout_seconds: Default execution timeout; must be
            strictly positive.
        max_rows: Default row cap for user-facing query surfaces; must be at
            least 1.
        allowed_schemas: Optional schema allowlist enforced by higher layers;
            empty by default.
    """

    model_config = ConfigDict(extra="forbid")

    read_only: bool = True
    statement_timeout_seconds: float = Field(30.0, gt=0)
    max_rows: int = Field(500, ge=1)
    # default_factory gives every instance its own list.
    allowed_schemas: list[str] = Field(default_factory=list)




[docs]
class ProfilingSettings(BaseModel):
    """Baseline profiling configuration.

    Unknown keys are rejected (``extra="forbid"``). Each sample-size field is
    validated on its own; this model does not compare
    `default_sample_size` against `max_sample_size`.

    Attributes:
        default_sample_size: Default number of sample rows; must be at least 1.
        max_sample_size: Maximum sample rows allowed by default tooling; must
            be at least 1.
        exact_unique_counts: Whether generic profilers should compute exact
            unique counts.
    """

    model_config = ConfigDict(extra="forbid")

    default_sample_size: int = Field(5, ge=1)
    max_sample_size: int = Field(50, ge=1)
    exact_unique_counts: bool = True




[docs]
class ArtifactSettings(BaseModel):
    """Artifact persistence settings.

    All values are plain strings; this model does not join, resolve, or
    create any of the paths. Unknown keys are rejected (``extra="forbid"``).

    Attributes:
        root_dir: Base directory for generated artifacts.
        snapshots_dir: Snapshot subdirectory under `root_dir`.
        documents_dir: Document-export subdirectory under `root_dir`.
        diagrams_dir: Diagram-export subdirectory under `root_dir`.
        prompts_dir: Prompt-export subdirectory under `root_dir`.
        prompt_enhancements_dir: Prompt-enhancement subdirectory under `root_dir`.
        embeddings_cache_dir: Embedding cache subdirectory under `root_dir`.
        vectorstores_dir: Retrieval/vectorstore manifest subdirectory under `root_dir`.
    """

    model_config = ConfigDict(extra="forbid")

    # Relative default; how it is resolved is up to the consumer of these settings.
    root_dir: str = "var/sqldbagent"
    # The remaining entries are subdirectory names, documented as living under `root_dir`.
    snapshots_dir: str = "snapshots"
    documents_dir: str = "documents"
    diagrams_dir: str = "diagrams"
    prompts_dir: str = "prompts"
    prompt_enhancements_dir: str = "prompt-enhancements"
    embeddings_cache_dir: str = "embeddings-cache"
    vectorstores_dir: str = "vectorstores"




[docs]
class EmbeddingSettings(BaseModel):
    """Configuration for the embedding provider.

    Unknown keys are ignored (``extra="ignore"``).

    Attributes:
        provider: Embedding backend identifier, either ``"openai"`` or
            ``"hash"``.
        model: Provider-specific embedding model name.
        dimensions: Optional embedding dimensionality override; when set it
            must be at least 8.
        batch_size: Batch size used by embedding providers when supported;
            must be at least 1.
        cache_query_embeddings: Whether query embeddings should also be cached.
    """

    model_config = ConfigDict(extra="ignore", populate_by_name=True)

    provider: Literal["openai", "hash"] = "openai"
    model: str = "text-embedding-3-large"
    dimensions: int | None = Field(None, ge=8)
    batch_size: int = Field(128, ge=1)
    cache_query_embeddings: bool = True




[docs]
class RetrievalSettings(BaseModel):
    """Retrieval and vectorstore settings.

    Unknown keys are ignored (``extra="ignore"``).

    Attributes:
        backend: Retrieval backend identifier; only ``"qdrant"`` is accepted.
        qdrant_url: Base Qdrant HTTP URL.
        qdrant_api_key: Optional Qdrant API key. Accepted under the aliases
            ``SQLDBAGENT_QDRANT_API_KEY`` / ``QDRANT_API_KEY`` or by field
            name. Omitted from ``repr()`` so it does not leak into logs.
        qdrant_grpc_port: Qdrant gRPC port when gRPC transport is enabled;
            must be a valid TCP port (1-65535).
        qdrant_prefer_grpc: Whether to prefer gRPC transport.
        collection_prefix: Prefix used when generating Qdrant collection names.
        default_top_k: Default number of documents returned from retrieval.
        default_fetch_k: Fetch pool size used by MMR retrieval.
        use_mmr: Whether retrievers should prefer MMR over plain similarity.
        score_threshold: Optional score threshold applied to retrieval.
        create_payload_indexes: Whether payload indexes should be created in Qdrant.
    """

    model_config = ConfigDict(extra="ignore", populate_by_name=True)

    backend: Literal["qdrant"] = "qdrant"
    qdrant_url: str = "http://127.0.0.1:6333"
    qdrant_api_key: str | None = Field(
        default=None,
        validation_alias=AliasChoices("SQLDBAGENT_QDRANT_API_KEY", "QDRANT_API_KEY"),
        # Secret: keep it out of repr() output (logs, tracebacks, debuggers).
        repr=False,
    )
    # Upper bound rejects impossible ports at config load instead of at connect time.
    qdrant_grpc_port: int = Field(default=6334, ge=1, le=65535)
    qdrant_prefer_grpc: bool = False
    collection_prefix: str = "sqldbagent"
    default_top_k: int = Field(default=6, ge=1)
    default_fetch_k: int = Field(default=24, ge=1)
    use_mmr: bool = True
    score_threshold: float | None = None
    create_payload_indexes: bool = True




[docs]
class LLMSettings(BaseModel):
    """Optional model-provider settings.

    Unknown keys are ignored (``extra="ignore"``). Each credential/URL field
    can be populated by its field name or by either of its listed aliases.
    API keys are omitted from ``repr()`` so they do not leak into logs.

    Attributes:
        default_provider: Default provider identifier for future LLM features.
        default_model: Default model name for future LLM features.
        reasoning_effort: Default reasoning effort for supported reasoning models.
        openai_api_key: Optional OpenAI API key loaded from the environment.
        openai_base_url: Optional OpenAI-compatible base URL.
        anthropic_api_key: Optional Anthropic API key loaded from the environment.
        anthropic_base_url: Optional Anthropic base URL override.
    """

    model_config = ConfigDict(extra="ignore", populate_by_name=True)

    default_provider: str | None = "openai"
    default_model: str | None = "gpt-5.2"
    reasoning_effort: str | None = "xhigh"
    openai_api_key: str | None = Field(
        default=None,
        validation_alias=AliasChoices("SQLDBAGENT_OPENAI_API_KEY", "OPENAI_API_KEY"),
        # Secret: keep it out of repr() output.
        repr=False,
    )
    openai_base_url: str | None = Field(
        default=None,
        validation_alias=AliasChoices("SQLDBAGENT_OPENAI_BASE_URL", "OPENAI_BASE_URL"),
    )
    anthropic_api_key: str | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_ANTHROPIC_API_KEY", "ANTHROPIC_API_KEY"
        ),
        # Secret: keep it out of repr() output.
        repr=False,
    )
    anthropic_base_url: str | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_ANTHROPIC_BASE_URL", "ANTHROPIC_BASE_URL"
        ),
    )




[docs]
class LangSmithSettings(BaseModel):
    """LangSmith tracing and project settings.

    Unknown keys are ignored (``extra="ignore"``). Aliased fields can be
    populated by field name or by either listed alias.

    Attributes:
        tracing: Whether LangSmith tracing is enabled for supported surfaces.
        project: LangSmith project name used for traces.
        api_key: Optional LangSmith API key loaded from the environment.
            Omitted from ``repr()`` so it does not leak into logs.
        endpoint: Optional LangSmith API endpoint override.
        workspace_id: Optional LangSmith workspace identifier.
        tags: Default LangSmith trace tags applied across surfaces.
    """

    model_config = ConfigDict(extra="ignore", populate_by_name=True)

    tracing: bool = False
    project: str = "sqldbagent"
    api_key: str | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_LANGSMITH_API_KEY",
            "LANGSMITH_API_KEY",
        ),
        # Secret: keep it out of repr() output.
        repr=False,
    )
    endpoint: str | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_LANGSMITH_ENDPOINT",
            "LANGSMITH_ENDPOINT",
        ),
    )
    workspace_id: str | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_LANGSMITH_WORKSPACE_ID",
            "LANGSMITH_WORKSPACE_ID",
        ),
    )
    # A factory is used so each instance gets its own list.
    tags: list[str] = Field(default_factory=lambda: ["sqldbagent"])

    @field_validator("tags", mode="before")
    @classmethod
    def validate_tags(cls, value: object) -> object:
        """Normalize tag values from env strings or iterables.

        Runs before Pydantic's own ``list[str]`` parsing.

        Args:
            value: Raw LangSmith tags value.

        Returns:
            object: ``["sqldbagent"]`` when `value` is ``None``; for a string,
            the comma-separated items with surrounding whitespace stripped
            and empty items dropped; any other value unchanged.
        """

        if value is None:
            return ["sqldbagent"]
        if isinstance(value, str):
            return [item.strip() for item in value.split(",") if item.strip()]
        return value





[docs]
class AgentCheckpointSettings(BaseModel):
    """Agent checkpoint persistence settings.

    Unknown keys are ignored (``extra="ignore"``).

    Attributes:
        backend: Checkpoint backend to use for agent persistence, either
            ``"memory"`` or ``"postgres"``.
        postgres_url: Optional Postgres connection string for LangGraph
            checkpointing. Accepted by field name or any listed alias.
            Omitted from ``repr()`` because a connection string may embed
            credentials.
        auto_setup: Whether Postgres checkpointer tables should be initialized automatically.
        pipeline: Whether the Postgres saver should use pipelining when supported.
    """

    model_config = ConfigDict(extra="ignore", populate_by_name=True)

    backend: Literal["memory", "postgres"] = "memory"
    postgres_url: str | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_AGENT_CHECKPOINT_POSTGRES_URL",
            "AGENT_CHECKPOINT_POSTGRES_URL",
            "LANGGRAPH_CHECKPOINT_POSTGRES_URL",
        ),
        # May contain a password; keep it out of repr() output.
        repr=False,
    )
    auto_setup: bool = True
    pipeline: bool = False




[docs]
class AgentSettings(BaseModel):
    """Configuration for agent orchestration.

    Unknown keys are ignored (``extra="ignore"``).

    Attributes:
        name: Stable agent name for LangChain/LangGraph surfaces.
        include_latest_snapshot_context: Whether agents inject the latest
            snapshot summaries.
        max_model_calls_per_run: Optional per-run cap on model calls; when
            set it must be at least 1.
        max_tool_calls_per_run: Optional per-run cap on tool calls; when set
            it must be at least 1.
        enable_todo_middleware: Whether the LangChain todo middleware is enabled.
        enable_human_in_the_loop: Whether `safe_query_sql` requires approval
            middleware.
        enable_summarization_middleware: Whether context summarization
            middleware is enabled.
        summarization_trigger_fraction: Fractional context threshold for
            summarization; must lie in (0, 1].
        summarization_keep_messages: Number of recent messages preserved after
            summarization; must be at least 1.
        summarization_model: Optional dedicated model identifier for
            summarization.
        tool_call_digest_limit: Maximum number of compressed tool-call
            summaries to retain; must be at least 1.
        checkpoint: Agent checkpoint persistence settings.
        enable_prompt_enhancements: Whether dynamic prompts merge saved
            prompt-enhancement artifacts.
    """

    model_config = ConfigDict(extra="ignore", populate_by_name=True)

    name: str = "sqldbagent"
    include_latest_snapshot_context: bool = True
    max_model_calls_per_run: int | None = Field(8, ge=1)
    max_tool_calls_per_run: int | None = Field(24, ge=1)
    enable_todo_middleware: bool = True
    enable_human_in_the_loop: bool = False
    enable_summarization_middleware: bool = False
    summarization_trigger_fraction: float = Field(0.9, gt=0, le=1)
    summarization_keep_messages: int = Field(20, ge=1)
    summarization_model: str | None = None
    tool_call_digest_limit: int = Field(10, ge=1)
    # Built per instance so nested settings are never shared.
    checkpoint: AgentCheckpointSettings = Field(default_factory=AgentCheckpointSettings)
    enable_prompt_enhancements: bool = True




[docs]
class MCPSettings(BaseModel):
    """FastMCP server settings.

    Unknown keys are ignored (``extra="ignore"``).

    Attributes:
        transport: Default MCP transport to serve; one of ``"stdio"``,
            ``"http"``, ``"sse"`` or ``"streamable-http"``.
        host: Default host for HTTP-based transports.
        port: Default port for HTTP-based transports; must be a valid TCP
            port (1-65535).
        path: Default HTTP path for streamable transports.
        log_level: Default FastMCP/Uvicorn log level for HTTP transports.
        show_banner: Whether the FastMCP banner should be shown on startup.
        stateless_http: Whether streamable HTTP should run in stateless mode.
    """

    model_config = ConfigDict(extra="ignore", populate_by_name=True)

    transport: Literal["stdio", "http", "sse", "streamable-http"] = "stdio"
    host: str = "127.0.0.1"
    # Upper bound makes an impossible port a configuration error rather than
    # a failure when the server tries to bind.
    port: int = Field(default=8000, ge=1, le=65535)
    path: str = "/mcp"
    log_level: str = "info"
    show_banner: bool = True
    stateless_http: bool = False




[docs]
class DatasourceSettings(BaseModel):
    """Single datasource definition.

    Unknown keys are rejected (``extra="forbid"``).

    Attributes:
        name: Stable datasource identifier used by services and adapters.
            Surrounding whitespace is stripped and the result must not be empty.
        dialect: Database dialect used by the datasource.
        url: SQLAlchemy-compatible connection URL.
        echo: Whether SQLAlchemy should emit SQL logs.
        pool: Pool configuration for this datasource.
        safety: Default safety policy for this datasource.
    """

    model_config = ConfigDict(extra="forbid")

    name: str
    dialect: Dialect
    url: str
    echo: bool = False
    pool: PoolSettings = Field(default_factory=PoolSettings)
    safety: SafetySettings = Field(default_factory=SafetySettings)

    @field_validator("name")
    @classmethod
    def validate_name(cls, value: str) -> str:
        """Validate and normalize datasource names.

        Args:
            value: Raw datasource name.

        Returns:
            str: The name with leading and trailing whitespace removed.

        Raises:
            ValueError: If the name is empty after stripping.
        """

        trimmed = value.strip()
        if trimmed:
            return trimmed
        raise ValueError("datasource name must not be empty")





[docs]
class AppSettings(BaseSettings):
    """Top-level application settings.

    Attributes:
        env: Deployment environment label.
        log_level: Default application log level.
        datasources: Configured datasource definitions.
        datasource_aliases: Optional alias map from short names to datasource names.
        profiling: Default profiling settings.
        artifacts: Artifact persistence settings.
        llm: Optional model-provider settings.
        langsmith: LangSmith tracing settings.
        embeddings: Embedding-provider settings.
        retrieval: Retrieval/vectorstore settings.
        agent: Agent orchestration settings.
        mcp: FastMCP server settings.
    """

    model_config = SettingsConfigDict(
        env_prefix="SQLDBAGENT_",
        env_file=".env",
        env_nested_delimiter="__",
        extra="ignore",
        populate_by_name=True,
    )

    env: str = "dev"
    log_level: str = "INFO"
    datasources: list[DatasourceSettings] = Field(default_factory=list)
    datasource_aliases: dict[str, str] = Field(default_factory=dict)
    profiling: ProfilingSettings = Field(default_factory=ProfilingSettings)
    artifacts: ArtifactSettings = Field(default_factory=ArtifactSettings)
    llm: LLMSettings = Field(default_factory=LLMSettings)
    langsmith: LangSmithSettings = Field(default_factory=LangSmithSettings)
    embeddings: EmbeddingSettings = Field(default_factory=EmbeddingSettings)
    retrieval: RetrievalSettings = Field(default_factory=RetrievalSettings)
    agent: AgentSettings = Field(default_factory=AgentSettings)
    mcp: MCPSettings = Field(default_factory=MCPSettings)
    default_datasource_name: str | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_DEFAULT_DATASOURCE", "DEFAULT_DATASOURCE"
        ),
    )
    default_schema_name: str | None = Field(
        default=None,
        validation_alias=AliasChoices("SQLDBAGENT_DEFAULT_SCHEMA", "DEFAULT_SCHEMA"),
    )
    llm_default_provider: str | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_LLM_DEFAULT_PROVIDER", "LLM_DEFAULT_PROVIDER"
        ),
        exclude=True,
        repr=False,
    )
    llm_default_model: str | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_LLM_DEFAULT_MODEL", "LLM_DEFAULT_MODEL"
        ),
        exclude=True,
        repr=False,
    )
    llm_reasoning_effort: str | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_LLM_REASONING_EFFORT", "LLM_REASONING_EFFORT"
        ),
        exclude=True,
        repr=False,
    )
    openai_api_key: str | None = Field(
        default=None,
        validation_alias=AliasChoices("SQLDBAGENT_OPENAI_API_KEY", "OPENAI_API_KEY"),
        exclude=True,
        repr=False,
    )
    openai_base_url: str | None = Field(
        default=None,
        validation_alias=AliasChoices("SQLDBAGENT_OPENAI_BASE_URL", "OPENAI_BASE_URL"),
        exclude=True,
        repr=False,
    )
    anthropic_api_key: str | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_ANTHROPIC_API_KEY", "ANTHROPIC_API_KEY"
        ),
        exclude=True,
        repr=False,
    )
    anthropic_base_url: str | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_ANTHROPIC_BASE_URL", "ANTHROPIC_BASE_URL"
        ),
        exclude=True,
        repr=False,
    )
    langsmith_tracing: bool | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_LANGSMITH_TRACING",
            "LANGSMITH_TRACING",
        ),
        exclude=True,
        repr=False,
    )
    langsmith_project: str | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_LANGSMITH_PROJECT",
            "LANGSMITH_PROJECT",
        ),
        exclude=True,
        repr=False,
    )
    langsmith_api_key: str | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_LANGSMITH_API_KEY",
            "LANGSMITH_API_KEY",
        ),
        exclude=True,
        repr=False,
    )
    langsmith_endpoint: str | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_LANGSMITH_ENDPOINT",
            "LANGSMITH_ENDPOINT",
        ),
        exclude=True,
        repr=False,
    )
    langsmith_workspace_id: str | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_LANGSMITH_WORKSPACE_ID",
            "LANGSMITH_WORKSPACE_ID",
        ),
        exclude=True,
        repr=False,
    )
    langsmith_tags: list[str] | str | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_LANGSMITH_TAGS",
            "LANGSMITH_TAGS",
        ),
        exclude=True,
        repr=False,
    )
    agent_name: str | None = Field(
        default=None,
        validation_alias=AliasChoices("SQLDBAGENT_AGENT_NAME", "AGENT_NAME"),
        exclude=True,
        repr=False,
    )
    agent_include_latest_snapshot_context: bool | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_AGENT_INCLUDE_LATEST_SNAPSHOT_CONTEXT",
            "AGENT_INCLUDE_LATEST_SNAPSHOT_CONTEXT",
        ),
        exclude=True,
        repr=False,
    )
    agent_max_model_calls_per_run: int | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_AGENT_MAX_MODEL_CALLS_PER_RUN",
            "AGENT_MAX_MODEL_CALLS_PER_RUN",
        ),
        exclude=True,
        repr=False,
    )
    agent_max_tool_calls_per_run: int | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_AGENT_MAX_TOOL_CALLS_PER_RUN",
            "AGENT_MAX_TOOL_CALLS_PER_RUN",
        ),
        exclude=True,
        repr=False,
    )
    agent_enable_todo_middleware: bool | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_AGENT_ENABLE_TODO_MIDDLEWARE",
            "AGENT_ENABLE_TODO_MIDDLEWARE",
        ),
        exclude=True,
        repr=False,
    )
    agent_enable_human_in_the_loop: bool | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_AGENT_ENABLE_HUMAN_IN_THE_LOOP",
            "AGENT_ENABLE_HUMAN_IN_THE_LOOP",
        ),
        exclude=True,
        repr=False,
    )
    agent_enable_summarization_middleware: bool | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_AGENT_ENABLE_SUMMARIZATION_MIDDLEWARE",
            "AGENT_ENABLE_SUMMARIZATION_MIDDLEWARE",
        ),
        exclude=True,
        repr=False,
    )
    agent_summarization_trigger_fraction: float | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_AGENT_SUMMARIZATION_TRIGGER_FRACTION",
            "AGENT_SUMMARIZATION_TRIGGER_FRACTION",
        ),
        exclude=True,
        repr=False,
    )
    agent_summarization_keep_messages: int | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_AGENT_SUMMARIZATION_KEEP_MESSAGES",
            "AGENT_SUMMARIZATION_KEEP_MESSAGES",
        ),
        exclude=True,
        repr=False,
    )
    agent_summarization_model: str | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_AGENT_SUMMARIZATION_MODEL",
            "AGENT_SUMMARIZATION_MODEL",
        ),
        exclude=True,
        repr=False,
    )
    agent_tool_call_digest_limit: int | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_AGENT_TOOL_CALL_DIGEST_LIMIT",
            "AGENT_TOOL_CALL_DIGEST_LIMIT",
        ),
        exclude=True,
        repr=False,
    )
    agent_checkpoint_backend: Literal["memory", "postgres"] | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_AGENT_CHECKPOINT_BACKEND",
            "AGENT_CHECKPOINT_BACKEND",
        ),
        exclude=True,
        repr=False,
    )
    agent_checkpoint_postgres_url: str | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_AGENT_CHECKPOINT_POSTGRES_URL",
            "AGENT_CHECKPOINT_POSTGRES_URL",
            "LANGGRAPH_CHECKPOINT_POSTGRES_URL",
        ),
        exclude=True,
        repr=False,
    )
    agent_checkpoint_auto_setup: bool | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_AGENT_CHECKPOINT_AUTO_SETUP",
            "AGENT_CHECKPOINT_AUTO_SETUP",
        ),
        exclude=True,
        repr=False,
    )
    agent_checkpoint_pipeline: bool | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_AGENT_CHECKPOINT_PIPELINE",
            "AGENT_CHECKPOINT_PIPELINE",
        ),
        exclude=True,
        repr=False,
    )
    qdrant_url: str | None = Field(
        default=None,
        validation_alias=AliasChoices("SQLDBAGENT_QDRANT_URL", "QDRANT_URL"),
        exclude=True,
        repr=False,
    )
    qdrant_api_key: str | None = Field(
        default=None,
        validation_alias=AliasChoices("SQLDBAGENT_QDRANT_API_KEY", "QDRANT_API_KEY"),
        exclude=True,
        repr=False,
    )
    qdrant_grpc_port: int | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_QDRANT_GRPC_PORT", "QDRANT_GRPC_PORT"
        ),
        exclude=True,
        repr=False,
    )
    qdrant_prefer_grpc: bool | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_QDRANT_PREFER_GRPC", "QDRANT_PREFER_GRPC"
        ),
        exclude=True,
        repr=False,
    )
    embeddings_provider: Literal["openai", "hash"] | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_EMBEDDINGS_PROVIDER", "EMBEDDINGS_PROVIDER"
        ),
        exclude=True,
        repr=False,
    )
    embeddings_model: str | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_EMBEDDINGS_MODEL", "EMBEDDINGS_MODEL"
        ),
        exclude=True,
        repr=False,
    )
    embeddings_dimensions: int | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_EMBEDDINGS_DIMENSIONS", "EMBEDDINGS_DIMENSIONS"
        ),
        exclude=True,
        repr=False,
    )
    embeddings_batch_size: int | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_EMBEDDINGS_BATCH_SIZE", "EMBEDDINGS_BATCH_SIZE"
        ),
        exclude=True,
        repr=False,
    )
    embeddings_cache_query_embeddings: bool | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_EMBEDDINGS_CACHE_QUERY_EMBEDDINGS",
            "EMBEDDINGS_CACHE_QUERY_EMBEDDINGS",
        ),
        exclude=True,
        repr=False,
    )
    sqlite_path: str | None = Field(
        default=None,
        validation_alias=AliasChoices("SQLDBAGENT_SQLITE_PATH", "SQLITE_PATH"),
    )
    postgres_host: str | None = Field(
        default=None,
        validation_alias=AliasChoices("SQLDBAGENT_POSTGRES_HOST", "POSTGRES_HOST"),
    )
    postgres_port: int = Field(
        default=5432,
        validation_alias=AliasChoices("SQLDBAGENT_POSTGRES_PORT", "POSTGRES_PORT"),
    )
    postgres_db: str | None = Field(
        default=None,
        validation_alias=AliasChoices("SQLDBAGENT_POSTGRES_DB", "POSTGRES_DB"),
    )
    postgres_user: str | None = Field(
        default=None,
        validation_alias=AliasChoices("SQLDBAGENT_POSTGRES_USER", "POSTGRES_USER"),
    )
    postgres_password: str | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_POSTGRES_PASSWORD", "POSTGRES_PASSWORD"
        ),
    )
    postgres_demo_host: str | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_POSTGRES_DEMO_HOST", "POSTGRES_DEMO_HOST"
        ),
    )
    postgres_demo_port: int = Field(
        default=5433,
        validation_alias=AliasChoices(
            "SQLDBAGENT_POSTGRES_DEMO_PORT", "POSTGRES_DEMO_PORT"
        ),
    )
    postgres_demo_db: str | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_POSTGRES_DEMO_DB", "POSTGRES_DEMO_DB"
        ),
    )
    postgres_demo_user: str | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_POSTGRES_DEMO_USER", "POSTGRES_DEMO_USER"
        ),
    )
    postgres_demo_password: str | None = Field(
        default=None,
        validation_alias=AliasChoices(
            "SQLDBAGENT_POSTGRES_DEMO_PASSWORD", "POSTGRES_DEMO_PASSWORD"
        ),
    )
    mssql_host: str | None = Field(
        default=None,
        validation_alias=AliasChoices("SQLDBAGENT_MSSQL_HOST", "MSSQL_HOST"),
    )
    mssql_port: int = Field(
        default=1433,
        validation_alias=AliasChoices("SQLDBAGENT_MSSQL_PORT", "MSSQL_PORT"),
    )
    mssql_database: str | None = Field(
        default=None,
        validation_alias=AliasChoices("SQLDBAGENT_MSSQL_DATABASE", "MSSQL_DATABASE"),
    )
    mssql_user: str | None = Field(
        default=None,
        validation_alias=AliasChoices("SQLDBAGENT_MSSQL_USER", "MSSQL_USER"),
    )
    mssql_password: str | None = Field(
        default=None,
        validation_alias=AliasChoices("SQLDBAGENT_MSSQL_PASSWORD", "MSSQL_PASSWORD"),
    )
    mssql_driver: str = Field(
        default="ODBC Driver 18 for SQL Server",
        validation_alias=AliasChoices("SQLDBAGENT_MSSQL_DRIVER", "MSSQL_DRIVER"),
    )


[docs]
    @field_validator("datasources")
    @classmethod
    def validate_unique_datasources(
        cls, value: list[DatasourceSettings]
    ) -> list[DatasourceSettings]:
        """Ensure no two datasources share a name.

        Args:
            value: Configured datasource definitions.

        Returns:
            list[DatasourceSettings]: The same list, unchanged, when every
            name is unique.

        Raises:
            ValueError: If any name occurs more than once; the message lists
                the repeated names in sorted order.
        """

        registered: set[str] = set()
        repeated: set[str] = set()

        for entry in value:
            # A name already registered is a repeat; otherwise register it.
            bucket = repeated if entry.name in registered else registered
            bucket.add(entry.name)

        if not repeated:
            return value

        raise ValueError("duplicate datasource names: " + ", ".join(sorted(repeated)))



[docs]
    @field_validator("datasource_aliases")
    @classmethod
    def validate_datasource_aliases(cls, value: dict[str, str]) -> dict[str, str]:
        """Validate and normalize datasource alias keys and targets.

        Leading and trailing whitespace is stripped from every alias and
        every target.

        Args:
            value: Alias mapping loaded from settings.

        Returns:
            dict[str, str]: Normalized alias mapping.

        Raises:
            ValueError: If an alias or a target is empty after stripping, or
                if two raw aliases normalize to the same name while pointing
                at different targets.
        """

        normalized: dict[str, str] = {}
        for alias, target in value.items():
            normalized_alias = alias.strip()
            normalized_target = target.strip()
            if not normalized_alias:
                raise ValueError("datasource alias names must not be empty")
            if not normalized_target:
                raise ValueError("datasource alias targets must not be empty")
            # Stripping can make distinct raw keys (e.g. "a" and " a ") collide.
            # Identical targets are harmless; differing targets would silently
            # drop one mapping, so reject them.
            existing = normalized.setdefault(normalized_alias, normalized_target)
            if existing != normalized_target:
                raise ValueError(
                    f"conflicting targets for datasource alias: {normalized_alias}"
                )
        return normalized



[docs]
    @model_validator(mode="after")
    def build_default_datasources(self) -> "AppSettings":
        """Build convenience datasource definitions from environment fields.

        Returns:
            AppSettings: Settings with synthesized datasources when needed.
        """

        self.llm = self.llm.model_copy(
            update={
                "default_provider": (
                    self.llm.default_provider
                    if self.llm_default_provider is None
                    else self.llm_default_provider
                ),
                "default_model": (
                    self.llm.default_model
                    if self.llm_default_model is None
                    else self.llm_default_model
                ),
                "reasoning_effort": (
                    self.llm.reasoning_effort
                    if self.llm_reasoning_effort is None
                    else self.llm_reasoning_effort
                ),
                "openai_api_key": self.llm.openai_api_key or self.openai_api_key,
                "openai_base_url": self.llm.openai_base_url or self.openai_base_url,
                "anthropic_api_key": self.llm.anthropic_api_key
                or self.anthropic_api_key,
                "anthropic_base_url": self.llm.anthropic_base_url
                or self.anthropic_base_url,
            }
        )
        self.langsmith = self.langsmith.model_copy(
            update={
                "tracing": (
                    self.langsmith.tracing
                    if self.langsmith_tracing is None
                    else self.langsmith_tracing
                ),
                "project": (
                    self.langsmith.project
                    if self.langsmith_project is None
                    else self.langsmith_project
                ),
                "api_key": self.langsmith.api_key or self.langsmith_api_key,
                "endpoint": self.langsmith.endpoint or self.langsmith_endpoint,
                "workspace_id": (
                    self.langsmith.workspace_id or self.langsmith_workspace_id
                ),
                "tags": (
                    self.langsmith.tags
                    if self.langsmith_tags is None
                    else (
                        [
                            item.strip()
                            for item in self.langsmith_tags.split(",")
                            if item.strip()
                        ]
                        if isinstance(self.langsmith_tags, str)
                        else list(self.langsmith_tags)
                    )
                ),
            }
        )
        self.embeddings = self.embeddings.model_copy(
            update={
                "provider": (
                    self.embeddings.provider
                    if self.embeddings_provider is None
                    else self.embeddings_provider
                ),
                "model": (
                    self.embeddings.model
                    if self.embeddings_model is None
                    else self.embeddings_model
                ),
                "dimensions": (
                    self.embeddings.dimensions
                    if self.embeddings_dimensions is None
                    else self.embeddings_dimensions
                ),
                "batch_size": (
                    self.embeddings.batch_size
                    if self.embeddings_batch_size is None
                    else self.embeddings_batch_size
                ),
                "cache_query_embeddings": (
                    self.embeddings.cache_query_embeddings
                    if self.embeddings_cache_query_embeddings is None
                    else self.embeddings_cache_query_embeddings
                ),
            }
        )
        self.retrieval = self.retrieval.model_copy(
            update={
                "qdrant_url": (
                    self.retrieval.qdrant_url
                    if self.qdrant_url is None
                    else self.qdrant_url
                ),
                "qdrant_grpc_port": (
                    self.retrieval.qdrant_grpc_port
                    if self.qdrant_grpc_port is None
                    else self.qdrant_grpc_port
                ),
                "qdrant_prefer_grpc": (
                    self.retrieval.qdrant_prefer_grpc
                    if self.qdrant_prefer_grpc is None
                    else self.qdrant_prefer_grpc
                ),
                "qdrant_api_key": (
                    self.retrieval.qdrant_api_key or self.qdrant_api_key
                ),
            }
        )
        self.agent = self.agent.model_copy(
            update={
                "name": self.agent.name if self.agent_name is None else self.agent_name,
                "include_latest_snapshot_context": (
                    self.agent.include_latest_snapshot_context
                    if self.agent_include_latest_snapshot_context is None
                    else self.agent_include_latest_snapshot_context
                ),
                "max_model_calls_per_run": (
                    self.agent.max_model_calls_per_run
                    if self.agent_max_model_calls_per_run is None
                    else self.agent_max_model_calls_per_run
                ),
                "max_tool_calls_per_run": (
                    self.agent.max_tool_calls_per_run
                    if self.agent_max_tool_calls_per_run is None
                    else self.agent_max_tool_calls_per_run
                ),
                "enable_todo_middleware": (
                    self.agent.enable_todo_middleware
                    if self.agent_enable_todo_middleware is None
                    else self.agent_enable_todo_middleware
                ),
                "enable_human_in_the_loop": (
                    self.agent.enable_human_in_the_loop
                    if self.agent_enable_human_in_the_loop is None
                    else self.agent_enable_human_in_the_loop
                ),
                "enable_summarization_middleware": (
                    self.agent.enable_summarization_middleware
                    if self.agent_enable_summarization_middleware is None
                    else self.agent_enable_summarization_middleware
                ),
                "summarization_trigger_fraction": (
                    self.agent.summarization_trigger_fraction
                    if self.agent_summarization_trigger_fraction is None
                    else self.agent_summarization_trigger_fraction
                ),
                "summarization_keep_messages": (
                    self.agent.summarization_keep_messages
                    if self.agent_summarization_keep_messages is None
                    else self.agent_summarization_keep_messages
                ),
                "summarization_model": (
                    self.agent.summarization_model or self.agent_summarization_model
                ),
                "tool_call_digest_limit": (
                    self.agent.tool_call_digest_limit
                    if self.agent_tool_call_digest_limit is None
                    else self.agent_tool_call_digest_limit
                ),
                "checkpoint": self.agent.checkpoint.model_copy(
                    update={
                        "backend": (
                            self.agent.checkpoint.backend
                            if self.agent_checkpoint_backend is None
                            else self.agent_checkpoint_backend
                        ),
                        "postgres_url": self.agent.checkpoint.postgres_url
                        or self.agent_checkpoint_postgres_url,
                        "auto_setup": (
                            self.agent.checkpoint.auto_setup
                            if self.agent_checkpoint_auto_setup is None
                            else self.agent_checkpoint_auto_setup
                        ),
                        "pipeline": (
                            self.agent.checkpoint.pipeline
                            if self.agent_checkpoint_pipeline is None
                            else self.agent_checkpoint_pipeline
                        ),
                    }
                ),
            }
        )
        if self.agent.checkpoint.postgres_url is None and all(
            (
                self.postgres_host,
                self.postgres_db,
                self.postgres_user,
                self.postgres_password,
            )
        ):
            self.agent = self.agent.model_copy(
                update={
                    "checkpoint": self.agent.checkpoint.model_copy(
                        update={
                            "postgres_url": URL.create(
                                "postgresql+psycopg",
                                username=self.postgres_user,
                                password=self.postgres_password,
                                host=self.postgres_host,
                                port=self.postgres_port,
                                database=self.postgres_db,
                            ).render_as_string(hide_password=False),
                        }
                    )
                }
            )

        if self.datasources:
            self._validate_datasource_alias_targets()
            return self

        datasources: list[DatasourceSettings] = []

        if self.sqlite_path:
            datasources.append(
                DatasourceSettings(
                    name="sqlite",
                    dialect=Dialect.SQLITE,
                    url=f"sqlite+pysqlite:///{self.sqlite_path}",
                )
            )

        if all(
            (
                self.postgres_host,
                self.postgres_db,
                self.postgres_user,
                self.postgres_password,
            )
        ):
            datasources.append(
                DatasourceSettings(
                    name="postgres",
                    dialect=Dialect.POSTGRES,
                    url=URL.create(
                        "postgresql+psycopg",
                        username=self.postgres_user,
                        password=self.postgres_password,
                        host=self.postgres_host,
                        port=self.postgres_port,
                        database=self.postgres_db,
                    ).render_as_string(hide_password=False),
                )
            )

        if all(
            (
                self.postgres_demo_host,
                self.postgres_demo_db,
                self.postgres_demo_user,
                self.postgres_demo_password,
            )
        ):
            datasources.append(
                DatasourceSettings(
                    name="postgres_demo",
                    dialect=Dialect.POSTGRES,
                    url=URL.create(
                        "postgresql+psycopg",
                        username=self.postgres_demo_user,
                        password=self.postgres_demo_password,
                        host=self.postgres_demo_host,
                        port=self.postgres_demo_port,
                        database=self.postgres_demo_db,
                    ).render_as_string(hide_password=False),
                )
            )

        if all(
            (
                self.mssql_host,
                self.mssql_database,
                self.mssql_user,
                self.mssql_password,
            )
        ):
            datasources.append(
                DatasourceSettings(
                    name="mssql",
                    dialect=Dialect.MSSQL,
                    url=URL.create(
                        "mssql+pyodbc",
                        username=self.mssql_user,
                        password=self.mssql_password,
                        host=self.mssql_host,
                        port=self.mssql_port,
                        database=self.mssql_database,
                        query={
                            "driver": self.mssql_driver,
                            "TrustServerCertificate": "yes",
                        },
                    ).render_as_string(hide_password=False),
                )
            )

        self.datasources = datasources
        self._validate_datasource_alias_targets()
        return self


    def _validate_datasource_alias_targets(self) -> None:
        """Reject aliases whose target is not a configured datasource.

        Raises:
            ValueError: If any alias maps to a name that is missing from
                `self.datasources`. The message lists every offending
                `alias->target` pair, sorted by alias.
        """

        configured = {entry.name for entry in self.datasources}

        dangling: list[tuple[str, str]] = []
        for alias, target in self.datasource_aliases.items():
            if target not in configured:
                dangling.append((alias, target))

        if not dangling:
            return

        # Sort so the error message is stable regardless of mapping order.
        dangling.sort()
        rendered = ", ".join(f"{alias}->{target}" for alias, target in dangling)
        raise ValueError(f"unknown datasource alias targets: {rendered}")


[docs]
    def resolve_datasource_name(self, name: str) -> str:
        """Map a datasource name or alias onto its canonical datasource name.

        Surrounding whitespace is stripped before the alias lookup. A value
        that is not a registered alias is returned in its stripped form.

        Args:
            name: Datasource name or alias.

        Returns:
            str: Canonical datasource name.
        """

        candidate = name.strip()
        aliases = self.datasource_aliases
        return aliases[candidate] if candidate in aliases else candidate



[docs]
    def get_datasource(self, name: str) -> DatasourceSettings:
        """Look up a configured datasource by its name or alias.

        Args:
            name: Datasource name or alias.

        Returns:
            DatasourceSettings: The first configured datasource whose name
                equals the resolved identifier.

        Raises:
            ConfigurationError: If no configured datasource matches.
        """

        canonical = self.resolve_datasource_name(name)
        found = next(
            (entry for entry in self.datasources if entry.name == canonical),
            None,
        )
        if found is None:
            # Report the identifier as the caller supplied it, not the resolved one.
            raise ConfigurationError(f"unknown datasource: {name}")

        return found



[docs]
    def resolve_default_datasource_name(self) -> str:
        """Pick the datasource name that runtime surfaces should use by default.

        An explicitly configured `default_datasource_name` wins: it is resolved
        through the alias table and checked against the configured datasources.
        Otherwise the first configured datasource is used.

        Returns:
            str: Preferred datasource name.

        Raises:
            ConfigurationError: If the configured default is unknown, or if no
                datasources are configured.
        """

        configured_default = self.default_datasource_name
        if configured_default is None:
            if self.datasources:
                return self.datasources[0].name
            raise ConfigurationError("no datasources are configured")

        canonical = self.resolve_datasource_name(configured_default)
        # Called only for validation: it raises when the default is unknown.
        self.get_datasource(canonical)
        return canonical





[docs]
@lru_cache(maxsize=1)
def load_settings() -> AppSettings:
    """Build the application settings once and reuse them afterwards.

    The `lru_cache` decorator memoizes the result, so repeated calls return
    the same `AppSettings` instance until the cache is cleared.

    Returns:
        AppSettings: Cached application settings.
    """

    settings = AppSettings()
    return settings