# Source code for sqlspec.loader

"""SQL file loader for managing SQL statements from files.

Provides functionality to load, cache, and manage SQL statements
from files using named SQL queries.

SQL files declare query metadata with comment directives like ``-- name: query_name`` (hyphens and suffixes allowed)
and ``-- dialect: dialect_name``.
"""

import hashlib
import logging
import re
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import TYPE_CHECKING, Any, Final
from urllib.parse import unquote, urlparse

from sqlspec.core import SQL, ParameterDeclaration, ParameterValidator, get_cache, get_cache_config
from sqlspec.exceptions import (
    FileNotFoundInStorageError,
    SQLFileNotFoundError,
    SQLFileParseError,
    SQLStatementNotFoundError,
    StorageOperationFailedError,
)
from sqlspec.storage.registry import storage_registry as default_storage_registry
from sqlspec.utils.correlation import CorrelationContext
from sqlspec.utils.logging import get_logger, log_with_context
from sqlspec.utils.text import slugify
from sqlspec.utils.type_guards import is_local_path

if TYPE_CHECKING:
    from collections.abc import Sequence

    from sqlspec.observability import ObservabilityRuntime
    from sqlspec.storage.registry import StorageRegistry

# Public names exported by ``from sqlspec.loader import *``.
__all__ = ("NamedStatement", "SQLFile", "SQLFileCacheEntry", "SQLFileLoader")

# Module-level logger obtained through the project's ``get_logger`` helper.
logger = get_logger("sqlspec.loader")

# Matches a ``-- name: <query_name>`` comment directive, case-insensitively,
# with ``^``/``$`` anchoring at line boundaries (MULTILINE). Group 1 captures
# the name: word characters and hyphens, followed by any run of trailing
# characters that are neither word characters nor whitespace (a suffix such
# as ``!`` or ``$``).
QUERY_NAME_PATTERN = re.compile(r"^\s*--\s*name\s*:\s*([\w-]+[^\w\s]*)\s*$", re.MULTILINE | re.IGNORECASE)
# Matches any single character that is not a word character, ``.`` or ``-``.
# NOTE(review): presumably used to strip such characters from query names -
# the call site is outside this view.
TRIM_SPECIAL_CHARS = re.compile(r"[^\w.-]")

# Matches a ``-- dialect: <dialect_name>`` comment directive (case-insensitive,
# MULTILINE). The named group ``dialect`` captures ASCII letters, digits and
# underscores only.
DIALECT_PATTERN = re.compile(r"^\s*--\s*dialect\s*:\s*(?P<dialect>[a-zA-Z0-9_]+)\s*$", re.IGNORECASE | re.MULTILINE)

# Matches a ``-- param: <name> <type>[?] [description]`` comment directive.
# Named groups:
#   name     - word characters
#   type     - word characters and dots, optionally followed by one bracketed
#              argument list (e.g. ``list[int]``)
#   optional - a literal ``?`` placed directly after the type
#   desc     - the remaining text, without surrounding whitespace
# There is no MULTILINE flag, so ``^`` and ``$`` anchor to the whole input
# string; presumably the pattern is applied to one line at a time.
PARAM_PATTERN = re.compile(
    r"^\s*--\s*param\s*:\s*(?P<name>\w+)\s+(?P<type>[\w.]+(?:\[[\w., ]+\])?)(?P<optional>\?)?(?:\s+(?P<desc>.*\S))?\s*$",
    re.IGNORECASE,
)

# Matches only the leading ``-- param:`` marker, whatever follows it.
PARAM_PREFIX_PATTERN = re.compile(r"^\s*--\s*param\s*:", re.IGNORECASE)
# Matches a trailing ``(optional)`` marker (case-insensitive) at the end of a
# description, preceded by whitespace or the start of the string.
PARAM_OPTIONAL_DESCRIPTION_PATTERN = re.compile(r"(?:^|\s)\(optional\)\s*$", re.IGNORECASE)


# Maps alternative dialect spellings to the dialect name they stand for.
# All keys are lowercase.
# NOTE(review): presumably consulted when normalizing ``-- dialect:`` values;
# the lookup site is outside this view.
DIALECT_ALIASES: Final = {
    "postgresql": "postgres",
    "pg": "postgres",
    # PostgreSQL's procedural language is PL/pgSQL, conventionally written
    # "plpgsql"; without this entry the correct spelling is not aliased.
    "plpgsql": "postgres",
    # Transposed spelling kept so files that already use it keep resolving.
    "pgplsql": "postgres",
    "plsql": "oracle",
    "oracledb": "oracle",
    "tsql": "mssql",
}

# NOTE(review): presumably the minimum number of pieces expected when file
# content is split on QUERY_NAME_PATTERN (text before the directive, the
# captured name, the statement body) - confirm against the parser, which is
# outside this view.
MIN_QUERY_PARTS: Final = 3


def _parse_parameter_declaration(param_match: "re.Match[str]") -> ParameterDeclaration:
    """Convert a matched ``-- param:`` line into a ``ParameterDeclaration``.

    Args:
        param_match: Match object exposing the named groups ``name``,
            ``type``, ``optional`` and ``desc``.

    Returns:
        A declaration carrying the matched name, type string and description.
        It is marked as not required when the ``optional`` group is ``?`` or
        when the description ends with an ``(optional)`` marker; in the
        latter case the marker is removed from the description, and a
        description left empty becomes ``None``.
    """
    text = param_match.group("desc")
    optional_marker = param_match.group("optional")
    is_required = optional_marker != "?"

    # A trailing "(optional)" in the description is a second way to flag the
    # parameter as optional; it is stripped so it does not leak into the text.
    trailing_flag = None if text is None else PARAM_OPTIONAL_DESCRIPTION_PATTERN.search(text)
    if trailing_flag:
        is_required = False
        remainder = PARAM_OPTIONAL_DESCRIPTION_PATTERN.sub("", text).strip()
        text = remainder if remainder else None

    return ParameterDeclaration(
        name=param_match.group("name"),
        type_str=param_match.group("type"),
        description=text,
        required=is_required,
    )


class NamedStatement:
    """A single named SQL statement together with its metadata.

    Instances hold one statement extracted from a SQL file. Storage is
    slot-based, so only the attributes listed below exist on an instance:

    - ``name``: normalized name of the statement.
    - ``sql``: the SQL content.
    - ``dialect``: optional dialect specification (``None`` when absent).
    - ``start_line``: line position used for error reporting (default ``0``).
    - ``parameters``: tuple of parameter declarations (empty by default).
    """

    __slots__ = ("dialect", "name", "parameters", "sql", "start_line")

    def __init__(
        self,
        name: str,
        sql: str,
        dialect: "str | None" = None,
        start_line: int = 0,
        parameters: "tuple[ParameterDeclaration, ...]" = (),
    ) -> None:
        """Store the given values on the instance unchanged.

        Args:
            name: Name of the statement.
            sql: SQL content of the statement.
            dialect: Optional dialect specification.
            start_line: Line position of the statement.
            parameters: Parameter declarations attached to the statement.
        """
        # Required fields first, then the optional metadata.
        self.name, self.sql = name, sql
        self.dialect, self.start_line = dialect, start_line
        self.parameters = parameters