Skip to content

guarded-llm

Strict JSON LLM calls — schema validation, budget guard, retry policy, multi-provider support.

pip install guarded-llm

Quick start (class-based API)

from pydantic import BaseModel
from guarded_llm import GuardedLLM, Budget, RetryPolicy

# Output contract: passing a pydantic.BaseModel subclass as `schema`
# makes `.call()` return a typed instance of it.
class Verdict(BaseModel):
    verdict: str
    confidence: float

llm = GuardedLLM(
    provider="deepseek",  # name of a registered provider
    model="deepseek-v4-flash",  # vendor-specific model id
    schema=Verdict,
    # usd_total caps spend over the Budget's lifetime; usd_per_call caps
    # any single recorded charge.
    budget=Budget(usd_total=0.50, usd_per_call=0.05),
    # Up to 3 provider calls in total, with a 1.0 s base backoff between them.
    retry=RetryPolicy(max_attempts=3, backoff_seconds=1.0),
)
# Returns the validated instance, so its fields are available as attributes.
out = llm.call("Is gravity a self-organized criticality system?")
print(out.verdict, out.confidence)

High-level class API

GuardedLLM

GuardedLLM(provider: str, model: str, schema: Any, budget: Budget | None = None, retry: RetryPolicy | None = None, max_tokens: int = 2048, **provider_kwargs: Any)

Reusable strict-JSON LLM caller with budget + retry + multi-vendor support.

Parameters:

Name Type Description Default
provider str

registered provider name ("anthropic", "deepseek", "openai", "kimi", "glm", or any custom registered provider).

required
model str

vendor-specific model id (e.g. "claude-sonnet-4.5", "deepseek-v4-flash", "kimi-k2.5", "glm-4.6").

required
schema Any

how to validate LLM output. One of: * a pydantic.BaseModel subclass (returns typed instance) * a dict (treated as a JSON Schema) * an LLMSchema instance * a class exposing .validate(d) -> (ok, err, instance)

required
budget Budget | None

optional Budget for cumulative cost cap.

None
retry RetryPolicy | None

optional RetryPolicy (defaults to 3 attempts, 1s backoff).

None
max_tokens int

max tokens per LLM call (default 2048).

2048
provider_kwargs Any

extra kwargs forwarded to the provider on every call (e.g. api_key=, base_url=, temperature=).

{}

Public API::

llm = GuardedLLM(provider, model, schema, budget=..., retry=...)
out = llm.call("prompt")            # returns validated instance
llm.last_stats.cost_usd             # cost of the last call
llm.budget.spent_usd                # cumulative spend
Source code in packages/guarded-llm/src/guarded_llm/core.py
def __init__(
    self,
    provider: str,
    model: str,
    schema: Any,
    budget: Budget | None = None,
    retry: RetryPolicy | None = None,
    max_tokens: int = 2048,
    **provider_kwargs: Any,
):
    """Validate the configuration and store it on the instance.

    Args:
        provider: non-empty name of the provider to call.
        model: non-empty vendor-specific model id.
        schema: validation target; normalised through `_coerce_schema`.
        budget: optional `Budget` used to cap spend.
        retry: optional `RetryPolicy`; a default one is created if omitted.
        max_tokens: token limit forwarded to the provider on each call.
        **provider_kwargs: extra keyword arguments kept for every call.

    Raises:
        ValueError: if `provider` or `model` is not a non-empty string,
            or if `schema` is None.
        TypeError: if `budget` / `retry` is given but has the wrong type.
    """
    # Argument checks run in declaration order so the first bad argument
    # is the one reported.
    if not (isinstance(provider, str) and provider):
        raise ValueError("provider must be a non-empty string")
    if not (isinstance(model, str) and model):
        raise ValueError("model must be a non-empty string")
    if schema is None:
        raise ValueError("schema is required")
    if not (budget is None or isinstance(budget, Budget)):
        raise TypeError("budget must be a Budget instance or None")
    if not (retry is None or isinstance(retry, RetryPolicy)):
        raise TypeError("retry must be a RetryPolicy instance or None")

    self.provider = provider
    self.model = model
    self.schema = _coerce_schema(schema)
    self.budget = budget
    self.retry = retry if retry else RetryPolicy()
    self.max_tokens = max_tokens
    # Copy so later mutation of the caller's mapping cannot leak in.
    self.provider_kwargs = {**provider_kwargs}
    # Starts empty; replaced by the stats of each `.call()`.
    self.last_stats: GuardedCallStats = GuardedCallStats()

call

call(prompt: str, *, system: str | None = None, messages: list[dict] | None = None, **kwargs: Any) -> Any

Run an LLM call and return the validated instance.

Parameters:

Name Type Description Default
prompt str

user prompt string (ignored if messages= is passed).

required
system str | None

optional system prompt prepended as the first message.

None
messages list[dict] | None

optional fully-formed messages list (overrides prompt).

None
**kwargs Any

forwarded to the provider (e.g. api_key=, temperature=).

{}

Returns:

Type Description
Any

The validated instance (Pydantic model instance, dict, or legacy dataclass instance — whatever the schema returns).

Raises:

Type Description
BudgetExceededError

if cumulative cost exceeds the Budget cap.

RetryExhausted

if all attempts fail validation.

LLMCallError

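listed for provider failures (network, auth, etc.); note that the retry loop shown below catches provider exceptions per attempt, records them in last_stats.errors, and raises RetryExhausted once all attempts are used.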

Source code in packages/guarded-llm/src/guarded_llm/core.py
def call(
    self,
    prompt: str,
    *,
    system: str | None = None,
    messages: list[dict] | None = None,
    **kwargs: Any,
) -> Any:
    """Run an LLM call and return the validated instance.

    Each attempt sends the messages to the provider, charges the reported
    cost to the budget (when one is configured), repairs the raw text with
    `state_machine_fix` and validates it with `validate_json`. After a
    validation failure the next attempt carries a short corrective hint.

    Args:
        prompt: user prompt string (ignored if `messages=` is passed).
        system: optional system prompt prepended as the first message.
        messages: optional fully-formed messages list (overrides `prompt`).
            The list and its dicts are copied before being modified.
        **kwargs: forwarded to the provider (e.g. `api_key=`, `temperature=`);
            they override the instance-level `provider_kwargs`.

    Returns:
        The validated instance (Pydantic model instance, dict, or legacy
        dataclass instance — whatever the schema returns).

    Raises:
        BudgetExceededError: propagated from `Budget.consume` when an
            attempt's cost would exceed a cap.
        RetryExhausted: if no attempt produced schema-valid output.
            Exceptions raised by the provider are caught per attempt,
            recorded in `last_stats.errors` and counted as failed attempts,
            so provider failures also surface as `RetryExhausted` once the
            attempts run out.
    """
    from .providers import get_provider

    if messages is None:
        messages = []
        if system:
            messages.append({"role": "system", "content": system})
        messages.append({"role": "user", "content": prompt})

    prov = get_provider(self.provider)
    merged_kwargs = {**self.provider_kwargs, **kwargs}

    stats = GuardedCallStats()
    # Publish the stats object immediately: it is mutated in place, so
    # `last_stats` is accurate on every exit path (success, budget stop,
    # exhaustion, or an unexpected exception).
    self.last_stats = stats

    # Only *validation* failures are echoed back to the model. Provider
    # errors (network, auth, ...) say nothing about the model's output and
    # may contain transport details that do not belong in a prompt.
    last_validation_err: str | None = None

    for attempt in range(self.retry.max_attempts):
        if attempt > 0:
            sleep_s = self.retry.sleep_seconds(attempt)
            if sleep_s > 0:
                time.sleep(sleep_s)

        attempt_messages = list(messages)
        if last_validation_err is not None and attempt_messages:
            hint = (
                f"\n\nPrevious output failed validation: {last_validation_err}\n"
                "Output valid JSON only — no prose, no markdown fences."
            )
            last = dict(attempt_messages[-1])
            content = last.get("content", "")
            # Append in place only to a plain-text user message; structured
            # (non-string) content gets the hint as a separate message.
            if last.get("role") == "user" and isinstance(content, str):
                last["content"] = content + hint
                attempt_messages[-1] = last
            else:
                attempt_messages.append({"role": "user", "content": hint.strip()})

        stats.attempts += 1
        try:
            call_out = prov.call(
                messages=attempt_messages,
                model=self.model,
                max_tokens=self.max_tokens,
                schema=self.schema,
                **merged_kwargs,
            )
        except Exception as exc:
            # Record and retry; deliberately leave the validation hint
            # untouched (see note above).
            stats.errors.append(
                f"attempt {attempt + 1}: provider {self.provider!r} raised: {exc}"
            )
            continue

        raw = (
            call_out.get("text", "") if isinstance(call_out, dict) else str(call_out)
        )
        cost = (
            float(call_out.get("cost_usd", 0.0))
            if isinstance(call_out, dict)
            else 0.0
        )
        stats.raw_outputs.append(raw)
        stats.cost_usd += cost

        # Charge the budget BEFORE checking schema, so a runaway loop
        # can't keep burning money on a bad prompt.
        if self.budget is not None:
            # Budget.consume() raises BudgetExceeded if we'd exceed any cap;
            # the partial spend stays inspectable through `last_stats`.
            self.budget.consume(cost)

        cleaned = state_machine_fix(raw)
        ok, err, parsed = validate_json(cleaned, self.schema)
        if ok:
            return parsed
        stats.errors.append(f"attempt {attempt + 1}: {err}")
        last_validation_err = err

    raise RetryExhausted(
        f"all {self.retry.max_attempts} attempts failed validation",
        attempts=stats.errors,
        last_raw=stats.raw_outputs[-1] if stats.raw_outputs else None,
    )

call_as_result

call_as_result(prompt: str, *, system: str | None = None, messages: list[dict] | None = None, **kwargs: Any) -> GuardrailResult

Like .call() but returns a GuardrailResult (never raises on validation failure — errors are accumulated in the result).

Source code in packages/guarded-llm/src/guarded_llm/core.py
def call_as_result(
    self,
    prompt: str,
    *,
    system: str | None = None,
    messages: list[dict] | None = None,
    **kwargs: Any,
) -> GuardrailResult:
    """Run `.call()` and package the outcome as a `GuardrailResult`.

    Exhausted retries are reported through the result (`parsed` is None and
    the per-attempt errors are included) instead of raising. Any other
    exception — notably `BudgetExceededError`, which is a hard stop rather
    than a soft validation failure — propagates to the caller.
    """
    outcome = None
    try:
        outcome = self.call(prompt, system=system, messages=messages, **kwargs)
    except RetryExhausted:
        # Soft failure: fall through and report it via the result object.
        pass

    snapshot = self.last_stats
    # Copy the lists so the result does not alias the live stats object.
    return GuardrailResult(
        parsed=outcome,
        errors=list(snapshot.errors),
        attempts=snapshot.attempts,
        cost_usd=snapshot.cost_usd,
        raw_outputs=list(snapshot.raw_outputs),
    )

GuardedCallStats dataclass

GuardedCallStats(attempts: int = 0, cost_usd: float = 0.0, errors: list[str] = list(), raw_outputs: list[str] = list())

Per-call metadata returned alongside the parsed instance via .last_stats.

Useful for cost dashboards / debugging without changing the return type of .call() (which by default returns the parsed instance directly).

Budget dataclass

Budget(usd_total: float, usd_per_call: float = math.inf)

Track and cap LLM spend across one or more GuardedLLM.call(...) runs.

Parameters:

Name Type Description Default
usd_total float

total budget cap in USD across the lifetime of this Budget.

required
usd_per_call float

max spend allowed for any single .consume(...) call. Defaults to infinity (no per-call cap).

inf

Example::

b = Budget(usd_total=0.50, usd_per_call=0.10)
b.consume(0.03)            # OK
b.consume(0.50)            # raises BudgetExceeded (over per-call cap)
b.spent_usd                # 0.03
b.remaining_usd            # 0.47

consume

consume(usd: float) -> None

Record a charge. Raise BudgetExceeded if it would exceed any cap.

The charge is NOT recorded when an exception is raised — so a caller can catch BudgetExceeded and the Budget state stays consistent.

Source code in packages/guarded-llm/src/guarded_llm/budget.py
def consume(self, usd: float) -> None:
    """Record a charge. Raise `BudgetExceeded` if it would exceed any cap.

    The charge is NOT recorded when an exception is raised — so a caller
    can catch `BudgetExceeded` and the Budget state stays consistent.

    Args:
        usd: cost to record, in USD. Must be a real number (bool is
            rejected), not NaN, and >= 0.

    Raises:
        TypeError: if `usd` is not an int or float.
        ValueError: if `usd` is negative or NaN.
        BudgetExceeded: if `usd` is above the per-call cap, or if adding it
            would push the running total above `usd_total`. For the
            per-call case the exception's `spent_usd` is the current total;
            for the cumulative case it is the total the charge would have
            produced.
    """
    import math

    if not isinstance(usd, (int, float)) or isinstance(usd, bool):
        raise TypeError("usd must be a number")
    if usd < 0:
        raise ValueError("usd must be >= 0")
    # NaN compares False against everything, so it would pass both cap
    # checks below and turn `spent_usd` into NaN — after which no later
    # charge could ever trip a cap. Reject it explicitly.
    if isinstance(usd, float) and math.isnan(usd):
        raise ValueError("usd must not be NaN")
    if usd > self.usd_per_call:
        raise BudgetExceeded(
            f"single call cost ${usd:.4f} > per-call cap ${self.usd_per_call:.4f}",
            spent_usd=self.spent_usd,
            cap_usd=self.usd_per_call,
        )
    projected = self.spent_usd + usd
    if projected > self.usd_total:
        raise BudgetExceeded(
            f"cumulative cost ${projected:.4f} "
            f"> total cap ${self.usd_total:.4f}",
            spent_usd=projected,
            cap_usd=self.usd_total,
        )
    self.spent_usd += usd

reset

reset() -> None

Zero out the spent-so-far counter (the caps are left unchanged).

Source code in packages/guarded-llm/src/guarded_llm/budget.py
def reset(self) -> None:
    """Forget all recorded spend.

    Only the running total is cleared; no other attribute is touched.
    """
    self.spent_usd = 0.0

BudgetExceeded module-attribute

BudgetExceeded = BudgetExceededError

RetryPolicy dataclass

RetryPolicy(max_attempts: int = 3, backoff_seconds: float = 1.0, jitter: bool = True)

Backoff configuration for retry loops.

Parameters:

Name Type Description Default
max_attempts int

total number of LLM calls to make before giving up (>= 1).

3
backoff_seconds float

base sleep between attempts; the delay grows linearly (attempt number × backoff_seconds).

1.0
jitter bool

if True, multiply sleep by uniform(0.5, 1.5) to avoid thundering-herd retry storms when many parallel clients share a single backend.

True

Example::

policy = RetryPolicy(max_attempts=5, backoff_seconds=2.0)
for attempt in range(policy.max_attempts):
    try:
        return _try_call()
    except RetryableError:
        time.sleep(policy.sleep_seconds(attempt + 1))  # sleep_seconds(0) is always 0
raise RetryExhausted("...")

sleep_seconds

sleep_seconds(attempt: int, rng: Random | None = None) -> float

Compute backoff for the given attempt number (0-indexed).

attempt 0 → 0 sec (don't sleep before first call),

attempt N → N * backoff_seconds (* jitter, if enabled).

Source code in packages/guarded-llm/src/guarded_llm/retry.py
def sleep_seconds(self, attempt: int, rng: random.Random | None = None) -> float:
    """Compute backoff for the given attempt number (0-indexed).

    Linear: attempt 0 → 0 sec (don't sleep before first call),
            attempt N → N * backoff_seconds (* jitter, if enabled).
    """
    if attempt <= 0:
        return 0.0
    base = attempt * self.backoff_seconds
    if not self.jitter:
        return base
    r = rng if rng is not None else random
    return base * r.uniform(0.5, 1.5)

RetryExhausted

RetryExhausted(message: str, attempts: list[str] | None = None, last_raw: str | None = None)

Bases: SchemaValidationError

Raised when all retry attempts fail.

Carries the per-attempt error list and the final raw LLM output so callers can inspect what went wrong without re-running the loop.

Source code in packages/guarded-llm/src/guarded_llm/exceptions.py
def __init__(self, message: str, attempts: list[str] | None = None, last_raw: str | None = None):
    """Store the message plus the retry diagnostics.

    Args:
        message: human-readable summary passed to the base exception.
        attempts: per-attempt error strings; a falsy value becomes `[]`.
        last_raw: raw LLM text from the final attempt, if any.
    """
    super().__init__(message)
    self.last_raw = last_raw
    self.attempts = attempts if attempts else []

SchemaValidator

SchemaValidator(model: Any)

Wrap a pydantic.BaseModel to fit guarded-llm's (ok, err, instance) API.

Parameters:

Name Type Description Default
model Any

a Pydantic v2 BaseModel subclass.

required

Example::

class Out(BaseModel):
    verdict: str
    confidence: float

v = SchemaValidator(Out)
v.validate({"verdict": "KEEP", "confidence": 0.9})  # -> (True, None, Out(...))
Source code in packages/guarded-llm/src/guarded_llm/validator.py
def __init__(self, model: Any):
    """Wrap `model` after checking that it is usable.

    Raises:
        ImportError: if the module-level `_HAS_PYDANTIC` flag is false.
        TypeError: if `model` is not a class derived from `BaseModel`.
    """
    if not _HAS_PYDANTIC:
        raise ImportError(
            "SchemaValidator requires `pydantic>=2`. "
            "Install with `pip install pydantic`."
        )

    # `isinstance(model, type)` first: issubclass() itself rejects
    # non-class arguments.
    is_model_class = isinstance(model, type) and issubclass(model, BaseModel)
    if not is_model_class:
        raise TypeError(
            f"model must be a pydantic.BaseModel subclass, got {model!r}"
        )

    self._model = model

validate

validate(d: Any) -> tuple[bool, str | None, Any]

Validate d against the Pydantic model.

Returns (ok, error_message_or_none, model_instance_or_none).

Source code in packages/guarded-llm/src/guarded_llm/validator.py
def validate(self, d: Any) -> tuple[bool, str | None, Any]:
    """Validate `d` against the Pydantic model.

    Returns (ok, error_message_or_none, model_instance_or_none).
    """
    try:
        inst = self._model.model_validate(d)
    except ValidationError as e:
        # Pretty-print the first error path so it's useful in retry hints
        errs = e.errors()
        if errs:
            first = errs[0]
            path = ".".join(str(p) for p in first.get("loc", [])) or "<root>"
            msg = f"{path}: {first.get('msg', 'validation error')}"
        else:
            msg = str(e)
        return False, msg, None
    except Exception as e:
        return False, f"pydantic error: {e}", None
    return True, None, inst

Functional / legacy API

guardrailed_llm_call

guardrailed_llm_call(prompt_fn: Callable[[str | None], str] | None = None, llm_caller: Callable[[str], str] | None = None, schema_cls: Any = None, max_retries: int = 3, *, provider: str | None = None, model: str | None = None, messages: list[dict] | None = None, schema: Any = None, max_tokens: int = 2048, budget_cap_usd: float | None = None, retry_backoff_s: float = 0.0, **kwargs: Any) -> Any

Run an LLM call wrapped in the full guardrail stack.

Two call styles supported:

Legacy (positional, kept for backwards compat with v4/lib):

parsed, errors = guardrailed_llm_call(prompt_fn, llm_caller, MySchema, max_retries=3)

Provider (keyword, new public API):

result = guardrailed_llm_call(
    provider="deepseek",
    model="deepseek-v4-flash",
    messages=[{"role": "user", "content": "..."}],
    schema=my_schema,
    max_retries=3,
    budget_cap_usd=0.05,
)
if result.ok:
    print(result.parsed)

The provider style returns a GuardrailResult with cost / attempts / raw_outputs metadata. The legacy style returns the (parsed, errors) tuple unchanged from v4/lib/llm_guardrail.py.

Source code in packages/guarded-llm/src/guarded_llm/guardrail.py
def guardrailed_llm_call(
    prompt_fn: Callable[[str | None], str] | None = None,
    llm_caller: Callable[[str], str] | None = None,
    schema_cls: Any = None,
    max_retries: int = 3,
    *,
    provider: str | None = None,
    model: str | None = None,
    messages: list[dict] | None = None,
    schema: Any = None,
    max_tokens: int = 2048,
    budget_cap_usd: float | None = None,
    retry_backoff_s: float = 0.0,
    **kwargs: Any,
) -> Any:
    """Run an LLM call wrapped in the full guardrail stack.

    Two call styles supported:

    **Legacy (positional, kept for backwards compat with v4/lib):**

        parsed, errors = guardrailed_llm_call(prompt_fn, llm_caller, MySchema, max_retries=3)

    **Provider (keyword, new public API):**

        result = guardrailed_llm_call(
            provider="deepseek",
            model="deepseek-v4-flash",
            messages=[{"role": "user", "content": "..."}],
            schema=my_schema,
            max_retries=3,
            budget_cap_usd=0.05,
        )
        if result.ok:
            print(result.parsed)

    The provider style returns a `GuardrailResult` with cost / attempts /
    raw_outputs metadata. The legacy style returns the (parsed, errors) tuple
    unchanged from v4/lib/llm_guardrail.py.
    """
    # Provider-style: keyword args route here
    if provider is not None or messages is not None:
        if provider is None or model is None or messages is None or schema is None:
            raise ValueError(
                "provider-style call requires provider=, model=, messages=, schema="
            )
        return _provider_call(
            provider=provider,
            model=model,
            messages=messages,
            schema=schema,
            max_retries=max_retries,
            max_tokens=max_tokens,
            budget_cap_usd=budget_cap_usd,
            retry_backoff_s=retry_backoff_s,
            **kwargs,
        )

    # Legacy style
    if prompt_fn is None or llm_caller is None or schema_cls is None:
        raise ValueError(
            "legacy call requires prompt_fn, llm_caller, schema_cls positional args"
        )
    return _legacy_call(prompt_fn, llm_caller, schema_cls, max_retries)

GuardrailResult dataclass

GuardrailResult(parsed: Any, errors: list[str] = list(), attempts: int = 0, cost_usd: float = 0.0, raw_outputs: list[str] = list())

Outcome of a guarded LLM call.

Attributes:

Name Type Description
parsed Any

validated instance (dict for LLMSchema, dataclass for legacy schemas) or None if all retries failed

errors list[str]

per-attempt error strings (empty if first try succeeded)

attempts int

number of LLM calls actually made

cost_usd float

estimated cumulative cost in USD (provider-reported, may be 0)

raw_outputs list[str]

raw text returned by each attempt (for debugging)

state_machine_fix

state_machine_fix(raw: str) -> str

Best-effort repair of common LLM JSON drift bugs.

Applies, in order: 1. fence strip + JSON envelope locate 2. comment strip 3. NaN / Infinity -> null 4. single-quote -> double-quote 5. unescaped interior quote escape 6. trailing comma removal

Never raises; hands back its best guess.

Source code in packages/guarded-llm/src/guarded_llm/guardrail.py
def state_machine_fix(raw: str) -> str:
    """Run the JSON repair passes over `raw` and return the result.

    Non-string input is returned as `str(raw)` without any repair.
    For strings the passes are applied in this order, each one feeding
    the next:

        1. fence strip + JSON envelope locate
        2. comment strip
        3. NaN / Infinity -> null
        4. single-quote -> double-quote
        5. unescaped interior quote escape
        6. trailing comma removal

    Intended as a best-effort cleanup: the output is a guess, not a
    guarantee of valid JSON.
    """
    if not isinstance(raw, str):
        return str(raw)

    # Order matters: each pass assumes the earlier ones already ran.
    repair_passes = (
        _strip_fences_and_locate,
        _strip_comments,
        _replace_nan_infinity,
        _single_to_double_quotes,
        _fix_unescaped_interior_quotes,
        _remove_trailing_commas,
    )
    text = raw
    for repair in repair_passes:
        text = repair(text)
    return text

validate_json

validate_json(raw_or_dict: Any, schema_cls: Any) -> tuple[bool, str | None, Any]

Parse + schema-validate.

Accepts a raw string (json.loads first) or an already-parsed dict/list. Schema can be either a dataclass schema class or an LLMSchema instance. Returns (success, error_or_none, instance_or_none).

Source code in packages/guarded-llm/src/guarded_llm/guardrail.py
def validate_json(raw_or_dict: Any, schema_cls: Any) -> tuple[bool, str | None, Any]:
    """Parse + schema-validate.

    Accepts a raw string (json.loads first) or an already-parsed dict/list.
    Schema can be either a dataclass schema class or an LLMSchema instance.
    Returns (success, error_or_none, instance_or_none).
    """
    if isinstance(raw_or_dict, (dict, list)):
        parsed = raw_or_dict
    else:
        try:
            parsed = json.loads(raw_or_dict)
        except json.JSONDecodeError as e:
            return False, f"json parse error: {e.msg} (line {e.lineno} col {e.colno})", None
        except Exception as e:
            return False, f"json parse error: {e}", None

    return validate_response(parsed, schema_cls)

Schemas

LLMSchema

LLMSchema(schema: dict[str, Any])

Generic JSON Schema wrapper compatible with guardrailed_llm_call.

Example

schema = LLMSchema({
    "type": "object",
    "properties": {
        "verdict": {"type": "string", "enum": ["KEEP", "REJECT"]},
        "confidence": {"type": "number", "minimum": 0, "maximum": 1},
    },
    "required": ["verdict", "confidence"],
})
ok, err, inst = schema.validate({"verdict": "KEEP", "confidence": 0.9})
assert ok and inst == {"verdict": "KEEP", "confidence": 0.9}

Notes
  • The returned instance is just the validated dict (no class wrapping).
  • Requires jsonschema>=4.0; if not installed the constructor raises.
Source code in packages/guarded-llm/src/guarded_llm/schemas.py
def __init__(self, schema: dict[str, Any]):
    """Check `schema` and build a validator for it.

    Raises:
        ImportError: if the module-level `_HAS_JSONSCHEMA` flag is false,
            or if `_best_validator()` returns None.
        TypeError: if `schema` is not a dict.
        ValueError: if `schema` is rejected by the validator class's
            `check_schema`.
    """
    if not _HAS_JSONSCHEMA:
        raise ImportError(
            "LLMSchema requires `jsonschema>=4.0`. "
            "Install with `pip install jsonschema` or `pip install guarded-llm[dev]`."
        )
    if not isinstance(schema, dict):
        raise TypeError(f"schema must be a dict, got {type(schema).__name__}")

    draft_cls = _best_validator()
    if draft_cls is None:
        raise ImportError(
            "LLMSchema requires `jsonschema>=3.2`. "
            "Install with `pip install jsonschema`."
        )

    # Check the schema itself up front so a misconfigured schema fails at
    # construction time instead of on the first validation.
    try:
        draft_cls.check_schema(schema)
    except jsonschema.exceptions.SchemaError as exc:
        raise ValueError(f"invalid JSON Schema: {exc.message}") from exc

    self._schema = schema
    self._validator = draft_cls(schema)

validate

validate(d: Any) -> tuple[bool, str | None, Any]

Validate d against this schema.

Returns (ok, error_message_or_none, validated_instance_or_none).

Source code in packages/guarded-llm/src/guarded_llm/schemas.py
def validate(self, d: Any) -> tuple[bool, str | None, Any]:
    """Validate `d` against this schema.

    Returns (ok, error_message_or_none, validated_instance_or_none).
    """
    errors = sorted(self._validator.iter_errors(d), key=lambda e: list(e.absolute_path))
    if not errors:
        return True, None, d
    first = errors[0]
    path = ".".join(str(p) for p in first.absolute_path) or "<root>"
    msg = f"{path}: {first.message}"
    return False, msg, None

validate_response

validate_response(d: Any, schema: 'LLMSchema | type') -> tuple[bool, str | None, Any]

Validate a parsed dict against either an LLMSchema or a dataclass schema class.

Source code in packages/guarded-llm/src/guarded_llm/schemas.py
def validate_response(d: Any, schema: "LLMSchema | type") -> tuple[bool, str | None, Any]:
    """Delegate validation of `d` to the schema's own `.validate()`.

    Works for an `LLMSchema` instance as well as any other object exposing
    a `validate` attribute. A schema without one yields a failure tuple
    instead of raising.
    """
    can_validate = isinstance(schema, LLMSchema) or hasattr(schema, "validate")
    if not can_validate:
        return False, f"schema {schema!r} has no .validate() method", None
    return schema.validate(d)

Layer3CriticVerdict dataclass

Layer3CriticVerdict(class_id: str, review_verdict: str, confidence: str, flagged_count: int, reasoning: str)

B1 critic pass output per class.

review_verdict: "KEEP" | "SPLIT" | "REJECT" | "MERGE_WITH()"

confidence: "low" | "medium" | "high"

Layer4Prediction dataclass

Layer4Prediction(class_id: str, target_system: str, physical_quantity: str, predicted_band: list[float], evidence_url: str | None = None, journal_target: str | None = None)

A predicted observation in a target system.

B3EnsembleReview dataclass

B3EnsembleReview(class_id: str, model_id: str, verdict: str, confidence: float, rationale: str)

One model's verdict on one class in an N-model ensemble vote.

Providers

BaseProvider

Bases: ABC

Interface every provider adapter implements.

call abstractmethod

call(messages: list[dict], model: str, max_tokens: int, schema: Any = None, **kwargs: Any) -> dict

Send messages to the LLM and return {"text": str, "cost_usd": float}.

Source code in packages/guarded-llm/src/guarded_llm/providers/__init__.py
@abstractmethod
def call(
    self,
    messages: list[dict],
    model: str,
    max_tokens: int,
    schema: Any = None,
    **kwargs: Any,
) -> dict:
    """Send `messages` to the LLM and return {"text": str, "cost_usd": float}.

    Args:
        messages: chat messages to send, as a list of dicts.
        model: vendor-specific model id.
        max_tokens: token limit for this call.
        schema: the schema the output will be validated against, passed
            through by `GuardedLLM.call`; defaults to None.
        **kwargs: additional provider options (`GuardedLLM.call` forwards
            its merged provider kwargs here).

    Returns:
        A dict with the raw model text under "text" and the cost of the
        call under "cost_usd". `GuardedLLM.call` reads both keys with
        defaults ("" and 0.0), and treats a non-dict result as plain text
        with zero cost.
    """
    # Abstract: concrete provider adapters supply the implementation.
    ...

get_provider

get_provider(name: str) -> BaseProvider

Instantiate and return the provider named name.

Raises ValueError if the provider isn't registered.

Source code in packages/guarded-llm/src/guarded_llm/providers/__init__.py
def get_provider(name: str) -> BaseProvider:
    """Look up `name` in the provider registry and return a new instance.

    The registered class is called with no arguments on every lookup.

    Raises:
        ValueError: if `name` is not registered; the message lists the
            registered names in sorted order.
    """
    if name in PROVIDERS:
        provider_cls = PROVIDERS[name]
        return provider_cls()

    known = sorted(PROVIDERS.keys())
    raise ValueError(f"Unknown provider: {name!r}. Registered: {known}")

list_providers

list_providers() -> list[str]

Return the sorted list of registered provider names.

Source code in packages/guarded-llm/src/guarded_llm/providers/__init__.py
def list_providers() -> list[str]:
    """List every registered provider name in sorted order."""
    names = sorted(PROVIDERS)
    return names

register_provider

register_provider(name: str, cls: type[BaseProvider]) -> None

Add (or override) a provider in the registry.

Source code in packages/guarded-llm/src/guarded_llm/providers/__init__.py
def register_provider(name: str, cls: type[BaseProvider]) -> None:
    """Store `cls` in the registry under `name`.

    An existing entry with the same name is replaced.

    Raises:
        TypeError: if `cls` does not subclass `BaseProvider`.
    """
    if issubclass(cls, BaseProvider):
        PROVIDERS[name] = cls
        return
    raise TypeError(f"{cls!r} must subclass BaseProvider")

Exceptions

GuardrailError

Bases: Exception

Base class for all guarded-llm errors.

SchemaValidationError

SchemaValidationError(message: str, attempts: list[str] | None = None, last_raw: str | None = None)

Bases: GuardrailError

Raised when LLM output fails schema validation after all retries.

Attributes:

Name Type Description
attempts

list of per-attempt error strings (length == max_retries)

last_raw

the raw LLM text from the final attempt (may aid debugging)

Source code in packages/guarded-llm/src/guarded_llm/exceptions.py
def __init__(self, message: str, attempts: list[str] | None = None, last_raw: str | None = None):
    """Initialise the error with its message and validation diagnostics.

    Args:
        message: human-readable summary passed to the base exception.
        attempts: per-attempt error strings; a falsy value becomes `[]`.
        last_raw: raw LLM text from the final attempt, if any.
    """
    super().__init__(message)
    self.last_raw = last_raw
    self.attempts = attempts if attempts else []

LLMCallError

Bases: GuardrailError

Raised when the underlying LLM HTTP/SDK call fails (network, auth, etc.).

BudgetExceededError

BudgetExceededError(message: str, spent_usd: float, cap_usd: float)

Bases: GuardrailError

Raised when a charge would exceed the user-supplied per-call cap or push cumulative spend above the total cap.

Source code in packages/guarded-llm/src/guarded_llm/exceptions.py
def __init__(self, message: str, spent_usd: float, cap_usd: float):
    """Initialise the error with its message and the figures involved.

    Args:
        message: human-readable summary passed to the base exception.
        spent_usd: spend figure reported by the code that raised.
        cap_usd: the cap that was hit.
    """
    super().__init__(message)
    self.spent_usd, self.cap_usd = spent_usd, cap_usd