"""
Resilient model call — fallback pattern (simplified, representative excerpt).

The production version is more involved (per-provider error mapping, token
accounting, the cost-router downgrade step, Langfuse tracing). This is the core
idea, trimmed to the part worth discussing: how one call survives a provider
failing, and how that stays cheap.

Design decisions worth defending:
  - Distinguish *retryable* failures (429 / 5xx / timeout) from *terminal* ones
    (bad request, content rejected) — only the former should burn the fallback chain.
  - Retry the SAME model briefly first (transient blips are common) before paying
    the latency of switching providers.
  - The chain ends on a self-hosted local model, so the system degrades to
    "slower but up" rather than "down" when every hosted provider is unhappy.
  - Every attempt emits a trace event, so fallback frequency is observable
    (it's almost always rate-limits, not outages).
"""
from dataclasses import dataclass

# Status codes treated as transient: retrying the same request may succeed.
RETRYABLE = {
    429,  # rate limited
    500,  # internal server error
    502,  # bad gateway
    503,  # service unavailable
    504,  # gateway timeout
}


@dataclass
class Result:
    """Outcome of a model call that eventually succeeded.

    Attributes:
        text: Text returned by the call.
        model: Which model actually served the request.
        fell_back: Whether the request left the primary model.
    """

    text: str
    model: str
    fell_back: bool

class ProviderError(Exception):
    """Raised when a single provider request fails.

    Attributes:
        status: Status code of the failed request; callers compare it against
            `RETRYABLE` to decide whether the failure is worth retrying.
    """

    def __init__(self, status: int):
        # Forward to Exception so `args` is populated: without this the error
        # renders as an empty string in logs/tracebacks and cannot be rebuilt
        # by copy/pickle (which re-invoke the constructor with `args`).
        super().__init__(status)
        self.status = status

    def __str__(self) -> str:
        return f"provider error (status={self.status})"


def call_with_fallback(prompt, primary, fallback_chain, *, call, trace,
                       retries_per_model=2):
    """Try `primary`, then each model in `fallback_chain`, in order.

    A model is attempted up to `retries_per_model` times while it keeps failing
    with a status in `RETRYABLE`; once those attempts are used up the next
    model in the chain is tried. A status outside `RETRYABLE` is terminal and
    aborts the whole chain immediately, so a bad or rejected request is not
    replayed against every remaining model.

    Args:
        prompt: Passed through unchanged to `call`.
        primary: Model tried first.
        fallback_chain: Iterable of models tried, in order, after `primary`.
        call: `call(model, prompt)` performs one real request and returns the
            response text, or raises ProviderError.
        trace: `trace(event, **fields)` records an observability event; it is
            invoked once per attempt ("model_call_ok" / "model_call_err").
        retries_per_model: Maximum number of attempts per model (total
            attempts, not additional retries). Must be at least 1.

    Returns:
        Result carrying the response text, the model that served it, and
        whether that model differs from `primary`.

    Raises:
        ValueError: If `retries_per_model` is less than 1 (no request could
            ever be made).
        RuntimeError: If a terminal error is hit, or every model exhausted its
            attempts. The triggering ProviderError is attached as `__cause__`.
    """
    if retries_per_model < 1:
        raise ValueError(
            f"retries_per_model must be >= 1, got {retries_per_model}")

    last_error = None

    for model in (primary, *fallback_chain):
        for attempt in range(retries_per_model):
            # Keep the try body to the one call that can raise ProviderError.
            try:
                text = call(model, prompt)
            except ProviderError as e:
                last_error = e
                trace("model_call_err", model=model, attempt=attempt, status=e.status)
                if e.status not in RETRYABLE:
                    # Terminal: only retryable failures may consume the
                    # fallback chain, so stop here instead of moving on.
                    # NOTE(review): provider-scoped statuses (e.g. 401/404)
                    # also land here — confirm they should not fall back.
                    raise RuntimeError(
                        f"terminal error from model {model!r}; status={e.status}"
                    ) from e
            else:
                trace("model_call_ok", model=model, attempt=attempt)
                return Result(text=text, model=model, fell_back=(model != primary))
        # attempts exhausted on retryable errors for this model → next in chain

    # Whole chain exhausted: fail loudly, with the last status for the caller.
    # `last_error` is always set here: at least one attempt ran and every
    # attempt that did not return recorded its error.
    raise RuntimeError(
        f"all models failed; last status={last_error.status}") from last_error