# Model-routing config (sanitized excerpt of the real configuration)
# -----------------------------------------------------------------
# Each agent ROLE resolves to a model via an internal alias, and a cost-aware
# router (applied before each call) can swap to a cheaper model for routine work.
# On failure (rate-limit / 5xx / timeout) the request falls through an ordered
# fallback chain, so a single provider's outage degrades service gracefully
# instead of dropping work.
#
# Model IDs and the fallback chain below are real (OpenRouter-hosted, plus a
# self-hosted local model as the final tier). Secrets, endpoints, API keys, and
# the full 8-role roster are omitted — this shows 4 of 8 roles as an illustration.

router:
  # Low-cost tier for routine / low-stakes calls. The router moves up to a
  # role's primary model only when the task warrants it.
  cost_downgrade_model: 'openrouter/google/gemini-2.5-flash-lite'

  # Models tried, in list order, once a call has failed and the retries on the
  # role's primary model are used up. The first two entries are hosted via
  # OpenRouter; the last one is self-hosted.
  fallback_chain:
    # cross-provider backstop
    - 'openrouter/nvidia/nemotron-3-super-120b-a12b'
    # final hosted fallback
    - 'openrouter/x-ai/grok-3-mini'
    # self-hosted, last resort
    - 'ollama_chat/llama3.3'

  # NOTE(review): presumably retries in addition to the first attempt, counted
  # separately for each model tried — confirm against the router code.
  retries_per_model: 2

  # NOTE(review): this file does not say whether the timeout bounds a single
  # attempt or a whole call including retries. If it is per attempt, the worst
  # case over primary + 3 fallbacks is (1 + 2) * 4 * 60 s = 720 s — confirm
  # that is acceptable for latency-sensitive roles.
  timeout_seconds: 60

roles:  # 4 of 8 shown
  # Plans and routes work. Latency-sensitive; kept near-deterministic with the
  # lowest temperature of the roles shown.
  operator:
    model: 'openrouter/minimax/minimax-m2.5'
    temperature: 0.15
    max_output_tokens: 8192

  # Writes and debugs code. Gets twice the output-token budget of the other
  # roles shown.
  coder:
    model: 'openrouter/qwen/qwen3-coder-next'
    temperature: 0.25
    max_output_tokens: 16384

  # Retrieval-heavy synthesis. Highest temperature of the roles shown.
  researcher:
    model: 'openrouter/deepseek/deepseek-v4-flash'
    temperature: 0.7
    max_output_tokens: 8192

  # Structured analysis over data.
  analyst:
    model: 'openrouter/qwen/qwen3-235b-a22b-2507'
    temperature: 0.4
    max_output_tokens: 8192

budget:
  # Spend is metered per call, ahead of dispatch, and bounded by a hard daily
  # cap. The router favours the downgrade tier and escalates only while
  # budget remains.
  daily_usd_cap: 7.5
  track_per_request: true
  # Fail closed: once the daily cap is hit, calls are blocked rather than
  # allowed to overspend.
  on_exceed: 'block'
