Systems Library / AI Model Setup / How to Set Up AI Model Versioning
AI Model Setup routing optimization

How to Set Up AI Model Versioning

Manage model version transitions without breaking production systems.

Jay Banlasan

Jay Banlasan

The AI Systems Guy

Anthropic deprecated claude-2.1 with 30 days notice. I had 14 workflows using it, spread across three different codebases. Finding them all and updating them took a full day, and two workflows broke in production during the transition because the output format changed between versions. Proper model version management in production means one config file controls every model reference in your stack, and version transitions happen in a controlled test-then-swap process, not a fire drill.

Model versioning is infrastructure hygiene. You probably don't hardcode database connection strings in application code. Model names should get the same treatment.

What You Need Before Starting

Step 1: Build a Central Model Registry

One file defines all model aliases used across your stack. Nothing hardcodes a model string directly.

# model_registry.py
from dataclasses import dataclass, field
from typing import Optional
import json, os
from pathlib import Path

@dataclass
class ModelVersion:
    """One registry entry: maps a stable in-code alias to a concrete provider model."""
    alias: str           # What your code uses: "fast", "balanced", "powerful"
    provider: str        # "anthropic", "openai", "google"
    model_id: str        # The actual model string the API expects
    max_tokens: int      # Default max output for this model
    context_window: int  # Total context limit
    deprecated: bool = False             # When True, get_model() emits a warning
    deprecated_on: Optional[str] = None  # ISO date string of the deprecation
    notes: str = ""                      # Free-form guidance on when to use this alias

# Single source of truth for model versions: every AI call resolves its model
# through one of these aliases, so swapping a version is a one-line change here.
REGISTRY: dict[str, ModelVersion] = {
    "fast": ModelVersion(
        alias="fast",
        provider="anthropic",
        model_id="claude-haiku-3",
        max_tokens=4096,
        context_window=200_000,
        notes="Default for classification, extraction, simple generation"
    ),
    "balanced": ModelVersion(
        alias="balanced",
        provider="anthropic",
        model_id="claude-sonnet-4-5",
        max_tokens=8192,
        context_window=200_000,
        notes="Default for complex reasoning, structured outputs"
    ),
    "powerful": ModelVersion(
        alias="powerful",
        provider="anthropic",
        model_id="claude-opus-4-5",
        max_tokens=8192,
        context_window=200_000,
        notes="Reserved for highest complexity tasks"
    ),
    "code": ModelVersion(
        alias="code",
        provider="openai",
        model_id="gpt-4o",
        max_tokens=4096,
        context_window=128_000,
        notes="Code review and generation"
    ),
}

def get_model(alias: str) -> ModelVersion:
    """Resolve *alias* to its registry entry.

    Raises KeyError for an unknown alias; emits a warning (but still
    returns the entry) when the alias is flagged as deprecated.
    """
    entry = REGISTRY.get(alias)
    if entry is None:
        raise KeyError(f"Unknown model alias: '{alias}'. "
                       f"Known aliases: {list(REGISTRY.keys())}")
    if entry.deprecated:
        import warnings
        warnings.warn(f"Model alias '{alias}' is deprecated since {entry.deprecated_on}. "
                      f"Update your code to use a current alias.")
    return entry

def get_model_id(alias: str) -> str:
    """Convenience wrapper: return just the provider model string for *alias*."""
    entry = get_model(alias)
    return entry.model_id

Every AI call in your stack uses get_model_id("fast") instead of "claude-haiku-3". When the model ID needs to change, you update one line in the registry.

Step 2: Build an Environment Override Layer

Production, staging, and development can point at different model versions without code changes.

import os

ENV_OVERRIDES = {
    # Format: MODEL_OVERRIDE_{ALIAS_UPPER}=model-id
    # e.g. MODEL_OVERRIDE_FAST=claude-haiku-3-5
    # NOTE(review): this dict is never read by the code shown — get_model_id()
    # below queries os.environ directly. Kept here to document the convention.
}

def get_model_id(alias: str) -> str:
    """Resolve *alias* to a model id, honoring environment overrides.

    An env var named MODEL_OVERRIDE_<ALIAS> (alias upper-cased, '-' mapped
    to '_') takes precedence over the registry default, so staging can point
    at a candidate model with zero code changes.
    """
    normalized = alias.upper().replace('-', '_')
    override = os.getenv(f"MODEL_OVERRIDE_{normalized}")
    return override if override else get_model(alias).model_id

# Usage in .env file for staged rollout:
# MODEL_OVERRIDE_FAST=claude-haiku-3-5  <-- test new version in staging
# (production .env has no override — uses registry default)

This lets you test a new model version in staging by setting an environment variable, with zero code changes.

Step 3: Build a Transition Plan for Version Updates

When a model version needs to change (provider deprecation, cost change, quality improvement), follow a structured process.

# version_transition.py

@dataclass
class TransitionPlan:
    """Describes a controlled swap from a current model version to a candidate."""
    alias: str                # Registry alias being transitioned
    current_model_id: str     # Model currently serving traffic
    candidate_model_id: str   # Model under evaluation as the replacement
    test_inputs: list[str]    # Representative prompts used to compare the two
    evaluator: callable  # function(old_output, new_output) -> float (0-1 similarity/quality)
    rollout_pct: float = 0.0  # 0.0 = testing only, 1.0 = fully rolled out
    status: str = "planned"   # planned | testing | partial | complete | rolled_back

import anthropic
import random  # NOTE(review): unused in the code shown here

# Module-level client shared by all transition tests.
# NOTE(review): Anthropic() presumably picks up ANTHROPIC_API_KEY from the
# environment — confirm credentials are configured wherever this runs.
_client = anthropic.Anthropic()

def run_transition_test(plan: TransitionPlan, pass_threshold: float = 0.85) -> dict:
    """Run every test input through both models and score candidate vs. current.

    Args:
        plan: The transition under evaluation.
        pass_threshold: Minimum per-input evaluator score for the transition
            to count as passed (previously hardcoded to 0.85).

    Returns:
        Summary dict with aggregate score stats, a pass/fail flag, and the
        raw (current_output, candidate_output) pairs for inspection.

    Raises:
        ValueError: If the plan has no test inputs. (Previously this surfaced
            as a ZeroDivisionError when averaging an empty score list.)
    """
    if not plan.test_inputs:
        raise ValueError("TransitionPlan.test_inputs is empty; nothing to test")

    current_results: list[str] = []
    candidate_results: list[str] = []

    for inp in plan.test_inputs:
        # Send the identical input to both models so outputs are comparable.
        for model_id, results_list in [
            (plan.current_model_id, current_results),
            (plan.candidate_model_id, candidate_results)
        ]:
            response = _client.messages.create(
                model=model_id, max_tokens=512,
                messages=[{"role": "user", "content": inp}]
            )
            results_list.append(response.content[0].text)

    scores = [
        plan.evaluator(curr, cand)
        for curr, cand in zip(current_results, candidate_results)
    ]

    return {
        "alias": plan.alias,
        "current": plan.current_model_id,
        "candidate": plan.candidate_model_id,
        "test_count": len(plan.test_inputs),
        "avg_quality_score": round(sum(scores) / len(scores), 3),
        "min_score": round(min(scores), 3),
        "passed": all(s >= pass_threshold for s in scores),
        "results": list(zip(current_results, candidate_results))
    }

Step 4: Implement Canary Rollout

Don't switch 100% of traffic at once. Route a small percentage to the candidate model first.

import hashlib

def select_model_for_request(alias: str, request_id: str,
                               rollout_pct: float = 0.0) -> str:
    """
    Deterministically choose the stable or candidate model for one request.

    The request_id is hashed into a bucket in [0, 1), so a given id always
    lands on the same model and a user's experience stays consistent for
    the whole canary. rollout_pct is the 0.0-1.0 fraction of traffic that
    goes to the candidate.
    """
    if rollout_pct > 0.0:
        candidate_id = get_active_candidate(alias)  # from your transition plan
        if candidate_id:
            # md5 (not Python's salted hash()) keeps routing stable across runs;
            # this is traffic bucketing, not a security use of the hash.
            digest = hashlib.md5(request_id.encode()).hexdigest()
            bucket = (int(digest, 16) % 10000) / 10000.0
            if bucket < rollout_pct:
                return candidate_id
    return get_model_id(alias)

# Canary plan stored in Redis or a simple JSON file
ACTIVE_TRANSITIONS: dict[str, dict] = {}

def get_active_candidate(alias: str) -> str | None:
    transition = ACTIVE_TRANSITIONS.get(alias)
    if transition and transition["status"] in ("partial",):
        return transition["candidate_model_id"]
    return None

def start_canary(alias: str, candidate_id: str, rollout_pct: float = 0.05):
    """Begin routing *rollout_pct* of *alias* traffic to *candidate_id*."""
    plan = dict(
        candidate_model_id=candidate_id,
        rollout_pct=rollout_pct,
        status="partial",
    )
    ACTIVE_TRANSITIONS[alias] = plan
    print(f"Canary started: {alias} -> {candidate_id} at {rollout_pct*100}% traffic")

def promote_to_full(alias: str):
    """Once canary looks good, update registry and retire transition."""
    transition = ACTIVE_TRANSITIONS.get(alias, {})
    candidate = transition.get("candidate_model_id")
    if not candidate:
        return  # no active transition for this alias; nothing to promote
    REGISTRY[alias].model_id = candidate
    ACTIVE_TRANSITIONS.pop(alias, None)
    print(f"Promoted {alias} to {candidate}")

Step 5: Track Version-Level Performance

Log which model version handled each request so you can compare performance before and after a transition.

import sqlite3
from datetime import datetime

def log_model_call(alias: str, model_id: str, latency_ms: int,
                    input_tokens: int, output_tokens: int,
                    cost_usd: float, quality_score: float = None):
    conn = sqlite3.connect("model_versions.db")
    conn.execute("""
        CREATE TABLE IF NOT EXISTS model_version_calls (
            ts TEXT, alias TEXT, model_id TEXT, latency_ms INTEGER,
            input_tokens INTEGER, output_tokens INTEGER, cost_usd REAL,
            quality_score REAL
        )
    """)
    conn.execute("INSERT INTO model_version_calls VALUES (?,?,?,?,?,?,?,?)",
                 (datetime.utcnow().isoformat(), alias, model_id,
                  latency_ms, input_tokens, output_tokens, cost_usd, quality_score))
    conn.commit()
    conn.close()

def compare_versions(alias: str, days: int = 7) -> list:
    """Aggregate per-model metrics for *alias* over the last *days* days.

    Returns:
        A list of dicts, one per model_id, with call count and average
        latency, cost, and quality — so versions can be compared side by side
        before and after a transition.
    """
    conn = sqlite3.connect("model_versions.db")
    try:
        # Normalize both sides through datetime(): stored timestamps use
        # isoformat()'s 'T' separator while SQLite's 'now' uses a space, so a
        # raw string comparison of ts misclassifies rows on the boundary day.
        rows = conn.execute("""
            SELECT model_id,
                   COUNT(*) as calls,
                   AVG(latency_ms) as avg_latency,
                   AVG(cost_usd) as avg_cost,
                   AVG(quality_score) as avg_quality
            FROM model_version_calls
            WHERE alias = ? AND datetime(ts) >= datetime('now', ?)
            GROUP BY model_id
        """, (alias, f'-{days} days')).fetchall()
    finally:
        # Close even on query failure so the connection isn't leaked.
        conn.close()
    # `or 0` guards the all-NULL case (e.g. quality_score never logged).
    return [{"model": r[0], "calls": r[1], "avg_latency_ms": round(r[2] or 0),
             "avg_cost": round(r[3] or 0, 6), "avg_quality": round(r[4] or 0, 3)}
            for r in rows]

Step 6: Add Deprecation Warnings Ahead of Cutoff

When you know a model is being deprecated, mark it in the registry early so developers get warnings during development.

def mark_deprecated(alias: str, deprecated_on: str, replacement_alias: str | None = None):
    """Flag a registry alias as deprecated so get_model() starts warning callers.

    Args:
        alias: Registry alias to flag.
        deprecated_on: ISO date string for when the deprecation takes effect.
        replacement_alias: Optional alias developers should migrate to;
            appended to the entry's notes. (Annotation fixed: the parameter
            defaults to None, so its type is `str | None`, not `str`.)

    Raises:
        KeyError: If *alias* is not in the registry.
    """
    if alias not in REGISTRY:
        raise KeyError(f"Unknown alias: {alias}")
    entry = REGISTRY[alias]
    entry.deprecated = True
    entry.deprecated_on = deprecated_on
    if replacement_alias:
        entry.notes += f" DEPRECATED: use '{replacement_alias}' instead."
    print(f"Marked {alias} as deprecated since {deprecated_on}")

# Run this when you get deprecation notice from a provider
# mark_deprecated("fast", "2025-01-01", "fast-v2")

What to Build Next

Related Reading

Want this system built for your business?

Get a free assessment. We will map every system your business needs and show you the ROI.

Get Your Free Assessment

Related Systems