Systems Library / AI Model Setup / How to Build Token Budget Management Systems
AI Model Setup routing optimization

How to Build Token Budget Management Systems

Set per-team and per-project AI token budgets with automatic alerts.

Jay Banlasan

Jay Banlasan

The AI Systems Guy

One team at a client's company ran an experimental AI workflow on a Friday afternoon and burned through their entire month's token allocation by Saturday morning. No alert fired. No one noticed until the invoice came. Building AI token budget tracking stops this from happening. Every team gets a budget, every call counts against it, and when a team is running low the system tells them before it becomes a problem.

Token budgets do more than prevent cost overruns. They force teams to think carefully about which AI tasks are worth running. When tokens have a visible ceiling, people stop automating low-value work just because they can.

What You Need Before Starting

Step 1: Define the Budget Schema

Budgets live at three levels: global (total spend cap), team (per department), and project (per workflow).

import json
import logging
import ssl
from dataclasses import dataclass, asdict
from datetime import timezone
from typing import Optional

@dataclass
class TokenBudget:
    """A monthly token allowance for one scope (global, team, or project).

    Fields, in positional order:
      id                  -- budget key, e.g. "team:marketing" or
                             "project:lead-enrichment"
      name                -- label used in alerts and reports
      monthly_token_limit -- total tokens per month (input + output)
      alert_threshold     -- fraction of the limit at which to alert
                             (0.8 = alert at 80%)
      hard_cap            -- True = block requests at the limit,
                             False = alert only
      owner_email         -- optional address that receives email alerts
      notes               -- optional free-form notes
    """

    id: str
    name: str
    monthly_token_limit: int
    alert_threshold: float
    hard_cap: bool
    owner_email: Optional[str] = None
    notes: Optional[str] = None

# Example budgets: two advisory team budgets, one hard-capped batch project,
# and the hard-capped global ceiling.
# NOTE(review): the "[email protected]" owner addresses look like redaction
# placeholders — replace with real addresses before relying on email alerts.
BUDGETS = [
    TokenBudget(
        id="team:marketing",
        name="Marketing Team",
        monthly_token_limit=5_000_000,
        alert_threshold=0.8,
        hard_cap=False,
        owner_email="[email protected]",
    ),
    TokenBudget(
        id="team:engineering",
        name="Engineering Team",
        monthly_token_limit=20_000_000,
        alert_threshold=0.9,
        hard_cap=False,
        owner_email="[email protected]",
    ),
    TokenBudget(
        id="project:nightly",
        name="Nightly Enrichment",
        monthly_token_limit=10_000_000,
        alert_threshold=0.85,
        hard_cap=True,
        owner_email=None,
    ),
    TokenBudget(
        id="global",
        name="Global Cap",
        monthly_token_limit=50_000_000,
        alert_threshold=0.9,
        hard_cap=True,
        owner_email="[email protected]",
    ),
]

The hard_cap flag is important. Some budgets should be advisory (alert but don't block). Others, like nightly batch jobs, should stop cold when they hit the limit to prevent surprise overruns.

Step 2: Build Redis-Based Real-Time Counters

Redis gives you atomic increments with near-zero latency. This is what makes per-request budget enforcement practical.

import redis
from datetime import datetime

# Shared Redis connection used by every counter and alert-marker helper.
r = redis.Redis(
    host="localhost",
    port=6379,
    db=1,
)

def month_key(budget_id: str) -> str:
    """Return the Redis key that holds ``budget_id``'s usage for the current month.

    The month is taken from the current UTC time and formatted as ``YYYY-MM``,
    so each budget gets a fresh counter key when the UTC month rolls over.

    Args:
        budget_id: Budget identifier, e.g. "team:marketing".

    Returns:
        A key of the form ``budget:<budget_id>:<YYYY-MM>``.
    """
    # Timezone-aware replacement for the deprecated datetime.utcnow();
    # the formatted month string is the same.
    month = datetime.now(timezone.utc).strftime("%Y-%m")
    return f"budget:{budget_id}:{month}"

def add_tokens(budget_id: str, tokens: int) -> int:
    """Add ``tokens`` to this month's counter for ``budget_id`` and return the new total."""
    # 60 days comfortably outlives the month being counted, so stale
    # counters clean themselves up without losing live data.
    ttl_seconds = 60 * 24 * 3600
    counter = month_key(budget_id)
    running_total = r.incrby(counter, tokens)
    r.expire(counter, ttl_seconds)
    return running_total

def get_usage(budget_id: str) -> int:
    """Return the tokens recorded this month for ``budget_id`` (0 when nothing is stored)."""
    raw = r.get(month_key(budget_id))
    if not raw:
        return 0
    return int(raw)

def reset_budget(budget_id: str):
    """Delete this month's counter for ``budget_id``. Manual reset — use sparingly."""
    r.delete(month_key(budget_id))

Step 3: Build the Budget Enforcement Layer

This wraps every AI call and enforces limits before and after.

import anthropic
import os

# Module-level Anthropic client shared by all budgeted calls.
# NOTE(review): constructed with no arguments, so credentials presumably come
# from the environment — confirm before deploying.
_ai = anthropic.Anthropic()

class BudgetExceededError(Exception):
    """Raised when a hard-capped budget has no tokens left for the month."""

def get_budget_by_id(budget_id: str) -> TokenBudget | None:
    """Return the first configured budget whose id equals ``budget_id``, or None."""
    for candidate in BUDGETS:
        if candidate.id == budget_id:
            return candidate
    return None

def ai_call_with_budget(prompt: str, budget_id: str,
                        model: str = "claude-haiku-3") -> str:
    """Send ``prompt`` to the model while enforcing and recording token budgets.

    The call is charged to ``budget_id`` and to the "global" budget. Before
    the request, every charged budget with ``hard_cap`` set is checked and the
    call is refused if that budget is already at or over its limit. After the
    request, the tokens used (input + output) are added to the counters and
    an alert is triggered for any charged budget at or above its threshold.

    Args:
        prompt: User message sent as the only message of the conversation.
        budget_id: Id of the budget to charge; must exist in ``BUDGETS``.
        model: Model identifier passed to the API.
            NOTE(review): confirm the default is an id the API accepts.

    Returns:
        The text of the first content block of the response.

    Raises:
        ValueError: If ``budget_id`` is not a configured budget.
        BudgetExceededError: If a charged hard-capped budget is exhausted.
    """
    budget = get_budget_by_id(budget_id)
    if not budget:
        raise ValueError(f"Unknown budget: {budget_id}")

    # Budgets this call counts against. When the caller targets "global"
    # directly it is charged once, not twice.
    charged = [budget]
    global_budget = get_budget_by_id("global")
    if budget_id != "global" and global_budget is not None:
        charged.append(global_budget)

    # Pre-flight check: a hard cap on ANY charged budget blocks the call.
    # Comparing counts (not a ratio) also keeps a zero limit from dividing by zero.
    for b in charged:
        current = get_usage(b.id)
        if b.hard_cap and current >= b.monthly_token_limit:
            raise BudgetExceededError(
                f"Budget '{b.name}' is exhausted "
                f"({current:,} / {b.monthly_token_limit:,} tokens)"
            )

    # Make the call
    response = _ai.messages.create(
        model=model,
        max_tokens=1024,
        messages=[{"role": "user", "content": prompt}]
    )

    used = response.usage.input_tokens + response.usage.output_tokens

    # Update counters for this budget AND global
    totals = {b.id: add_tokens(b.id, used) for b in charged}
    if "global" not in totals:
        # No global budget is configured; still keep the global counter current.
        add_tokens("global", used)

    # Check alert thresholds after the call, for every charged budget.
    for b in charged:
        total = totals[b.id]
        limit = b.monthly_token_limit
        # A non-positive limit is treated as fully used.
        fraction = total / limit if limit > 0 else 1.0
        if fraction >= b.alert_threshold:
            send_budget_alert(b, total, fraction)

    return response.content[0].text

Step 4: Build the Alert System

Alerts fire once per budget per month, when the threshold is first crossed, not on every subsequent request.

import requests, smtplib
from email.mime.text import MIMEText

def alert_already_sent(budget_id: str) -> bool:
    """Return True if an alert marker exists for ``budget_id`` in the current UTC month."""
    # Timezone-aware replacement for the deprecated datetime.utcnow().
    month = datetime.now(timezone.utc).strftime('%Y-%m')
    key = f"budget:alert_sent:{budget_id}:{month}"
    # r.exists returns a count of matching keys; convert it to honour the
    # declared bool return type.
    return bool(r.exists(key))

def mark_alert_sent(budget_id: str):
    """Record that this month's alert for ``budget_id`` has been sent."""
    # Timezone-aware replacement for the deprecated datetime.utcnow().
    month = datetime.now(timezone.utc).strftime('%Y-%m')
    key = f"budget:alert_sent:{budget_id}:{month}"
    # The key embeds the month, so the 35-day TTL only cleans up the marker;
    # a new month always starts with no marker.
    r.setex(key, 35 * 24 * 3600, "1")

# Alert delivery settings, read once from the environment at import time.
# Each is None when its variable is unset.
SLACK_WEBHOOK = os.environ.get("SLACK_WEBHOOK_URL")
GMAIL_ADDRESS = os.environ.get("GMAIL_ADDRESS")
GMAIL_PASSWORD = os.environ.get("GMAIL_APP_PASSWORD")

def send_budget_alert(budget: TokenBudget, used: int, pct: float):
    """Send the once-per-month threshold alert for ``budget`` via Slack and email.

    Does nothing if an alert was already recorded for this budget this month.
    Each channel is attempted independently: a failure on one is logged and
    does not stop the other, and no delivery error propagates to the caller
    (which has typically already completed a paid AI request).

    Args:
        budget: The budget that crossed its alert threshold.
        used: Tokens used so far this month.
        pct: ``used`` as a fraction of the monthly limit (0.8 = 80%).
    """
    if alert_already_sent(budget.id):
        return

    # Mark before delivering so a slow or failing channel is not retried
    # on every later request.
    mark_alert_sent(budget.id)

    log = logging.getLogger(__name__)

    msg = (f"Token budget alert: {budget.name}\n"
           f"Used: {used:,} / {budget.monthly_token_limit:,} tokens ({pct*100:.0f}%)\n"
           f"Hard cap: {'Yes - requests will be blocked at 100%' if budget.hard_cap else 'No - advisory only'}")

    # Slack
    if SLACK_WEBHOOK:
        try:
            resp = requests.post(SLACK_WEBHOOK, json={"text": f":warning: {msg}"}, timeout=5)
            # requests.post does not raise on 4xx/5xx; surface those too.
            resp.raise_for_status()
        except requests.RequestException:
            log.exception("Slack budget alert failed for %s", budget.id)

    # Email
    if budget.owner_email and GMAIL_ADDRESS:
        try:
            send_email(budget.owner_email, f"AI Token Budget Alert: {budget.name}", msg)
        except (smtplib.SMTPException, OSError):
            log.exception("Email budget alert failed for %s", budget.id)

def send_email(to: str, subject: str, body: str):
    """Send a plain-text email through Gmail's SMTP server.

    Uses ``GMAIL_ADDRESS`` as the sender and logs in with
    ``GMAIL_ADDRESS`` / ``GMAIL_PASSWORD``.

    Args:
        to: Recipient address.
        subject: Subject line.
        body: Plain-text message body.

    Raises:
        smtplib.SMTPException: On SMTP protocol or authentication failure.
        OSError: On connection failure or timeout.
    """
    msg = MIMEText(body)
    msg["Subject"] = subject
    msg["From"]    = GMAIL_ADDRESS
    msg["To"]      = to
    # Bound the connection so a stalled mail server cannot hang the caller.
    with smtplib.SMTP("smtp.gmail.com", 587, timeout=10) as smtp:
        # Default context verifies the server certificate and hostname
        # before credentials are sent.
        smtp.starttls(context=ssl.create_default_context())
        smtp.login(GMAIL_ADDRESS, GMAIL_PASSWORD)
        smtp.send_message(msg)

Step 5: Build a Budget Status Report

Generate a snapshot of all budgets — useful in a daily Slack brief or ops dashboard.

def budget_status_report() -> list[dict]:
    """Return one status row per configured budget, highest usage percentage first.

    Each row has the keys ``name``, ``used``, ``limit``, ``pct`` (percentage
    rounded to one decimal), ``status`` ("OK", "WARNING" or "EXCEEDED") and
    ``hard_cap``.
    """
    def classify(fraction: float, threshold: float) -> str:
        # At or past the limit wins over the alert threshold.
        if fraction >= 1.0:
            return "EXCEEDED"
        if fraction >= threshold:
            return "WARNING"
        return "OK"

    rows = []
    for budget in BUDGETS:
        tokens = get_usage(budget.id)
        fraction = tokens / budget.monthly_token_limit
        rows.append({
            "name": budget.name,
            "used": tokens,
            "limit": budget.monthly_token_limit,
            "pct": round(fraction * 100, 1),
            "status": classify(fraction, budget.alert_threshold),
            "hard_cap": budget.hard_cap,
        })

    rows.sort(key=lambda row: row["pct"], reverse=True)
    return rows

# Print report: one line per budget with a 20-cell usage bar (5% per cell).
for row in budget_status_report():
    # Clamp the filled cells: advisory budgets can run past 100%, which
    # would otherwise stretch the bar beyond its 20-cell width.
    filled = max(0, min(20, int(row["pct"] / 5)))
    bar = "#" * filled + "-" * (20 - filled)
    print(f"{row['name']:<25} [{bar}] {row['pct']}% {row['status']}")

Step 6: Auto-Reset Budgets Monthly

Run this at midnight on the first of each month. Redis keys auto-expire, but this gives you a clean audit trail.

import sqlite3
from datetime import date

def archive_and_reset_budgets(month: str | None = None):
    """Archive one month's usage for every budget into ``token_budgets.db``.

    Intended to run just after a month rolls over. By default it archives the
    month that has just ENDED (the month before the current UTC month);
    archiving the current month at that moment would record the new, still
    empty counters.

    No Redis keys are deleted here: each month uses its own counter key and
    old keys expire on their own.

    Args:
        month: Month to archive as ``YYYY-MM``. Defaults to the previous
            UTC month.
    """
    if month is None:
        now = datetime.now(timezone.utc)
        if now.month > 1:
            year, prev_month = now.year, now.month - 1
        else:
            # January rolls back to December of the previous year.
            year, prev_month = now.year - 1, 12
        month = f"{year:04d}-{prev_month:02d}"

    # Read the archived month's counters directly; the key layout mirrors the
    # one used for the live counters (budget:<id>:<YYYY-MM>).
    rows = []
    for b in BUDGETS:
        raw = r.get(f"budget:{b.id}:{month}")
        used = int(raw) if raw else 0
        rows.append((month, b.id, b.name, used, b.monthly_token_limit))

    conn = sqlite3.connect("token_budgets.db")
    try:
        # The connection context manager commits on success and rolls back
        # on error; the finally block guarantees the handle is released.
        with conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS budget_history (
                    month TEXT, budget_id TEXT, budget_name TEXT,
                    tokens_used INTEGER, limit_tokens INTEGER,
                    recorded_at TEXT
                )
            """)
            conn.executemany("""
                INSERT INTO budget_history VALUES (?,?,?,?,?,datetime('now'))
            """, rows)
    finally:
        conn.close()
    print(f"Archived {month} budget data for {len(BUDGETS)} budgets")

# crontab: 0 0 1 * * python /scripts/reset_budgets.py

What to Build Next

Related Reading

Want this system built for your business?

Get a free assessment. We will map every system your business needs and show you the ROI.

Get Your Free Assessment

Related Systems