How to Create an AI Cost Dashboard
Track AI spending across all providers in a single real-time dashboard.
Jay Banlasan
The AI Systems Guy
I was running Claude for content, GPT-4 for code review, and Gemini for document parsing. Three providers, three billing dashboards, zero unified view. By the time I noticed the month's spend was trending 40% over budget, it was day 22. An AI API cost-tracking dashboard fixed that in one afternoon and has saved me from budget surprises every month since.
When you're operating AI at scale across a business, provider invoices arrive days after the fact and individual team members have no visibility into what their workflows are costing. A live dashboard changes the behavior of everyone who can see it. Costs come down when they're visible.
What You Need Before Starting
- Python 3.10+
- SQLite (built into Python) for local storage
- flask for the dashboard UI (pip install flask)
- anthropic and openai SDKs if you want live token counts
- API keys for each provider stored in environment variables
Step 1: Design the Cost Tracking Database
One table, built to be fast to query by date, provider, team, and task type.
import sqlite3
from contextlib import closing
from datetime import datetime, timezone
DB_PATH = "ai_costs.db"
def init_db():
conn = sqlite3.connect(DB_PATH)
conn.execute("""
CREATE TABLE IF NOT EXISTS ai_usage (
id INTEGER PRIMARY KEY AUTOINCREMENT,
ts TEXT NOT NULL,
provider TEXT NOT NULL,
model TEXT NOT NULL,
team TEXT NOT NULL,
task_type TEXT NOT NULL,
input_tokens INTEGER DEFAULT 0,
output_tokens INTEGER DEFAULT 0,
cost_usd REAL NOT NULL,
request_id TEXT,
latency_ms INTEGER
)
""")
conn.execute("CREATE INDEX IF NOT EXISTS idx_ts ON ai_usage(ts)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_provider ON ai_usage(provider)")
conn.commit()
conn.close()
init_db()
Step 2: Build the Cost Logging Function
Every AI call in your stack routes through this before returning. It adds about 1ms of overhead.
COST_PER_1K = {
("anthropic", "claude-opus-4-5"): {"input": 0.015, "output": 0.075},
("anthropic", "claude-haiku-3"): {"input": 0.00025,"output": 0.00125},
("openai", "gpt-4o"): {"input": 0.005, "output": 0.015},
("openai", "gpt-4o-mini"): {"input": 0.00015,"output": 0.0006},
("google", "gemini-1.5-pro"): {"input": 0.00125,"output": 0.005},
}
def log_usage(provider: str, model: str, team: str, task_type: str,
input_tokens: int, output_tokens: int,
request_id: str = None, latency_ms: int = None):
rates = COST_PER_1K.get((provider, model), {"input": 0, "output": 0})
cost = (input_tokens / 1000 * rates["input"]) + (output_tokens / 1000 * rates["output"])
conn = sqlite3.connect(DB_PATH)
conn.execute("""
INSERT INTO ai_usage (ts, provider, model, team, task_type,
input_tokens, output_tokens, cost_usd,
request_id, latency_ms)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""", (datetime.utcnow().isoformat(), provider, model, team, task_type,
input_tokens, output_tokens, cost, request_id, latency_ms))
conn.commit()
conn.close()
return cost
Update COST_PER_1K monthly. Provider pricing changes and your budget math will drift if you don't.
Step 3: Wrap Your AI Calls
Replace raw API calls with a thin wrapper that logs automatically.
import anthropic
import time

# Shared client instance; picks up ANTHROPIC_API_KEY from the environment.
_client = anthropic.Anthropic()

def ai_call(prompt: str, team: str, task_type: str,
            model: str = "claude-haiku-3") -> str:
    """Send one prompt to Anthropic, log cost/latency, and return the text.

    Thin wrapper around messages.create so every call in the stack produces
    exactly one row in the usage table — no manual tracking anywhere else.
    """
    t0 = time.time()
    response = _client.messages.create(
        model=model,
        max_tokens=1024,
        messages=[{"role": "user", "content": prompt}],
    )
    elapsed_ms = int((time.time() - t0) * 1000)
    # Token counts come straight from the provider's usage object, so the
    # logged cost matches what will appear on the invoice.
    log_usage(provider="anthropic", model=model, team=team,
              task_type=task_type,
              input_tokens=response.usage.input_tokens,
              output_tokens=response.usage.output_tokens,
              request_id=response.id, latency_ms=elapsed_ms)
    return response.content[0].text
Every call now writes one row. No manual tracking needed anywhere in your stack.
Step 4: Build the Query Layer
These four queries power 90% of the dashboard.
def get_summary(days: int = 30) -> dict:
conn = sqlite3.connect(DB_PATH)
cur = conn.cursor()
cur.execute("""
SELECT provider, model,
SUM(cost_usd) as total_cost,
SUM(input_tokens + output_tokens) as total_tokens,
COUNT(*) as request_count,
AVG(latency_ms) as avg_latency
FROM ai_usage
WHERE ts >= datetime('now', ?)
GROUP BY provider, model
ORDER BY total_cost DESC
""", (f'-{days} days',))
rows = cur.fetchall()
conn.close()
return [
{"provider": r[0], "model": r[1], "cost": round(r[2], 4),
"tokens": r[3], "requests": r[4], "avg_latency_ms": round(r[5] or 0)}
for r in rows
]
def get_daily_spend(days: int = 30) -> list:
conn = sqlite3.connect(DB_PATH)
cur = conn.cursor()
cur.execute("""
SELECT DATE(ts) as day, SUM(cost_usd) as daily_cost
FROM ai_usage
WHERE ts >= datetime('now', ?)
GROUP BY DATE(ts)
ORDER BY day
""", (f'-{days} days',))
rows = cur.fetchall()
conn.close()
return [{"day": r[0], "cost": round(r[1], 4)} for r in rows]
Step 5: Build the Flask Dashboard
A minimal but functional UI. No frontend frameworks needed.
from flask import Flask, render_template_string, jsonify

# Single-module WSGI app; intended to sit behind a reverse proxy with auth.
app = Flask(__name__)

# Inline Jinja2 page template: dark monospace theme, one table of the
# last-30-day per-model summary. `summary` is the list of dicts produced
# by get_summary().
TEMPLATE = """
<!DOCTYPE html><html>
<head><title>AI Cost Dashboard</title>
<style>
body { font-family: monospace; background: #0d0d0d; color: #e0e0e0; padding: 2rem; }
h1 { color: #7cf; }
table { width: 100%; border-collapse: collapse; margin-bottom: 2rem; }
th { text-align: left; border-bottom: 1px solid #333; padding: 0.5rem; color: #7cf; }
td { padding: 0.4rem 0.5rem; border-bottom: 1px solid #1a1a1a; }
.cost { color: #f87; font-weight: bold; }
</style></head>
<body>
<h1>AI Cost Dashboard</h1>
<h2>Last 30 Days by Model</h2>
<table>
<tr><th>Provider</th><th>Model</th><th>Cost</th><th>Requests</th><th>Avg Latency</th></tr>
{% for row in summary %}
<tr>
<td>{{ row.provider }}</td>
<td>{{ row.model }}</td>
<td class="cost">${{ row.cost }}</td>
<td>{{ row.requests }}</td>
<td>{{ row.avg_latency_ms }}ms</td>
</tr>
{% endfor %}
</table>
</body></html>
"""
@app.route("/")
def index():
    # HTML dashboard: renders the last-30-day per-model summary table.
    return render_template_string(TEMPLATE, summary=get_summary())

@app.route("/api/summary")
def api_summary():
    # JSON mirror of the dashboard table, for scripts and other dashboards.
    return jsonify(get_summary())

@app.route("/api/daily")
def api_daily():
    # Day-by-day spend series; feeds trend charts (e.g. Chart.js).
    return jsonify(get_daily_spend())

if __name__ == "__main__":
    # Binds to all interfaces so a reverse proxy on the same host can reach
    # it; do not expose port 5050 directly — front it with Nginx + basic auth.
    app.run(host="0.0.0.0", port=5050, debug=False)
Run this on your VPS and proxy it through Nginx with basic auth. You now have a live cost view accessible from anywhere.
Step 6: Add Budget Alerts
Check the 30-day running total against your monthly budget and push a Slack alert when you hit 80%.
import requests, os

# Monthly budget (USD) and Slack Incoming Webhook URL, both from environment.
MONTHLY_BUDGET = float(os.getenv("AI_MONTHLY_BUDGET", "200"))
SLACK_WEBHOOK = os.getenv("SLACK_WEBHOOK_URL")

def check_budget_alert(threshold_pct: float = 80.0) -> float:
    """Alert Slack when the 30-day running spend crosses the threshold.

    Args:
        threshold_pct: percent of MONTHLY_BUDGET that triggers the alert.

    Returns:
        Current spend as a percent of MONTHLY_BUDGET (0.0 if no budget set),
        so callers/cron wrappers can log or act on it.
    """
    # Guard against a zero/negative budget from a misconfigured env var,
    # which would otherwise raise ZeroDivisionError.
    if MONTHLY_BUDGET <= 0:
        return 0.0
    total = sum(r["cost"] for r in get_summary(days=30))
    pct = total / MONTHLY_BUDGET * 100
    if pct >= threshold_pct:
        msg = f":warning: AI spend at {pct:.0f}% of monthly budget (${total:.2f} / ${MONTHLY_BUDGET:.2f})"
        if SLACK_WEBHOOK:
            # Timeout so a slow Slack endpoint can't hang the daily cron job.
            requests.post(SLACK_WEBHOOK, json={"text": msg}, timeout=10)
        else:
            # Original code would crash on requests.post(None); fall back to
            # stdout so cron still captures the warning without a webhook.
            print(msg)
    return pct

# Run check_budget_alert() daily via cron
What to Build Next
- Add a per-team budget breakdown so individual teams see their own spend
- Build a week-over-week trend chart using the /api/daily endpoint and Chart.js
- Export monthly cost reports to a Google Sheet automatically on the 1st of each month
Related Reading
- How to Build a Multi-Model AI Router - route to cheaper models to keep dashboard costs low
- How to Optimize Batch AI Processing for Cost - biggest cost lever once you have visibility
- How to Build AI Request Throttling Systems - enforce budgets at the request level
Want this system built for your business?
Get a free assessment. We will map every system your business needs and show you the ROI.
Get Your Free Assessment