How to Build an AI Monitoring and Alert Agent
Create an agent that monitors systems and takes action on alerts.
Jay Banlasan
The AI Systems Guy
The AI monitoring and alert agent I built watches system health 24/7. I use it across client work where repetitive multi-step processes need to run without constant oversight.
The approach: check API health, database connections, and disk space, then diagnose issues with AI. One script, one run, results delivered.
What You Need
- Python 3.9+
- Anthropic API key
- Relevant API credentials for the tools your agent uses
Step 1: Define the Agent Tools
import anthropic
import json
import os
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment.
load_dotenv()
# Shared API client used by the agent loop. No key is passed explicitly, so
# the SDK presumably reads ANTHROPIC_API_KEY from the environment — confirm.
client = anthropic.Anthropic()
def _single_string_tool(name, description, field, field_description):
    """Build a tool definition whose input is one required string field."""
    return {
        "name": name,
        "description": description,
        "input_schema": {
            "type": "object",
            "properties": {
                field: {"type": "string", "description": field_description},
            },
            "required": [field],
        },
    }


# Tool definitions offered to the model: each entry names a tool and gives
# the JSON schema its input must follow.
tools = [
    _single_string_tool(
        "health_check",
        "Primary tool for the agent's core function",
        "query",
        "Input for the tool",
    ),
    _single_string_tool(
        "diagnose",
        "Secondary tool for processing or storing results",
        "data",
        "Data to process",
    ),
]
Step 2: Implement Tool Functions
def execute_tool(tool_name, tool_input):
    """Route a tool call to the handler registered for its name.

    Args:
        tool_name: Name of the requested tool.
        tool_input: Input payload, forwarded to the handler unchanged.

    Returns:
        Whatever the handler returns, or the string "Unknown tool" when
        ``tool_name`` matches neither known tool.
    """
    # Reject unrecognised names up front so no handler is ever touched.
    if tool_name not in ("health_check", "diagnose"):
        return "Unknown tool"
    handler = handle_health_check if tool_name == "health_check" else handle_diagnose
    return handler(tool_input)
def handle_health_check(input_data):
    """Stub handler for the ``health_check`` tool.

    Reads the ``query`` entry from ``input_data`` (empty string when the key
    is absent), prints a progress line, and returns a result string that
    echoes the query.
    """
    # Stub: replace the body below with the real check logic.
    requested = input_data.get("query", "")
    progress_line = f"Running health_check: {requested}"
    print(progress_line)
    outcome = f"Results for: {requested}"
    return outcome
def handle_diagnose(input_data):
    """Stub handler for the ``diagnose`` tool.

    Reads the ``data`` entry from ``input_data`` (empty string when the key
    is absent), prints its first 100 characters, and returns a fixed
    success message.
    """
    payload = input_data.get("data", "")
    # Only a short preview is printed so long payloads do not flood stdout.
    preview = payload[:100]
    print(f"Processing: {preview}")
    return "Processed successfully"
Step 3: Build the Agent Loop
def run_agent(task, max_steps=10):
    """Run the tool-using agent loop until the model finishes or steps run out.

    Each step sends the conversation so far to the model. When the model
    requests tools, every requested tool is executed and the results are
    appended to the conversation for the next step.

    Args:
        task: Instruction sent as the first user message.
        max_steps: Upper bound on the number of model calls.

    Returns:
        The first text block of the model's last reply ("" if it has none),
        or "Max steps reached" when the step budget is exhausted.
    """
    messages = [{"role": "user", "content": task}]
    for step in range(max_steps):
        response = client.messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=4096,
            system="You are an autonomous monitoring and alert agent. Use the available tools to complete the task. Think step by step. Be thorough.",
            tools=tools,
            messages=messages,
        )
        tool_calls = [block for block in response.content if block.type == "tool_use"]

        # Finish when the model ends its turn. Also finish when it stopped for
        # another reason (e.g. max_tokens) without requesting any tool:
        # otherwise the loop would append a user message with an empty content
        # list, which the API is expected to reject on the next call.
        if response.stop_reason == "end_turn" or not tool_calls:
            final = next((b.text for b in response.content if b.type == "text"), "")
            if response.stop_reason == "end_turn":
                print(f"Agent completed in {step + 1} steps")
            else:
                print(f"Agent stopped ({response.stop_reason}) after {step + 1} steps")
            return final

        # Echo the assistant turn back, then answer every tool call by its id.
        messages.append({"role": "assistant", "content": response.content})
        tool_results = [
            {
                "type": "tool_result",
                "tool_use_id": call.id,
                "content": str(execute_tool(call.name, call.input)),
            }
            for call in tool_calls
        ]
        messages.append({"role": "user", "content": tool_results})
    return "Max steps reached"
Step 4: Run and Log Results
import sqlite3
from datetime import datetime
def log_agent_run(task, result):
    """Append one agent run to the ``runs`` table in ``agent_runs.db``.

    The table is created on first use. The stored result is capped at 5000
    characters and the row is stamped with the current local time in ISO
    format.

    Args:
        task: Task text that was given to the agent.
        result: Final output of the agent run.
    """
    conn = sqlite3.connect("agent_runs.db")
    try:
        # `with conn` commits on success and rolls back if a statement fails.
        # It does not close the connection, hence the explicit `finally`.
        with conn:
            conn.execute(
                """CREATE TABLE IF NOT EXISTS runs (
                    task TEXT, result TEXT, ran_at TEXT
                )"""
            )
            conn.execute(
                "INSERT INTO runs VALUES (?, ?, ?)",
                (task, result[:5000], datetime.now().isoformat()),
            )
    finally:
        conn.close()
# Example run: give the agent a monitoring task that matches its tools,
# persist the outcome, then show it.
task = (
    "Check API health, database connections, and disk space, "
    "then diagnose any issues you find and summarize what needs attention"
)
result = run_agent(task)
log_agent_run(task, result)
print(result)
What to Build Next
Add error recovery so the agent retries failed tool calls with adjusted parameters. Then add a cost tracker that monitors API token usage per agent run so you can optimize which model handles which steps.
Related Reading
- The Health Check System - practical guidance for building AI-powered business systems
- Building a Reconciliation System - practical guidance for building AI-powered business systems
- Building a Creative Testing System - practical guidance for building AI-powered business systems
Want this system built for your business?
Get a free assessment. We will map every system your business needs and show you the ROI.
Get Your Free Assessment