How to Implement Output Validation for AI Responses
Build validation layers that catch hallucinations and format errors automatically.
Jay Banlasan
The AI Systems Guy
AI output validation for quality control is what stands between your AI pipeline and users seeing wrong, malformed, or hallucinated responses. Without validation, a JSON format error in an AI response silently breaks downstream systems. A hallucinated name in a customer email damages trust. A classification returning an unexpected category throws an exception nobody catches. I add validation to every AI output that feeds into a downstream system or goes to a human. No exceptions.
Validation has two layers: structural (did it return valid JSON, the right fields, the right types) and semantic (does the content make sense, are values in valid ranges, are there signs of hallucination). Both layers catch different failure modes.
What You Need Before Starting
- Python 3.10+ with anthropic and pydantic (pip install anthropic pydantic)
- The AI function you want to validate
- Clear definitions of what a valid output looks like
Step 1: Define Your Output Schema With Pydantic
Pydantic models define exactly what a valid response looks like and validate automatically on parsing.
from pydantic import BaseModel, Field, validator
from typing import Literal, Optional
class TicketClassification(BaseModel):
    """Schema for a classified support ticket produced by the model."""

    category: Literal["billing", "technical", "account", "feature_request", "other"]
    priority: Literal["urgent", "high", "normal", "low"]
    summary: str = Field(min_length=10, max_length=200)
    sentiment: Literal["positive", "neutral", "frustrated", "angry"]
    suggested_team: str = Field(min_length=2, max_length=50)

    @validator("summary")
    def summary_not_generic(cls, v):
        """Reject summaries built from vague boilerplate phrasing."""
        lowered = v.lower()
        for generic in ("the user", "this ticket", "see above"):
            if generic in lowered:
                raise ValueError(f"Summary is too generic: contains '{generic}'")
        return v
class FollowUpEmail(BaseModel):
    """Schema for a short follow-up email drafted by the model."""

    subject: Optional[str] = None
    body: str = Field(min_length=30, max_length=300)
    cta: str = Field(min_length=5, max_length=100)
    tone: Literal["warm", "professional", "casual"]

    @validator("body")
    def no_placeholders(cls, v):
        """Fail when the body still contains unfilled template tokens."""
        offender = next(
            (ph for ph in ("[Name]", "[Company]", "{name}", "INSERT") if ph in v),
            None,
        )
        if offender is not None:
            raise ValueError(f"Unfilled placeholder found: {offender}")
        return v

    @validator("body")
    def no_filler_phrases(cls, v):
        """Fail when the body leans on stock filler language."""
        lowered = v.lower()
        for stock in ("great question", "happy to help", "as an ai"):
            if stock in lowered:
                raise ValueError(f"Filler phrase found: '{stock}'")
        return v
Step 2: Build a Validated AI Call Function
Wrap your AI call with parsing and validation. Retry on validation failure.
import anthropic
import json
import re
from pydantic import ValidationError
client = anthropic.Anthropic()
def validated_ai_call(
    system_prompt: str,
    user_message: str,
    output_schema: type,
    model: str = "claude-haiku-4-5",
    max_retries: int = 2
) -> tuple:
    """Call the model and validate its output against a Pydantic schema.

    Retries up to ``max_retries`` additional times when the response is not
    parseable JSON or fails schema validation, appending the failure detail
    to the user message so the model can correct itself.

    Args:
        system_prompt: System prompt sent with every attempt.
        user_message: User content; grows with failure feedback on retries.
        output_schema: Pydantic model class used to validate the parsed JSON.
        model: Anthropic model name.
        max_retries: Extra attempts after the first one.

    Returns:
        (validated_instance, None) on success, or (None, error_message)
        after every attempt failed.
    """
    for attempt in range(max_retries + 1):
        response = client.messages.create(
            model=model,
            max_tokens=500,
            system=system_prompt,
            messages=[{"role": "user", "content": user_message}]
        )
        raw_output = response.content[0].text.strip()

        # Layer 1 (structural): the response must contain an extractable JSON object.
        json_data = extract_json(raw_output)
        if json_data is None:
            if attempt < max_retries:
                print(f"Attempt {attempt+1}: Not valid JSON, retrying...")
                # Feed the failure back to the model, mirroring the
                # validation-failure path below (previously the model got
                # no hint about what went wrong).
                user_message = (
                    f"{user_message}\n\nPrevious attempt did not return valid JSON. "
                    "Respond with a single JSON object only."
                )
                continue
            return None, f"Could not extract JSON after {max_retries+1} attempts"

        # Layer 2 (schema): field names, types, ranges, and custom validators.
        try:
            validated = output_schema(**json_data)
            return validated, None
        except ValidationError as e:
            error_summary = "; ".join([f"{err['loc']}: {err['msg']}" for err in e.errors()])
            if attempt < max_retries:
                print(f"Attempt {attempt+1}: Validation failed ({error_summary}), retrying...")
                # Include the validation error in the retry to help the model fix it
                user_message = f"{user_message}\n\nPrevious attempt failed validation: {error_summary}. Please fix these issues."
                continue
            return None, error_summary
    # Unreachable: the final loop iteration always returns. Kept as a
    # defensive fallback in case the loop body is edited later.
    return None, "Max retries exceeded"
def extract_json(text: str) -> Optional[dict]:
    """Extract the first JSON object from text that may have surrounding content.

    Tries, in order: parsing the whole text, parsing a fenced ```json code
    block, and scanning for the first balanced ``{...}`` span.

    Returns:
        The parsed dict, or None when no candidate parses as JSON.
    """
    # 1) The response may already be bare JSON.
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass

    # 2) JSON wrapped in a markdown code fence (with or without the "json" tag).
    code_block = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', text, re.DOTALL)
    if code_block:
        try:
            return json.loads(code_block.group(1))
        except json.JSONDecodeError:
            pass

    # 3) First balanced-brace span. A depth counter handles arbitrary nesting,
    # which the previous single-level regex could not (it failed on objects
    # with more than one nested object). Known limitation: a brace inside a
    # JSON string value can still confuse the scan; json.loads rejects such
    # false candidates and we move to the next "{".
    start = text.find('{')
    while start != -1:
        depth = 0
        for end in range(start, len(text)):
            ch = text[end]
            if ch == '{':
                depth += 1
            elif ch == '}':
                depth -= 1
                if depth == 0:
                    try:
                        return json.loads(text[start:end + 1])
                    except json.JSONDecodeError:
                        break  # balanced but not valid JSON; try the next start
        start = text.find('{', start + 1)
    return None
Step 3: Add Hallucination Detection
Structural validation catches format errors. Hallucination detection catches content that looks real but is not.
def check_for_hallucination_signals(text: str, context: dict) -> list:
    """Scan generated text for heuristic signs of hallucinated content.

    Args:
        text: One generated text field to inspect.
        context: Ground-truth inputs the model was given; may contain a
            "company" key used for cross-checking mentions.

    Returns:
        A list of human-readable issue strings; empty when nothing tripped.
    """
    issues = []

    # NOTE(review): flagging invented *person* names reliably needs an NER
    # model; the earlier capitalized-word scan was a no-op placeholder and
    # has been removed rather than shipped half-working.

    # False-certainty markers: the model asserting knowledge it cannot have.
    false_certainty = [
        "I know that", "definitely", "certainly",
        "I can confirm", "it is a fact that"
    ]
    lowered = text.lower()
    for phrase in false_certainty:
        if phrase.lower() in lowered:
            issues.append(f"False certainty: '{phrase}'")

    # Company cross-check: only when a company was provided and it never
    # appears in the text, look for a different capitalized name sitting in
    # a company-like position ("at/from/with/for X").
    company = context.get("company")
    if company and company not in text:
        company_mentions = re.findall(
            r'(?:at|from|with|for)\s+([A-Z][a-zA-Z]+(?:\s+[A-Z][a-zA-Z]+)*)', text
        )
        for mention in company_mentions:
            if mention != company and len(mention) > 3:
                issues.append(f"Possible invented company mention: '{mention}' (provided: '{company}')")
    return issues
def safe_ai_call(
    system_prompt: str,
    user_message: str,
    output_schema: type,
    context: dict = None,
    model: str = "claude-haiku-4-5"
) -> dict:
    """Run a schema-validated AI call, then layer hallucination checks on top.

    Returns:
        Dict with keys: success (bool), output (validated model or None),
        issues (list of warning strings), error (str or None).
    """
    result, error = validated_ai_call(system_prompt, user_message, output_schema, model)
    if error:
        return {
            "success": False,
            "error": error,
            "output": None,
            "issues": []
        }

    # Semantic pass: scan every string field of the validated output.
    issues = []
    if context and result:
        for field_name, field_value in result.dict().items():
            if not isinstance(field_value, str):
                continue
            issues += [
                f"{field_name}: {issue}"
                for issue in check_for_hallucination_signals(field_value, context)
            ]
    return {
        "success": True,
        "output": result,
        "issues": issues,
        "error": None
    }
Step 4: Build a Validation Reporter
Track validation failures over time to identify prompt problems.
import sqlite3
from datetime import datetime
def init_validation_log(db_path: str = "validation_log.db"):
    """Create the validation_events table if it does not already exist.

    Idempotent (CREATE TABLE IF NOT EXISTS), so it is safe to call on
    every startup.

    Args:
        db_path: Path to the SQLite database file.
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute("""
            CREATE TABLE IF NOT EXISTS validation_events (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                prompt_id TEXT,
                model TEXT,
                success INTEGER,
                error_type TEXT,
                error_detail TEXT,
                attempts INTEGER,
                logged_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)
        conn.commit()
    finally:
        # Close even if the DDL raises, so the handle is never leaked.
        conn.close()
def log_validation(
    prompt_id: str,
    model: str,
    success: bool,
    error_type: str = None,
    error_detail: str = None,
    attempts: int = 1,
    db_path: str = "validation_log.db"
):
    """Record one validation outcome in the SQLite log.

    Args:
        prompt_id: Identifier of the prompt template being tracked.
        model: Model name used for the call.
        success: Whether validation ultimately passed.
        error_type: Coarse failure category, if any.
        error_detail: Human-readable failure detail, if any.
        attempts: Total attempts the call took, including the final one.
        db_path: Path to the database created by init_validation_log.
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute(
            "INSERT INTO validation_events (prompt_id, model, success, error_type, error_detail, attempts) VALUES (?, ?, ?, ?, ?, ?)",
            (prompt_id, model, int(success), error_type, error_detail, attempts)
        )
        conn.commit()
    finally:
        # Always release the handle, even if the INSERT raises.
        conn.close()
def get_validation_stats(prompt_id: str = None, db_path: str = "validation_log.db") -> dict:
    """Summarize validation outcomes from the SQLite log.

    Args:
        prompt_id: Restrict stats to one prompt; None aggregates everything.
        db_path: Path to the database created by init_validation_log.

    Returns:
        Dict with "total" event count, "success_rate" in [0, 1], and
        "avg_attempts" per call.
    """
    # Build one query instead of maintaining two near-identical strings.
    query = "SELECT COUNT(*), SUM(success), AVG(attempts) FROM validation_events"
    params = ()
    if prompt_id:
        query += " WHERE prompt_id = ?"
        params = (prompt_id,)
    conn = sqlite3.connect(db_path)
    try:
        total, successes, avg_attempts = conn.execute(query, params).fetchone()
    finally:
        # Close even on query failure so the handle is never leaked.
        conn.close()
    return {
        "total": total or 0,
        # "or 1" guards the division on an empty table (COUNT(*) == 0).
        "success_rate": round((successes or 0) / (total or 1), 3),
        "avg_attempts": round(avg_attempts or 1, 2)
    }
Step 5: Implement Graceful Degradation
When validation fails completely after retries, you need a fallback that does not break the user experience.
from enum import Enum
class ValidationOutcome(Enum):
    """How far a resilient AI call got before returning a result."""

    SUCCESS = "success"    # validated cleanly, no warnings
    DEGRADED = "degraded"  # passed the schema but carries quality warnings
    FALLBACK = "fallback"  # validation failed; canned fallback used instead
    FAILED = "failed"      # nothing usable, not even a fallback
def resilient_ai_call(
    system_prompt: str,
    user_message: str,
    output_schema: type,
    fallback_response: dict = None,
    model: str = "claude-haiku-4-5"
) -> tuple:
    """Validated AI call with graceful degradation.

    Returns:
        (output, ValidationOutcome, issues). When validation fails outright
        and a schema-valid fallback_response is supplied, the fallback is
        returned so callers always have something usable.
    """
    result = safe_ai_call(system_prompt, user_message, output_schema, model=model)
    warnings = result["issues"]

    if result["success"]:
        # Warnings downgrade the outcome but the output is still returned.
        outcome = ValidationOutcome.DEGRADED if warnings else ValidationOutcome.SUCCESS
        return result["output"], outcome, warnings

    if fallback_response:
        try:
            fallback = output_schema(**fallback_response)
        except Exception:
            # Fallback itself failed the schema; fall through to FAILED.
            pass
        else:
            return fallback, ValidationOutcome.FALLBACK, [f"Validation failed: {result['error']}"]

    return None, ValidationOutcome.FAILED, [result["error"]]
Step 6: Wire Into a Complete Example
# System prompt: instructs the model to emit a single JSON object whose
# fields mirror the TicketClassification schema defined above.
TICKET_SYSTEM_PROMPT = """Classify support tickets. Return JSON only.
Schema: {"category": "billing|technical|account|feature_request|other", "priority": "urgent|high|normal|low", "summary": "specific one sentence description", "sentiment": "positive|neutral|frustrated|angry", "suggested_team": "billing|engineering|success|sales"}"""

# Schema-valid default used when classification fails after all retries;
# routes the ticket to manual review instead of dropping it.
TICKET_FALLBACK = {
    "category": "other",
    "priority": "normal",
    "summary": "Unable to auto-classify - requires manual review",
    "sentiment": "neutral",
    "suggested_team": "success"
}
def process_support_ticket(ticket: dict) -> dict:
    """Classify one support ticket and package the result for downstream use.

    Anything short of a clean validation SUCCESS is flagged for human review.
    """
    prompt_input = f"Subject: {ticket.get('subject', '')}\nBody: {ticket.get('body', '')}"
    output, outcome, issues = resilient_ai_call(
        TICKET_SYSTEM_PROMPT,
        prompt_input,
        TicketClassification,
        fallback_response=TICKET_FALLBACK
    )
    classification = output.dict() if output else None
    return {
        "ticket_id": ticket.get("id"),
        "classification": classification,
        "outcome": outcome.value,
        "issues": issues,
        "requires_review": outcome != ValidationOutcome.SUCCESS
    }
What to Build Next
- Build an LLM-as-judge validator for semantic quality that catches subtle problems structural validation misses
- Set up alerting when validation failure rates exceed a threshold (e.g., more than 5% failures in an hour)
- Create a validation dashboard that shows pass/fail rates per prompt over time
Related Reading
- The Output Validation Pipeline - output validation pipeline ai
- Testing and Validation for AI Operations - testing validation ai operations
- The Validation Loop Pattern - validation loop pattern ai
Want this system built for your business?
Get a free assessment. We will map every system your business needs and show you the ROI.
Get Your Free Assessment