How to Build an AI Legal Document Reviewer
Review legal documents automatically with AI-powered clause analysis.
Jay Banlasan
The AI Systems Guy
AI-assisted contract review handles the first pass of document review, the part that eats hours of attorney time. I built this for a firm that was spending 2-3 hours per contract reviewing routine clauses. Now the AI flags unusual terms, missing clauses, and risky language in minutes, so the attorney can focus on judgment calls instead of reading boilerplate.
This is not legal advice and does not replace attorney review; it accelerates that review.
What You Need Before Starting
- Python 3.8+ with the `anthropic`, `python-dotenv`, `PyMuPDF`, and `python-docx` packages installed
- Anthropic API key (set as `ANTHROPIC_API_KEY` in your environment or in a `.env` file)
- PDF or text versions of contracts to review
- A checklist of clauses your firm typically reviews
Step 1: Extract Text from Documents
import os
def extract_text_from_file(file_path):
    """Return the plain text of a .txt, .pdf, or .docx document.

    The format is selected from the file extension, compared
    case-insensitively.

    Args:
        file_path: Path to the document on disk.

    Returns:
        The extracted text. An empty string is returned when the extension
        is not one of the supported formats, so callers can treat "" as
        "nothing to review".
    """
    ext = os.path.splitext(file_path)[1].lower()

    if ext == ".txt":
        # Decode explicitly instead of relying on the platform default
        # (e.g. cp1252 on Windows). Undecodable bytes become U+FFFD so a
        # single stray byte does not abort the whole review.
        with open(file_path, encoding="utf-8", errors="replace") as f:
            return f.read()

    if ext == ".pdf":
        import fitz  # PyMuPDF

        doc = fitz.open(file_path)
        try:
            # join() avoids rebuilding the string once per page.
            return "".join(page.get_text() for page in doc)
        finally:
            # Release the file handle even if a page fails to parse.
            doc.close()

    if ext == ".docx":
        from docx import Document

        doc = Document(file_path)
        return "\n".join(p.text for p in doc.paragraphs)

    return ""
Step 2: Define the Review Checklist
# Clause identifiers to check, keyed by document type. Each identifier is
# passed to the model verbatim as part of the "required clauses" list.
REVIEW_CHECKLIST = {
    "employment_agreement": [
        "compensation_and_benefits",
        "termination_clauses",
        "non_compete",
        "non_solicitation",
        "confidentiality",
        "intellectual_property",
        "dispute_resolution",
        "governing_law",
    ],
    "service_agreement": [
        "scope_of_services",
        "payment_terms",
        "liability_limitation",
        "indemnification",
        "termination",
        "confidentiality",
        "force_majeure",
        "governing_law",
    ],
    "lease_agreement": [
        "rent_and_deposits",
        "lease_term",
        "maintenance_obligations",
        "early_termination",
        "insurance_requirements",
        "subletting",
        "dispute_resolution",
    ],
}
Step 3: Build the AI Reviewer
import anthropic
from dotenv import load_dotenv
load_dotenv()
def review_document(text, doc_type="service_agreement", max_chars=15000):
    """Ask the model for a clause-by-clause review of a document.

    Args:
        text: Full text of the document to review.
        doc_type: Key into REVIEW_CHECKLIST. Unknown types fall back to the
            "service_agreement" checklist.
        max_chars: Maximum number of characters of ``text`` sent to the
            model. Pass None to send the whole document.

    Returns:
        The text of the first content block in the model's response.
    """
    client = anthropic.Anthropic()
    checklist = REVIEW_CHECKLIST.get(doc_type, REVIEW_CHECKLIST["service_agreement"])

    excerpt = text[:max_chars]
    truncation_note = ""
    if len(text) > len(excerpt):
        # Without this note the model cannot know the text was cut and would
        # report clauses located past the cut-off as MISSING.
        truncation_note = (
            f"\n\nNOTE: The document was truncated for length: only the first "
            f"{len(excerpt)} of {len(text)} characters appear above. Clauses may "
            "exist in the omitted portion, so state clearly in the review that it "
            "covers only this excerpt and treat absent items as unverified rather "
            "than definitively missing."
        )

    response = client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=4096,
        system="""You are a legal document review assistant. Analyze the document and produce a structured review.
For each checklist item, report:
- PRESENT / MISSING / PARTIAL
- If present: summarize the key terms in 1-2 sentences
- Flag anything unusual, one-sided, or potentially problematic
- Note any ambiguous language
Also flag:
- Inconsistencies within the document
- Undefined terms that are used
- Missing standard protections
- Unusually broad or narrow definitions
Format as structured sections. Be precise. Quote relevant language when flagging issues.""",
        messages=[{
            "role": "user",
            "content": f"Review this {doc_type.replace('_', ' ')} against these required clauses: {', '.join(checklist)}\n\nDocument text:\n{excerpt}{truncation_note}"
        }]
    )
    return response.content[0].text
Step 4: Generate a Risk Summary
def generate_risk_summary(review_text, doc_type):
    """Condense a detailed review into a risk assessment.

    Sends the review to the model with instructions to categorize issues as
    HIGH/MEDIUM/LOW risk and list the top 3 action items.

    Args:
        review_text: The detailed review to summarize.
        doc_type: Document type identifier; underscores are shown to the
            model as spaces.

    Returns:
        The text of the first content block in the model's response.
    """
    api = anthropic.Anthropic()

    readable_type = doc_type.replace("_", " ")
    instructions = "Summarize the legal document review into a risk assessment. Categorize issues as HIGH/MEDIUM/LOW risk. Be concise. List the top 3 action items for the reviewing attorney."
    user_prompt = (
        f"Based on this review of a {readable_type}:\n\n{review_text}\n\n"
        "Generate a risk summary and top 3 action items."
    )

    reply = api.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=1024,
        system=instructions,
        messages=[{"role": "user", "content": user_prompt}],
    )
    return reply.content[0].text
Step 5: Produce the Full Report
from datetime import datetime
def review_legal_document(file_path, doc_type="service_agreement"):
    """Run the full review pipeline on one file and save a text report.

    Extracts the document text, requests a detailed review and a risk
    summary, then writes both to "<file path without extension>_review.txt"
    next to the input file.

    Args:
        file_path: Path to the document to review.
        doc_type: Document type identifier passed on to the review steps.

    Returns:
        A dict with keys "file", "doc_type", "reviewed_at", "word_count",
        "detailed_review" and "risk_summary"; or {"error": ...} when no
        text could be extracted.
    """
    # Local imports keep this block self-contained alongside the file-level
    # `from datetime import datetime`.
    import os
    from datetime import timezone

    print(f"Extracting text from {file_path}...")
    text = extract_text_from_file(file_path)
    if not text.strip():
        return {"error": "Could not extract text from document"}

    print(f"Running AI review ({len(text)} chars)...")
    review = review_document(text, doc_type)

    print("Generating risk summary...")
    risk_summary = generate_risk_summary(review, doc_type)

    report = {
        "file": file_path,
        "doc_type": doc_type,
        # Timezone-aware UTC timestamp; datetime.utcnow() is naive and
        # deprecated.
        "reviewed_at": datetime.now(timezone.utc).isoformat(),
        "word_count": len(text.split()),
        "detailed_review": review,
        "risk_summary": risk_summary,
    }

    # splitext only strips a real extension; rsplit(".") would cut at a dot
    # in a directory name (e.g. "./contract" -> "").
    output_path = os.path.splitext(file_path)[0] + "_review.txt"

    # Model output can contain non-ASCII characters, so write UTF-8 rather
    # than the platform default encoding.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(f"Legal Document Review\n{'=' * 40}\n")
        f.write(f"File: {report['file']}\n")
        f.write(f"Type: {report['doc_type']}\n")
        f.write(f"Date: {report['reviewed_at']}\n\n")
        f.write(f"RISK SUMMARY\n{'-' * 20}\n{report['risk_summary']}\n\n")
        f.write(f"DETAILED REVIEW\n{'-' * 20}\n{report['detailed_review']}\n")

    print(f"Review saved to {output_path}")
    return report
if __name__ == "__main__":
    # Usage: script [file_path] [doc_type]
    # Missing arguments fall back to "contract.pdf" / "service_agreement".
    import sys

    cli_args = sys.argv[1:]
    file_path = cli_args[0] if cli_args else "contract.pdf"
    doc_type = cli_args[1] if len(cli_args) >= 2 else "service_agreement"
    review_legal_document(file_path, doc_type)
What to Build Next
Add comparison mode. Feed the AI both a contract draft and your firm's preferred template, and have it highlight every deviation. That catches non-standard terms faster than reading line by line.
Related Reading
- AI in Legal and Compliance - AI applications across legal practice
- AI for Proposal and Document Creation - document automation patterns
- AI for Workflow Optimization - streamlining professional workflows
Want this system built for your business?
Get a free assessment. We will map every system your business needs and show you the ROI.
Get Your Free Assessment