Systems Library / Industry Applications / How to Build an AI Legal Document Reviewer
Industry Applications legal

How to Build an AI Legal Document Reviewer

Review legal documents automatically with AI-powered clause analysis.

Jay Banlasan

Jay Banlasan

The AI Systems Guy

AI-assisted contract review handles the first pass of document review, the part that eats hours of attorney time. I built this for a firm whose attorneys were spending 2-3 hours per contract reading routine clauses. Now the AI flags unusual terms, missing clauses, and risky language in minutes, so the attorney can focus on judgment calls instead of reading boilerplate.

This is not legal advice and does not replace attorney review. It accelerates it.

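What You Need Before Starting

Python 3 with the anthropic, python-dotenv, PyMuPDF (imported as fitz), and python-docx packages installed, plus an Anthropic API key available as ANTHROPIC_API_KEY (for example in a .env file, which load_dotenv() reads).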

Step 1: Extract Text from Documents

import os

def extract_text_from_file(file_path):
    """Return the plain text of a .txt, .pdf, or .docx document.

    The format is chosen from the file extension, compared
    case-insensitively.

    Args:
        file_path: Path of the document to read.

    Returns:
        The extracted text, or an empty string when the extension is not
        one of the supported formats.
    """
    ext = os.path.splitext(file_path)[1].lower()

    if ext == ".txt":
        # Decode as UTF-8 explicitly instead of relying on the platform
        # default (e.g. cp1252 on Windows). Undecodable bytes become U+FFFD
        # so one stray byte does not abort the whole review.
        with open(file_path, encoding="utf-8", errors="replace") as f:
            return f.read()

    if ext == ".pdf":
        import fitz  # PyMuPDF

        # The context manager closes the document even when extracting a
        # page raises, which the manual close() call did not guarantee.
        with fitz.open(file_path) as doc:
            return "".join(page.get_text() for page in doc)

    if ext == ".docx":
        from docx import Document

        doc = Document(file_path)
        # NOTE(review): only doc.paragraphs is read; text held in tables or
        # headers/footers is presumably not included - confirm whether
        # contracts in scope rely on those.
        return "\n".join(p.text for p in doc.paragraphs)

    return ""

Step 2: Define the Review Checklist

# Clauses the reviewer is asked to look for, keyed by document type.
# Each value is the list of clause names that gets joined into the review
# prompt. review_document() falls back to the "service_agreement" list when
# it is given a document type that is not a key here.
REVIEW_CHECKLIST = {
    # Employer/employee contracts.
    "employment_agreement": [
        "compensation_and_benefits",
        "termination_clauses",
        "non_compete",
        "non_solicitation",
        "confidentiality",
        "intellectual_property",
        "dispute_resolution",
        "governing_law"
    ],
    # Service contracts; also the default checklist.
    "service_agreement": [
        "scope_of_services",
        "payment_terms",
        "liability_limitation",
        "indemnification",
        "termination",
        "confidentiality",
        "force_majeure",
        "governing_law"
    ],
    # Lease contracts.
    "lease_agreement": [
        "rent_and_deposits",
        "lease_term",
        "maintenance_obligations",
        "early_termination",
        "insurance_requirements",
        "subletting",
        "dispute_resolution"
    ]
}

Step 3: Build the AI Reviewer

import anthropic
from dotenv import load_dotenv
load_dotenv()

def review_document(text, doc_type="service_agreement", max_chars=15000):
    """Ask the model for a clause-by-clause review of a document.

    The document is checked against the clause list registered for
    ``doc_type`` in ``REVIEW_CHECKLIST``; unknown types fall back to the
    "service_agreement" list.

    Only the first ``max_chars`` characters are sent to the model. When the
    document is longer, the prompt tells the model it is seeing an excerpt
    (so clauses in the omitted part are not reported as MISSING) and the
    returned review starts with a notice, so a partial review is never
    mistaken for a full one.

    Args:
        text: Full text of the document.
        doc_type: Key into ``REVIEW_CHECKLIST``; also used, with underscores
            replaced by spaces, to describe the document in the prompt.
        max_chars: Maximum number of characters of ``text`` to send, or
            ``None`` to send the whole text.

    Returns:
        The review text from the first content block of the model response,
        prefixed with a partial-review notice when the input was truncated.
    """
    client = anthropic.Anthropic()
    checklist = REVIEW_CHECKLIST.get(doc_type, REVIEW_CHECKLIST["service_agreement"])

    truncated = max_chars is not None and len(text) > max_chars
    excerpt = text[:max_chars] if truncated else text

    prompt = f"Review this {doc_type.replace('_', ' ')} against these required clauses: {', '.join(checklist)}\n\n"
    coverage = ""
    if truncated:
        coverage = f"only the first {max_chars} of {len(text)} characters of the document were reviewed"
        prompt += (
            f"NOTE: This is an excerpt; {coverage}. Do not report a clause as "
            "MISSING solely because it is absent from the excerpt; say it was "
            "not found in the reviewed portion instead.\n\n"
        )
    prompt += f"Document text:\n{excerpt}"

    response = client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=4096,
        system="""You are a legal document review assistant. Analyze the document and produce a structured review.
For each checklist item, report:
- PRESENT / MISSING / PARTIAL
- If present: summarize the key terms in 1-2 sentences
- Flag anything unusual, one-sided, or potentially problematic
- Note any ambiguous language

Also flag:
- Inconsistencies within the document
- Undefined terms that are used
- Missing standard protections
- Unusually broad or narrow definitions

Format as structured sections. Be precise. Quote relevant language when flagging issues.""",
        messages=[{
            "role": "user",
            "content": prompt
        }]
    )

    review = response.content[0].text
    if truncated:
        # Surface the limitation in the output itself; the model may not
        # repeat it, and the attorney must know the rest was not read.
        review = f"NOTICE: Partial review - {coverage}.\n\n{review}"
    return review

Step 4: Generate a Risk Summary

def generate_risk_summary(review_text, doc_type):
    """Condense a detailed review into a short risk assessment.

    Sends the review to the model with instructions to bucket issues as
    HIGH/MEDIUM/LOW risk and to list the top 3 action items.

    Args:
        review_text: The detailed review to summarize.
        doc_type: Document type identifier; underscores are replaced with
            spaces when it is named in the prompt.

    Returns:
        The text of the first content block of the model response.
    """
    api = anthropic.Anthropic()

    instructions = "Summarize the legal document review into a risk assessment. Categorize issues as HIGH/MEDIUM/LOW risk. Be concise. List the top 3 action items for the reviewing attorney."
    readable_type = doc_type.replace("_", " ")
    request = (
        f"Based on this review of a {readable_type}:\n\n"
        f"{review_text}\n\n"
        "Generate a risk summary and top 3 action items."
    )

    reply = api.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=1024,
        system=instructions,
        messages=[{"role": "user", "content": request}],
    )

    first_block = reply.content[0]
    return first_block.text

Step 5: Produce the Full Report

from datetime import datetime

def review_legal_document(file_path, doc_type="service_agreement"):
    """Run the full review pipeline on one file and save a text report.

    Extracts the document text, requests a detailed review and a risk
    summary, then writes both to ``<file path without extension>_review.txt``
    next to the input file. Progress messages are printed along the way.

    Args:
        file_path: Path of the document to review.
        doc_type: Document type passed to the review and summary steps.

    Returns:
        A dict with the keys "file", "doc_type", "reviewed_at" (ISO 8601
        UTC timestamp), "word_count", "detailed_review" and "risk_summary";
        or ``{"error": ...}`` when no text could be extracted, in which case
        no report file is written.
    """
    # Local import so this block does not depend on the file-level
    # "from datetime import datetime" line being widened.
    from datetime import timezone

    print(f"Extracting text from {file_path}...")
    text = extract_text_from_file(file_path)

    if not text.strip():
        return {"error": "Could not extract text from document"}

    print(f"Running AI review ({len(text)} chars)...")
    review = review_document(text, doc_type)

    print("Generating risk summary...")
    risk_summary = generate_risk_summary(review, doc_type)

    report = {
        "file": file_path,
        "doc_type": doc_type,
        # datetime.utcnow() is deprecated and returns a naive value; use an
        # explicitly UTC-aware timestamp so the offset is in the string.
        "reviewed_at": datetime.now(timezone.utc).isoformat(),
        "word_count": len(text.split()),
        "detailed_review": review,
        "risk_summary": risk_summary,
    }

    # splitext only strips an extension from the final path component;
    # rsplit(".", 1) would cut at a dot in a directory name when the file
    # itself has no extension (e.g. "./v1.2/contract").
    output_path = os.path.splitext(file_path)[0] + "_review.txt"

    # Model output routinely contains non-ASCII characters (curly quotes,
    # section signs), so write UTF-8 rather than the platform default.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(f"Legal Document Review\n{'=' * 40}\n")
        f.write(f"File: {report['file']}\n")
        f.write(f"Type: {report['doc_type']}\n")
        f.write(f"Date: {report['reviewed_at']}\n\n")
        f.write(f"RISK SUMMARY\n{'-' * 20}\n{report['risk_summary']}\n\n")
        f.write(f"DETAILED REVIEW\n{'-' * 20}\n{report['detailed_review']}\n")

    print(f"Review saved to {output_path}")
    return report

# Command-line entry point: python <script> [file_path] [doc_type]
if __name__ == "__main__":
    import sys

    # Positional arguments are optional; fall back to the tutorial defaults.
    cli_args = sys.argv[1:]
    file_path = cli_args[0] if cli_args else "contract.pdf"
    doc_type = cli_args[1] if len(cli_args) >= 2 else "service_agreement"
    review_legal_document(file_path, doc_type)

What to Build Next

Add comparison mode. Feed the AI both a contract draft and your firm's preferred template, and have it highlight every deviation. That catches non-standard terms faster than reading line by line.

Related Reading

Want this system built for your business?

Get a free assessment. We will map every system your business needs and show you the ROI.

Get Your Free Assessment

Related Systems