How to Build an AI Legal Research Assistant
Search case law and statutes automatically with an AI-powered research assistant.
Jay Banlasan
The AI Systems Guy
An AI legal research assistant for case law accelerates the research phase that often takes paralegals hours. I built this to take a legal question, search through a firm's internal knowledge base, and produce a structured research memo with relevant precedents and statutory references. The attorney still verifies everything, but they start from a solid first draft instead of a blank page.
What You Need Before Starting
- Python 3.8+
- Anthropic API key
- A database of case summaries or legal references
- SQLite for the knowledge base
Step 1: Build the Legal Knowledge Base
import sqlite3
def init_legal_kb(db_path="legal_kb.db"):
    """Create the SQLite knowledge base with case-summary and statute tables.

    Safe to call repeatedly: both tables use CREATE TABLE IF NOT EXISTS.

    Args:
        db_path: Path to the SQLite database file (created if missing).
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute("""
        CREATE TABLE IF NOT EXISTS case_summaries (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            case_name TEXT,
            citation TEXT,
            court TEXT,
            year INTEGER,
            practice_area TEXT,
            key_holding TEXT,
            summary TEXT,
            full_text TEXT
        )
        """)
        conn.execute("""
        CREATE TABLE IF NOT EXISTS statutes (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            title TEXT,
            section TEXT,
            jurisdiction TEXT,
            text TEXT,
            last_updated TEXT
        )
        """)
        conn.commit()
    finally:
        # Close even when a statement raises so the handle is never leaked.
        conn.close()
def add_case(case_data, db_path="legal_kb.db"):
    """Insert one case summary into the knowledge base.

    Args:
        case_data: Mapping with keys "name", "citation", "court", "year",
            "practice_area", "holding", "summary". An optional "full_text"
            key is persisted too (NULL when absent).
        db_path: Path to the SQLite database file.
    """
    conn = sqlite3.connect(db_path)
    try:
        # Parameterized placeholders — never interpolate user text into SQL.
        conn.execute("""
        INSERT INTO case_summaries
            (case_name, citation, court, year, practice_area, key_holding, summary, full_text)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """, (case_data["name"], case_data["citation"], case_data["court"],
              case_data["year"], case_data["practice_area"], case_data["holding"],
              case_data["summary"], case_data.get("full_text")))
        conn.commit()
    finally:
        # Close even on failure so the handle is never leaked.
        conn.close()
Step 2: Build the Research Query Engine
def search_cases(query_terms, practice_area=None, db_path="legal_kb.db"):
    """Keyword search over case summaries and holdings.

    Every whitespace-separated term must match (AND semantics) in either
    the summary or the key holding, via case-insensitive LIKE.

    Args:
        query_terms: Space-separated search keywords.
        practice_area: Optional exact-match filter on practice_area.
        db_path: Path to the SQLite database file.

    Returns:
        Up to 20 matches as dicts, newest cases first. Empty list when
        query_terms contains no terms.
    """
    terms = query_terms.split()
    # Guard: with no terms the WHERE clause would be empty and the SQL invalid.
    if not terms:
        return []
    conditions = " AND ".join("(summary LIKE ? OR key_holding LIKE ?)" for _ in terms)
    params = []
    for t in terms:
        pattern = f"%{t}%"
        params.extend([pattern, pattern])
    sql = f"SELECT case_name, citation, court, year, key_holding, summary FROM case_summaries WHERE {conditions}"
    if practice_area:
        sql += " AND practice_area = ?"
        params.append(practice_area)
    sql += " ORDER BY year DESC LIMIT 20"
    conn = sqlite3.connect(db_path)
    try:
        results = conn.execute(sql, params).fetchall()
    finally:
        # Close even when the query raises so the handle is never leaked.
        conn.close()
    return [{"name": r[0], "citation": r[1], "court": r[2], "year": r[3],
             "holding": r[4], "summary": r[5]} for r in results]
Step 3: Generate the Research Memo
import anthropic
from dotenv import load_dotenv

# Load ANTHROPIC_API_KEY (and any other settings) from a local .env file
# so the SDK can authenticate without hard-coded credentials.
load_dotenv()
def generate_research_memo(question, relevant_cases, jurisdiction=""):
    """Draft a structured legal research memo from retrieved cases.

    Args:
        question: The legal research question in plain English.
        relevant_cases: List of case dicts (as returned by search_cases);
            only the first 10 are included in the prompt.
        jurisdiction: Optional jurisdiction label; defaults to "General"
            in the prompt when empty.

    Returns:
        The memo text produced by the model.
    """
    # The SDK reads ANTHROPIC_API_KEY from the environment.
    client = anthropic.Anthropic()
    # Flatten up to 10 cases into a plain-text context block; capping the
    # count keeps the prompt within a predictable size.
    case_text = "\n\n".join([
        f"Case: {c['name']} ({c['citation']}, {c['court']}, {c['year']})\nHolding: {c['holding']}\nSummary: {c['summary']}"
        for c in relevant_cases[:10]
    ])
    response = client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=4096,
        system="""You are a legal research assistant producing a research memo.
Structure:
1. Issue: Restate the legal question
2. Brief Answer: 2-3 sentence conclusion
3. Analysis: Discuss relevant cases and how they apply
4. Applicable Statutes: Note any relevant statutory provisions
5. Gaps: What additional research is needed
Cite cases properly. Note when a case may be distinguishable. Be precise about holdings vs dicta. Flag if any cases may have been overturned or modified.""",
        messages=[{
            "role": "user",
            "content": f"Legal research question: {question}\nJurisdiction: {jurisdiction or 'General'}\n\nRelevant cases from our database:\n{case_text}"
        }]
    )
    # First (and expected only) content block carries the memo text.
    return response.content[0].text
Step 4: Build the Research Pipeline
from datetime import datetime
def conduct_research(question, practice_area=None, jurisdiction=""):
    """Run the full research pipeline: term extraction, case search, memo generation.

    Args:
        question: The legal research question in plain English.
        practice_area: Optional practice-area filter for the case search.
        jurisdiction: Optional jurisdiction passed through to the memo prompt.

    Returns:
        dict with the question, memo, cited cases, counts, an ISO-8601 UTC
        timestamp, and an attorney-verification disclaimer.
    """
    # Local import keeps this fix self-contained; datetime.utcnow() is
    # deprecated (Python 3.12+) in favor of timezone-aware datetimes.
    from datetime import timezone

    search_terms = extract_search_terms(question)
    cases = search_cases(search_terms, practice_area)
    if not cases:
        # Fallback: reuse the raw question (truncated) as the search string
        # when the extracted terms match nothing.
        cases = search_cases(question[:100], practice_area)
    memo = generate_research_memo(question, cases, jurisdiction)
    result = {
        "question": question,
        "jurisdiction": jurisdiction,
        "cases_found": len(cases),
        "memo": memo,
        "cases_cited": [{"name": c["name"], "citation": c["citation"]} for c in cases],
        "researched_at": datetime.now(timezone.utc).isoformat(),
        "disclaimer": "AI-generated research memo. All citations must be verified by an attorney.",
    }
    return result
def extract_search_terms(question):
    """Distill a legal question into space-separated search keywords via the model.

    Args:
        question: The legal research question in plain English.

    Returns:
        A single string of space-separated terms, stripped of surrounding
        whitespace, suitable as input to search_cases().
    """
    client = anthropic.Anthropic()
    response = client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=100,  # terms only — keep the call small and cheap
        system="Extract 5-8 key legal search terms from this question. Return only the terms separated by spaces. No explanation.",
        messages=[{"role": "user", "content": question}]
    )
    return response.content[0].text.strip()
Step 5: Save and Share Results
def save_research(result, output_dir="research_memos/"):
    """Write a research result to a timestamped plain-text memo file.

    Args:
        result: dict as produced by conduct_research (must contain
            "researched_at", "question", "jurisdiction", "cases_found",
            "disclaimer", and "memo").
        output_dir: Directory for memo files; created if missing.

    Returns:
        The path of the file that was written.
    """
    import os
    # Local import: datetime.utcnow() is deprecated (Python 3.12+);
    # strftime output is unchanged because the format has no tz field.
    from datetime import timezone

    os.makedirs(output_dir, exist_ok=True)
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    filename = f"memo_{timestamp}.txt"
    filepath = os.path.join(output_dir, filename)
    # Explicit encoding so output is UTF-8 regardless of platform default.
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(f"LEGAL RESEARCH MEMO\n{'=' * 40}\n")
        f.write(f"Date: {result['researched_at']}\n")
        f.write(f"Question: {result['question']}\n")
        f.write(f"Jurisdiction: {result['jurisdiction']}\n")
        f.write(f"Cases Found: {result['cases_found']}\n\n")
        f.write(f"{result['disclaimer']}\n\n")
        f.write(f"{'=' * 40}\n\n")
        f.write(result["memo"])
    print(f"Memo saved to {filepath}")
    return filepath
if __name__ == "__main__":
    # Demo run: requires a populated legal_kb.db and ANTHROPIC_API_KEY in
    # the environment (or a .env file) — makes live API calls.
    result = conduct_research(
        "Can an employer enforce a non-compete clause against a remote worker who moves to a different state?",
        practice_area="employment",
        jurisdiction="California"
    )
    save_research(result)
What to Build Next
Add citation verification. For every case cited in the memo, run a check against your database to confirm the citation exists and hasn't been overturned. Flag unverified citations with a warning.
Related Reading
- AI in Legal and Compliance - AI across legal workflows
- How to Set Up AI-Powered Content Recommendations - knowledge retrieval patterns
- AI for Proposal and Document Creation - document generation
Want this system built for your business?
Get a free assessment. We will map every system your business needs and show you the ROI.
Get Your Free Assessment