How to Build an AI Documentation Generator
Generate code documentation automatically using AI analysis.
Jay Banlasan
The AI Systems Guy
An AI documentation generator for automated code docs solves the problem every developer ignores until they can't remember what their own code does. I run this across every project I maintain. It reads the source, generates function docs, API references, and architecture overviews, then writes them to markdown files.
Documentation that writes itself actually gets maintained.
What You Need Before Starting
- Python 3.8+
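- Anthropic API key, exposed as the ANTHROPIC_API_KEY environment variable (Step 2 loads it from a .env file), plus the anthropic and python-dotenv packages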
- A codebase with Python files (Step 1 parses source with Python's ast module, so JavaScript files would need a separate parser)
- A docs/ directory for output
Step 1: Parse Source Files for Functions and Classes
import ast
import os
def extract_code_elements(file_path):
    """Collect every function and class definition found in a Python file.

    The file is parsed with ``ast`` and the whole tree is walked, so nested
    functions and class methods are reported as their own "function"
    elements in addition to the class that contains them.

    Args:
        file_path: Path to the Python source file to analyse.

    Returns:
        A list of dicts. Function elements carry the keys "type"
        ("function"), "name", "args", "lineno" and "source"; class elements
        carry "type" ("class"), "name", "methods", "lineno" and "source".

    Raises:
        SyntaxError: If the file is not valid Python.
    """
    # Read as UTF-8 (Python's default source encoding) rather than relying on
    # the platform's locale encoding.
    with open(file_path, encoding="utf-8") as f:
        source = f.read()
    tree = ast.parse(source, filename=file_path)

    # ``async def`` is a separate node type; checking only FunctionDef would
    # silently drop coroutines from the generated docs.
    function_nodes = (ast.FunctionDef, ast.AsyncFunctionDef)

    elements = []
    for node in ast.walk(tree):
        if isinstance(node, function_nodes):
            elements.append({
                "type": "function",
                "name": node.name,
                # Regular positional parameters only, as exposed by args.args.
                "args": [a.arg for a in node.args.args],
                "lineno": node.lineno,
                "source": ast.get_source_segment(source, node),
            })
        elif isinstance(node, ast.ClassDef):
            # Only methods defined directly in the class body are listed.
            methods = [n.name for n in node.body if isinstance(n, function_nodes)]
            elements.append({
                "type": "class",
                "name": node.name,
                "methods": methods,
                "lineno": node.lineno,
                "source": ast.get_source_segment(source, node),
            })
    return elements
Step 2: Generate Documentation with AI
import anthropic
from dotenv import load_dotenv
load_dotenv()
def generate_doc(element, file_path):
    """Request markdown documentation for one code element from the Anthropic API.

    Args:
        element: Dict describing the code element; its "type" and "source"
            entries are interpolated into the prompt.
        file_path: Path of the file the element came from, mentioned in the
            prompt.

    Returns:
        The text of the first content block of the model's response.
    """
    api = anthropic.Anthropic()

    instructions = (
        "Generate concise documentation in markdown format.\n"
        "Include: purpose, parameters, return value, example usage.\n"
        "Keep it practical. No filler. Write for a developer who needs to use this code today."
    )
    prompt = f"Document this {element['type']} from {file_path}:\n\n```python\n{element['source']}\n```"
    user_message = {"role": "user", "content": prompt}

    reply = api.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=1024,
        system=instructions,
        messages=[user_message],
    )
    first_block = reply.content[0]
    return first_block.text
Step 3: Build the Doc Generator Pipeline
def generate_file_docs(file_path, docs_dir="docs/"):
    """Generate one markdown documentation file for a Python source file.

    Every element returned by ``extract_code_elements`` is documented with
    ``generate_doc``, except names starting with an underscore (``__init__``
    is kept). The sections are joined and written to
    ``<docs_dir>/<module name>.md``.

    Args:
        file_path: Path to the Python source file to document.
        docs_dir: Directory the markdown file is written to; created if it
            does not exist.

    Returns:
        The path of the written markdown file, or None when the source file
        contains no functions or classes.
    """
    elements = extract_code_elements(file_path)
    if not elements:
        return None

    # splitext drops only the trailing extension; str.replace(".py", "")
    # would also remove a ".py" that appears in the middle of the name.
    module_name = os.path.splitext(os.path.basename(file_path))[0]
    doc_path = os.path.join(docs_dir, f"{module_name}.md")

    sections = [f"# {module_name}\n"]
    for element in elements:
        name = element["name"]
        # Skip private helpers, but keep constructors.
        if name.startswith("_") and name != "__init__":
            continue
        sections.append(generate_doc(element, file_path))
        sections.append("\n---\n")

    os.makedirs(docs_dir, exist_ok=True)
    # Model output may contain non-ASCII characters; write UTF-8 explicitly so
    # the result does not depend on the platform's locale encoding.
    with open(doc_path, "w", encoding="utf-8") as f:
        f.write("\n".join(sections))
    return doc_path
Step 4: Generate Project-Wide Docs
def generate_project_docs(src_dir, docs_dir="docs/"):
    """Document every non-test Python file under a directory and write an index.

    Walks ``src_dir`` (skipping ``__pycache__``, ``.venv`` and
    ``node_modules``), calls ``generate_file_docs`` for each ``.py`` file
    whose name does not start with ``test_``, then writes
    ``<docs_dir>/index.md`` linking to every generated page.

    Args:
        src_dir: Root directory to search for Python files.
        docs_dir: Directory that receives the generated markdown files.

    Returns:
        The list of markdown paths returned by ``generate_file_docs``, in the
        order the files were processed.
    """
    skip_dirs = ("__pycache__", ".venv", "node_modules")

    documented = []
    for root, dirs, files in os.walk(src_dir):
        # Prune in place so os.walk does not descend into skipped directories.
        dirs[:] = [d for d in dirs if d not in skip_dirs]
        for filename in files:
            if filename.endswith(".py") and not filename.startswith("test_"):
                path = os.path.join(root, filename)
                print(f"Documenting {filename}...")
                doc_path = generate_file_docs(path, docs_dir)
                if doc_path:
                    documented.append(doc_path)

    # Generate index. docs_dir is only created as a side effect of documenting
    # a file, so make sure it exists even when nothing was documented.
    os.makedirs(docs_dir, exist_ok=True)
    index_path = os.path.join(docs_dir, "index.md")
    with open(index_path, "w", encoding="utf-8") as index_file:
        index_file.write("# API Documentation\n\n")
        # Files with the same basename map to the same page; list it once.
        for doc in sorted(set(documented)):
            page = os.path.basename(doc)
            name = os.path.splitext(page)[0]
            index_file.write(f"- [{name}]({page})\n")

    print(f"\nGenerated docs for {len(documented)} files")
    return documented
Step 5: Add a Freshness Check
Only regenerate docs for files that have changed since the last run:
import hashlib
import json
def get_file_hash(path):
    """Return the MD5 hex digest of a file's raw bytes.

    Args:
        path: Path of the file to fingerprint.

    Returns:
        The 32-character hexadecimal MD5 digest of the file contents.
    """
    digest = hashlib.md5()
    with open(path, "rb") as handle:
        # The file is read in binary mode so the digest reflects exact bytes.
        digest.update(handle.read())
    return digest.hexdigest()
def check_freshness(src_dir, hash_file="docs/.hashes.json"):
    """Find Python files whose contents changed since the previous call.

    Hashes every non-test ``.py`` file under ``src_dir`` (skipping
    ``__pycache__``, ``.venv`` and ``node_modules``, the same filter
    ``generate_project_docs`` applies), compares each hash with the one
    stored in ``hash_file``, and then overwrites ``hash_file`` with the
    current hashes.

    Because the stored hashes are updated on every call, a file is reported
    as stale only once per change.

    Args:
        src_dir: Root directory to search for Python files.
        hash_file: JSON file mapping file paths to their last seen hash.

    Returns:
        The list of paths that are new or whose hash differs from the stored
        one.
    """
    try:
        with open(hash_file, encoding="utf-8") as f:
            old_hashes = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        # A missing or unreadable cache just means every file counts as stale.
        old_hashes = {}

    skip_dirs = ("__pycache__", ".venv", "node_modules")
    stale = []
    current_hashes = {}
    for root, dirs, files in os.walk(src_dir):
        # Prune in place so os.walk does not descend into skipped directories.
        dirs[:] = [d for d in dirs if d not in skip_dirs]
        for filename in files:
            if filename.endswith(".py") and not filename.startswith("test_"):
                path = os.path.join(root, filename)
                file_hash = get_file_hash(path)
                current_hashes[path] = file_hash
                if old_hashes.get(path) != file_hash:
                    stale.append(path)

    # The hash file's directory may not exist yet on a first run.
    parent = os.path.dirname(hash_file)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(hash_file, "w", encoding="utf-8") as f:
        json.dump(current_hashes, f)
    return stale
What to Build Next
Add architecture diagram generation. Feed your entire project structure to the AI and have it generate a Mermaid diagram showing how the modules connect. That gives new team members a visual map of the system.
Related Reading
- Why Process Documentation Is the First Step - why documentation comes before automation
- Building a Changelog for Your Operations - tracking what changed and when
- Claude Code: The AI Coding Tool That Changes Operations - AI development workflows
Want this system built for your business?
Get a free assessment. We will map every system your business needs and show you the ROI.
Get Your Free Assessment