Systems Library / Infrastructure / How to Build an AI Documentation Generator
Infrastructure developer tools

How to Build an AI Documentation Generator

Generate code documentation automatically using AI analysis.

Jay Banlasan

Jay Banlasan

The AI Systems Guy

An AI documentation generator for automated code docs solves the problem every developer ignores until they can't remember what their own code does. I run this across every project I maintain. It reads the source, generates function docs, API references, and architecture overviews, then writes them to markdown files.

Documentation that writes itself actually gets maintained.

What You Need Before Starting

You need Python 3.8 or newer (the code uses `ast.get_source_segment`, added in 3.8), the `anthropic` and `python-dotenv` packages (`pip install anthropic python-dotenv`), and an `ANTHROPIC_API_KEY` set in your environment or a `.env` file.

Step 1: Parse Source Files for Functions and Classes

import ast
import os

def extract_code_elements(file_path):
    """Parse a Python source file and collect its functions and classes.

    Args:
        file_path: Path to the .py file to analyze.

    Returns:
        A list of dicts, one per element, each with "type", "name",
        "lineno", and "source" keys, plus "args" for functions or
        "methods" for classes.

    Raises:
        SyntaxError: If the file is not valid Python.
        OSError: If the file cannot be read.
    """
    with open(file_path) as f:
        source = f.read()

    tree = ast.parse(source)
    elements = []

    # Match async defs too -- checking only ast.FunctionDef silently
    # skips every `async def` function and method.
    func_types = (ast.FunctionDef, ast.AsyncFunctionDef)

    for node in ast.walk(tree):
        if isinstance(node, func_types):
            args = [a.arg for a in node.args.args]
            elements.append({
                "type": "function",
                "name": node.name,
                "args": args,
                "lineno": node.lineno,
                "source": ast.get_source_segment(source, node)
            })
        elif isinstance(node, ast.ClassDef):
            # NOTE: ast.walk also visits method nodes individually, so
            # methods appear both here and as "function" entries.
            methods = [n.name for n in node.body if isinstance(n, func_types)]
            elements.append({
                "type": "class",
                "name": node.name,
                "methods": methods,
                "lineno": node.lineno,
                "source": ast.get_source_segment(source, node)
            })

    return elements

Step 2: Generate Documentation with AI

import anthropic
from dotenv import load_dotenv

load_dotenv()

def generate_doc(element, file_path, client=None):
    """Generate markdown documentation for one code element via the Claude API.

    Args:
        element: Dict from extract_code_elements; reads "type" and "source".
        file_path: Source file path, included in the prompt for context.
        client: Optional anthropic.Anthropic instance to reuse across calls.
            When omitted a fresh client is created (backward compatible),
            but callers documenting many elements should pass a shared one
            instead of paying for a new client and connection pool per call.

    Returns:
        The generated markdown documentation as a string.
    """
    if client is None:
        client = anthropic.Anthropic()

    response = client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=1024,
        system="""Generate concise documentation in markdown format.
Include: purpose, parameters, return value, example usage.
Keep it practical. No filler. Write for a developer who needs to use this code today.""",
        messages=[{
            "role": "user",
            "content": f"Document this {element['type']} from {file_path}:\n\n```python\n{element['source']}\n```"
        }]
    )

    return response.content[0].text

Step 3: Build the Doc Generator Pipeline

def generate_file_docs(file_path, docs_dir="docs/"):
    """Generate one markdown doc file covering a source file's public elements.

    Args:
        file_path: Path of the Python file to document.
        docs_dir: Directory where the .md file is written (created if missing).

    Returns:
        Path of the written markdown file, or None if the file contained
        no documentable elements.
    """
    elements = extract_code_elements(file_path)
    if not elements:
        return None

    # splitext, not str.replace: replace(".py", "") would also mangle a
    # ".py" occurring mid-name (e.g. "my.pytools.py" -> "mytools").
    # NOTE(review): keying docs by basename means same-named modules in
    # different packages overwrite each other -- fine for flat layouts,
    # confirm before using on a nested package.
    module_name = os.path.splitext(os.path.basename(file_path))[0]
    doc_path = os.path.join(docs_dir, f"{module_name}.md")

    sections = [f"# {module_name}\n"]

    for element in elements:
        # Skip private helpers but keep constructors documented.
        if element["name"].startswith("_") and element["name"] != "__init__":
            continue

        doc = generate_doc(element, file_path)
        sections.append(doc)
        sections.append("\n---\n")

    os.makedirs(docs_dir, exist_ok=True)
    with open(doc_path, "w") as f:
        f.write("\n".join(sections))

    return doc_path

Step 4: Generate Project-Wide Docs

def generate_project_docs(src_dir, docs_dir="docs/"):
    """Walk src_dir and generate markdown docs for every non-test Python file.

    Args:
        src_dir: Root directory to scan recursively.
        docs_dir: Output directory for the per-file docs and index.md.

    Returns:
        List of paths of the generated markdown files.
    """
    documented = []

    for root, dirs, files in os.walk(src_dir):
        # Prune cache/vendor dirs in place so os.walk never descends into them.
        dirs[:] = [d for d in dirs if d not in ("__pycache__", ".venv", "node_modules")]
        for f in files:
            if f.endswith(".py") and not f.startswith("test_"):
                path = os.path.join(root, f)
                print(f"Documenting {f}...")
                try:
                    doc_path = generate_file_docs(path, docs_dir)
                except SyntaxError as e:
                    # One unparseable file should not abort the whole run.
                    print(f"  Skipping {f}: {e}")
                    continue
                if doc_path:
                    documented.append(doc_path)

    # Create docs_dir explicitly: generate_file_docs only creates it when a
    # file yields docs, so writing index.md below would otherwise raise
    # FileNotFoundError on an empty project.
    os.makedirs(docs_dir, exist_ok=True)

    # Generate index
    index_path = os.path.join(docs_dir, "index.md")
    with open(index_path, "w") as f:
        f.write("# API Documentation\n\n")
        for doc in sorted(documented):
            name = os.path.splitext(os.path.basename(doc))[0]
            f.write(f"- [{name}]({os.path.basename(doc)})\n")

    print(f"\nGenerated docs for {len(documented)} files")
    return documented

Step 5: Add a Freshness Check

Only regenerate docs for files that changed since last run:

import hashlib
import json

def get_file_hash(path):
    """Return the MD5 hex digest of a file's contents.

    MD5 is appropriate here: the digest is used only for change
    detection, not for anything security-sensitive.

    Args:
        path: Path of the file to hash.

    Returns:
        32-character lowercase hex digest string.
    """
    digest = hashlib.md5()
    with open(path, "rb") as f:
        # Stream in chunks instead of f.read() so a very large file is
        # never loaded into memory at once.
        while chunk := f.read(65536):
            digest.update(chunk)
    return digest.hexdigest()

def check_freshness(src_dir, hash_file="docs/.hashes.json"):
    """Return the .py files under src_dir that changed since the last run.

    Compares each file's content hash against the hashes recorded by the
    previous invocation, then rewrites the hash file with current values.

    Args:
        src_dir: Root directory to scan recursively.
        hash_file: JSON file mapping file path -> content hash.

    Returns:
        List of paths whose contents differ from the recorded hash
        (files never seen before are included).
    """
    try:
        with open(hash_file) as f:
            old_hashes = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        # Missing or corrupt state file: treat every file as stale rather
        # than crashing on a half-written hashes file.
        old_hashes = {}

    stale = []
    current_hashes = {}

    for root, dirs, files in os.walk(src_dir):
        dirs[:] = [d for d in dirs if d not in ("__pycache__", ".venv")]
        for f in files:
            if f.endswith(".py"):
                path = os.path.join(root, f)
                h = get_file_hash(path)
                current_hashes[path] = h
                if old_hashes.get(path) != h:
                    stale.append(path)

    # The hash file lives under docs/, which may not exist on a first run;
    # create its directory before writing.
    hash_dir = os.path.dirname(hash_file)
    if hash_dir:
        os.makedirs(hash_dir, exist_ok=True)
    # NOTE(review): hashes are saved before docs are regenerated, so a
    # failed regeneration still marks files fresh -- confirm callers
    # regenerate immediately after this returns.
    with open(hash_file, "w") as f:
        json.dump(current_hashes, f)

    return stale

What to Build Next

Add architecture diagram generation. Feed your entire project structure to AI and have it generate a mermaid diagram showing how modules connect. That gives new team members a visual map of the system.

Related Reading

Want this system built for your business?

Get a free assessment. We will map every system your business needs and show you the ROI.

Get Your Free Assessment

Related Systems