Infrastructure
developer tools
How to Create an Automated Testing Pipeline with AI
Build AI-powered test generation and execution pipelines.
Jay Banlasan
The AI Systems Guy
An automated testing pipeline with AI for code quality takes the most tedious part of development and handles it for you. I use AI to generate test cases from my source code, run them automatically, and flag anything that breaks. Writing tests manually is important for critical paths, but AI can cover the 80% of boilerplate tests that catch regressions.
What You Need Before Starting
- Python 3.8+ with pytest and the pytest-json-report plugin installed (the test runner in Step 3 depends on it)
- An Anthropic API key, plus the anthropic and python-dotenv packages
- A codebase with functions to test
- Basic familiarity with unit testing
Step 1: Build the Test Generator
import anthropic
from dotenv import load_dotenv
load_dotenv()
def generate_tests(source_code, file_path):
    """Ask Claude to write pytest tests for one source file.

    Args:
        source_code: Full text of the Python module to generate tests for.
        file_path: Path of that module, included in the prompt for context.

    Returns:
        The generated test module as a string, with any surrounding
        markdown code fence stripped.
    """
    client = anthropic.Anthropic()
    response = client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=2048,
        system="""You are a Python testing expert. Generate pytest test cases.
Rules:
- Test happy path, edge cases, and error cases
- Use descriptive test names: test_function_name_scenario
- Use assert statements, not unittest methods
- Mock external dependencies
- No comments explaining obvious assertions""",
        messages=[{
            "role": "user",
            "content": f"Generate pytest tests for this code from {file_path}:\n\n```python\n{source_code}\n```"
        }]
    )
    return _strip_code_fence(response.content[0].text)


def _strip_code_fence(text):
    """Remove a surrounding markdown code fence from *text*, if present.

    The previous version dropped the reply's last line unconditionally
    whenever the reply began with a fence, which truncated responses whose
    closing ``` was followed by trailing prose (or was missing entirely).
    Here only actual fence lines are removed.
    """
    lines = text.strip().split("\n")
    if lines and lines[0].startswith("```"):
        lines = lines[1:]  # opening fence (may carry a language tag like ```python)
        for i, line in enumerate(lines):
            if line.strip() == "```":
                lines = lines[:i]  # drop the closing fence and anything after it
                break
    return "\n".join(lines)
Step 2: Scan Source Files and Generate Tests
import os
import ast
def find_testable_files(src_dir):
testable = []
for root, dirs, files in os.walk(src_dir):
dirs[:] = [d for d in dirs if d not in ("__pycache__", ".venv", "node_modules")]
for f in files:
if f.endswith(".py") and not f.startswith("test_"):
path = os.path.join(root, f)
with open(path) as fh:
content = fh.read()
try:
tree = ast.parse(content)
has_functions = any(isinstance(n, (ast.FunctionDef, ast.ClassDef)) for n in ast.walk(tree))
if has_functions:
testable.append({"path": path, "content": content})
except SyntaxError:
continue
return testable
def generate_all_tests(src_dir, test_dir="tests/"):
    """Generate a pytest file for every testable source file under *src_dir*.

    Test files are written to *test_dir* as ``test_<name>``, where the name
    is the file's path relative to *src_dir* with separators flattened to
    underscores. Using the relative path (not just the basename) means two
    modules with the same filename in different sub-packages no longer
    collide on a single test file; top-level files keep their old names.
    Existing test files are never overwritten, so hand-edited tests survive
    repeated runs.

    Args:
        src_dir: Root directory of the source tree to scan.
        test_dir: Directory to write generated test files into (created if
            missing).
    """
    os.makedirs(test_dir, exist_ok=True)
    for file_info in find_testable_files(src_dir):
        rel = os.path.relpath(file_info["path"], src_dir)
        test_name = f"test_{rel.replace(os.sep, '_')}"
        test_path = os.path.join(test_dir, test_name)
        if os.path.exists(test_path):
            print(f"Skipping {test_name} (already exists)")
            continue
        print(f"Generating tests for {rel}...")
        test_code = generate_tests(file_info["content"], file_info["path"])
        with open(test_path, "w", encoding="utf-8") as f:
            f.write(test_code)
        print(f" Wrote {test_path}")
Step 3: Run Tests and Capture Results
import json
import subprocess
import sys
def run_tests(test_dir="tests/"):
    """Run pytest on *test_dir* and return a machine-readable summary.

    Requires the ``pytest-json-report`` plugin, which writes the summary to
    ``test_results.json``.

    Args:
        test_dir: Directory containing the test files to run.

    Returns:
        dict with ``passed``, ``failed``, ``errors`` counts and ``output``,
        the last 2000 characters of pytest's stdout (or stderr when no
        report could be read).
    """
    report_file = "test_results.json"
    # Remove any report left over from a previous run. Otherwise, if pytest
    # crashes before writing a new report, we would silently read the stale
    # one and return results from the wrong run.
    try:
        os.remove(report_file)
    except FileNotFoundError:
        pass
    result = subprocess.run(
        # sys.executable guarantees we test with the interpreter running this
        # script, not whatever "python" resolves to on PATH.
        [sys.executable, "-m", "pytest", test_dir, "-v", "--tb=short",
         "--json-report", f"--json-report-file={report_file}"],
        capture_output=True, text=True
    )
    try:
        with open(report_file) as f:
            report = json.load(f)
        summary = report["summary"]
        return {
            "passed": summary.get("passed", 0),
            "failed": summary.get("failed", 0),
            "errors": summary.get("error", 0),
            "output": result.stdout[-2000:] if result.stdout else ""
        }
    except (FileNotFoundError, json.JSONDecodeError, KeyError):
        # Missing or corrupt report usually means pytest itself failed to
        # start (e.g. the json-report plugin is not installed).
        return {
            "passed": 0,
            "failed": 0,
            "errors": 1,
            "output": result.stderr[-2000:]
        }
Step 4: Build the CI Script
def run_pipeline(src_dir="src/", test_dir="tests/"):
    """Drive the full pipeline: generate missing tests, run the suite, report.

    Args:
        src_dir: Source tree handed to the test generator.
        test_dir: Directory where tests are written and executed.

    Returns:
        True when every test passed, False when anything failed or errored.
    """
    print("Step 1: Generating tests...")
    generate_all_tests(src_dir, test_dir)

    print("\nStep 2: Running tests...")
    summary = run_tests(test_dir)
    print(
        f"\nResults: {summary['passed']} passed, "
        f"{summary['failed']} failed, {summary['errors']} errors"
    )

    clean = summary["failed"] == 0 and summary["errors"] == 0
    if clean:
        print("\nAll tests passing.")
    else:
        print("\nFailing tests detected. Review output above.")
    return clean


if __name__ == "__main__":
    exit(0 if run_pipeline() else 1)
Step 5: Schedule Regular Runs
# Run nightly
0 2 * * * cd /path/to/project && python test_pipeline.py >> /var/log/test_pipeline.log 2>&1
What to Build Next
Add coverage tracking with pytest-cov. When coverage drops below a threshold, have the AI generate tests specifically for uncovered functions.
Related Reading
- The Testing Pyramid for AI Operations - testing strategy for AI-powered systems
- Claude Code: The AI Coding Tool That Changes Operations - AI in the development workflow
- Why Process Documentation Is the First Step - documenting what your tests cover
Want this system built for your business?
Get a free assessment. We will map every system your business needs and show you the ROI.
Get Your Free Assessment