Systems Library / AI Model Setup / How to Fine-Tune GPT on Your Business Data
Category: AI Model Setup · Level: Advanced

How to Fine-Tune GPT on Your Business Data

Train a custom GPT model on your company writing style and knowledge.

Jay Banlasan

Jay Banlasan

The AI Systems Guy

Fine-tuning GPT on custom business data is the right tool for one specific problem: when a model consistently produces outputs that are almost right but wrong in the same predictable ways, and system prompts have failed to fix it. I have used fine-tuning to train email generators on a founder's specific writing style, to teach a classifier the difference between deal types that a base model consistently confuses, and to reduce system prompt length by baking instructions into the weights. It is not magic, but used correctly it eliminates whole categories of output problems.

The most common mistake is fine-tuning when the real fix is a better system prompt. Fine-tuning costs money and takes time. Always exhaust prompt engineering first.

What You Need Before Starting

Step 1: Decide If Fine-Tuning Is the Right Tool

Fine-tuning is the right choice when:

Fine-tuning is NOT the right choice when:

Step 2: Build Your Training Dataset

Fine-tuning datasets are JSONL files. Each line is one training example in the messages format.

import json

def create_training_example(system: str, user: str, assistant: str) -> dict:
    """Build one chat-format training example for an OpenAI fine-tune JSONL file.

    Returns a dict with a "messages" list holding the system, user, and
    assistant turns in that order.
    """
    turns = (("system", system), ("user", user), ("assistant", assistant))
    return {
        "messages": [{"role": role, "content": content} for role, content in turns]
    }

# Example: fine-tuning for a specific email style.
# NOTE: this exact system prompt must also be sent at inference time so the
# fine-tuned behavior is triggered consistently.
SYSTEM_PROMPT = "You write follow-up emails for a B2B software company. Casual, direct, under 80 words, end with a question."

# Hand-written input/output pairs demonstrating the target style
# (casual, direct, under 80 words, ending with a question).
training_examples = [
    create_training_example(
        SYSTEM_PROMPT,
        "Follow-up for Sarah who attended our demo on Monday.",
        "Hey Sarah, good to connect on Monday. The workflow automation piece you asked about - we just shipped an update that handles exactly that use case. Worth a quick look? Would a 20-minute call this week work for you?"
    ),
    create_training_example(
        SYSTEM_PROMPT,
        "Follow-up for Marcus who downloaded our pricing guide 3 days ago.",
        "Marcus, saw you grabbed the pricing guide - hope it was useful. Most people have questions about the Enterprise tier after reading it. Anything I can clarify, or does the scope feel right for what you're building?"
    ),
    # Add 48+ more examples following the same pattern
]

def save_training_file(examples: list, filepath: str):
    """Write training examples to a JSONL file (one JSON object per line).

    Args:
        examples: List of dicts in the chat "messages" format.
        filepath: Destination path for the JSONL file.
    """
    # Force UTF-8 so non-ASCII content round-trips on every platform;
    # the default locale encoding can fail (e.g. cp1252 on Windows).
    with open(filepath, "w", encoding="utf-8") as f:
        for example in examples:
            f.write(json.dumps(example) + "\n")
    print(f"Saved {len(examples)} examples to {filepath}")

# Persist the assembled examples to disk as JSONL, ready for upload.
save_training_file(training_examples, "training_data.jsonl")

Quality rules for training data:

Step 3: Validate Your Training Data

Always validate your data before uploading. OpenAI's cookbook includes a full validation script; the simplified version below catches the most common formatting problems.

import json
from collections import defaultdict

def validate_training_file(filepath: str) -> bool:
    """Validate a JSONL fine-tuning file before upload.

    Checks that every non-blank line is valid JSON, that each example has
    a 'messages' list containing user and assistant turns, and prints a
    rough token/cost estimate.

    Args:
        filepath: Path to the JSONL training file.

    Returns:
        True when no errors were found. Warnings (e.g. empty message
        content) are printed but do not fail validation.
    """
    errors = []
    warnings = []
    data = []

    with open(filepath, "r", encoding="utf-8") as f:
        for i, line in enumerate(f, 1):
            if not line.strip():
                # Skip blank lines instead of flagging them as invalid JSON.
                continue
            try:
                data.append(json.loads(line))
            except json.JSONDecodeError as e:
                errors.append(f"Line {i}: Invalid JSON - {e}")

    if errors:
        for e in errors:
            print(f"ERROR: {e}")
        return False

    print(f"Loaded {len(data)} examples")

    # Check required structure
    for i, example in enumerate(data, 1):
        messages = example.get("messages")
        if not isinstance(messages, list):
            errors.append(f"Example {i}: Missing 'messages' key")
            continue

        roles = [m.get("role") for m in messages if isinstance(m, dict)]

        if "user" not in roles:
            errors.append(f"Example {i}: No 'user' message")
        if "assistant" not in roles:
            errors.append(f"Example {i}: No 'assistant' message")

        for msg in messages:
            if not isinstance(msg, dict):
                continue
            # `or ""` also catches a present-but-None content value, which
            # would otherwise crash .strip().
            if not str(msg.get("content") or "").strip():
                warnings.append(f"Example {i}: Empty content in {msg.get('role')} message")

    # Token count estimate: ~1.3 tokens per whitespace-separated word.
    # Guard with .get()/isinstance so malformed examples (already reported
    # as errors above) cannot raise KeyError here before errors are printed.
    total_tokens = sum(
        sum(
            len(str(m.get("content") or "").split()) * 1.3
            for m in ex["messages"]
            if isinstance(m, dict)
        )
        for ex in data
        if isinstance(ex.get("messages"), list)
    )
    estimated_cost = (total_tokens / 1000) * 0.008  # GPT-3.5 fine-tune rate approx

    for e in errors:
        print(f"ERROR: {e}")
    for w in warnings:
        print(f"WARNING: {w}")

    print(f"\nEstimated tokens: {int(total_tokens):,}")
    print(f"Estimated training cost (1 epoch): ~${estimated_cost:.2f}")

    return len(errors) == 0

# Validate the file we just wrote before spending money on a fine-tune.
validate_training_file("training_data.jsonl")

Step 4: Upload and Start Fine-Tuning

import openai
import os

# Shared OpenAI client used by all functions below; requires OPENAI_API_KEY
# in the environment (raises KeyError at import time if it is missing).
client = openai.OpenAI(api_key=os.environ["OPENAI_API_KEY"])

def start_fine_tune(
    training_file: str,
    model: str = "gpt-4o-mini-2024-07-18",
    suffix: str = "email-followup",
    n_epochs: int = 3
) -> str:
    """Upload a JSONL training file and launch an OpenAI fine-tuning job.

    Args:
        training_file: Path to the local JSONL training file.
        model: Base model to fine-tune.
        suffix: Custom suffix embedded in the resulting model id.
        n_epochs: Number of training epochs.

    Returns:
        The fine-tune job id, for polling job status later.
    """
    # Upload the training file
    print("Uploading training file...")
    with open(training_file, "rb") as handle:
        uploaded = client.files.create(file=handle, purpose="fine-tune")
    print(f"File uploaded: {uploaded.id}")

    # Start the fine-tuning job
    print("Starting fine-tune job...")
    job = client.fine_tuning.jobs.create(
        training_file=uploaded.id,
        model=model,
        suffix=suffix,
        hyperparameters={"n_epochs": n_epochs},
    )

    print(f"Fine-tune job started: {job.id}")
    print(f"Status: {job.status}")
    print(f"Estimated completion: check status in ~15-30 minutes for small datasets")

    return job.id

# Kick off the fine-tune; keep the job id for monitoring below.
job_id = start_fine_tune("training_data.jsonl", suffix="email-v1")

Step 5: Monitor the Training Job

import time

def wait_for_fine_tune(job_id: str, poll_interval: int = 60) -> str:
    """Poll a fine-tuning job until it reaches a terminal state.

    Args:
        job_id: The fine-tune job id returned by start_fine_tune().
        poll_interval: Seconds to sleep between status checks.

    Returns:
        The fine-tuned model id (usable with chat.completions.create).

    Raises:
        RuntimeError: If the job fails or is cancelled.
    """
    print(f"Monitoring job: {job_id}")

    while True:
        job = client.fine_tuning.jobs.retrieve(job_id)
        status = job.status

        print(f"[{time.strftime('%H:%M:%S')}] Status: {status}")

        if status == "succeeded":
            model_id = job.fine_tuned_model
            print(f"\nFine-tune complete!")
            print(f"Model ID: {model_id}")
            return model_id

        elif status == "failed":
            print(f"\nFine-tune failed: {job.error}")
            raise RuntimeError(f"Fine-tune failed: {job.error}")

        elif status == "cancelled":
            # A cancelled job is terminal; without this branch the loop
            # would poll forever.
            print(f"\nFine-tune cancelled: {job_id}")
            raise RuntimeError(f"Fine-tune cancelled: {job_id}")

        elif status in ["running", "queued", "validating_files"]:
            # Print training metrics if available
            events = client.fine_tuning.jobs.list_events(job_id, limit=5)
            for event in reversed(events.data):
                if "train_loss" in event.message:
                    print(f"  {event.message}")

        time.sleep(poll_interval)

# Run this after starting the job
# model_id = wait_for_fine_tune(job_id)

Step 6: Test and Evaluate Your Fine-Tuned Model

def test_fine_tuned_model(fine_tuned_model_id: str, test_inputs: list) -> None:
    """Run each test input through the fine-tuned model and print the output.

    Uses the same system prompt the model was trained with so the
    fine-tuned style is triggered.
    """
    SYSTEM_PROMPT = "You write follow-up emails for a B2B software company. Casual, direct, under 80 words, end with a question."

    print(f"Testing: {fine_tuned_model_id}\n")

    for prompt in test_inputs:
        conversation = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": prompt},
        ]
        completion = client.chat.completions.create(
            model=fine_tuned_model_id,
            messages=conversation,
            max_tokens=200,
            temperature=0.7,
        )
        print(f"Input: {prompt}")
        print(f"Output: {completion.choices[0].message.content}")
        print("-" * 40)

# Held-out scenarios that are NOT in the training data, so they measure
# generalization of the learned style rather than memorization.
test_inputs = [
    "Follow-up for Janet who requested a demo for her 20-person marketing team.",
    "Follow-up for Tom who went cold after two calls.",
    "Follow-up for a startup founder who said they couldn't afford it.",
]

# test_fine_tuned_model("ft:gpt-4o-mini:your-org:email-v1:abc123", test_inputs)

Compare outputs side by side against the base model on the same inputs. If the fine-tuned model is not clearly better on your test cases, it is not worth the per-call cost premium.

What to Build Next

Related Reading

Want this system built for your business?

Get a free assessment. We will map every system your business needs and show you the ROI.

Get Your Free Assessment

Related Systems