How to Build a Multi-Turn Conversation with Claude
Implement conversation memory and context management with Claude API.
Jay Banlasan
The AI Systems Guy
Building a multi-turn conversation with Claude requires understanding one thing: the model has no persistent memory. Every time you call the API, you pass the entire conversation history yourself. This is not a limitation, it is a feature. You control exactly what context the model sees. The claude multi-turn conversation api pattern gives you full control over what gets remembered, what gets trimmed, and how long the context stays relevant.
I build conversation memory into every client-facing chatbot I deploy. The naive version just appends all messages forever until you hit the context limit. The production version trims intelligently, summarizes old context, and keeps costs predictable. This tutorial covers both.
What You Need Before Starting
- Anthropic API key set up
- `anthropic` Python SDK installed
- Understanding of how message lists work (each message has a `role` and `content`)
- A target use case: customer support, research assistant, onboarding bot
Step 1: The Basic Multi-Turn Pattern
import os
import anthropic
from dotenv import load_dotenv

# Load ANTHROPIC_API_KEY (and any other settings) from a local .env file.
load_dotenv()
client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))

# Sent with every request; the Messages API takes this as a separate `system`
# field rather than as a message in the list.
SYSTEM_PROMPT = "You are a helpful business assistant. Be concise and specific."
def chat_with_claude(user_message: str, history: list) -> tuple[str, list]:
    """
    Send one user turn to Claude and return its reply plus the updated history.

    The Messages API is stateless, so the full transcript is sent on every
    call. The caller's `history` list is never mutated; fresh lists are
    returned instead.

    Args:
        user_message: The new user input.
        history: Previous message dicts (each with "role" and "content").

    Returns:
        Tuple of (response_text, updated_history).
    """
    # Build the outgoing transcript without mutating the caller's list.
    outgoing = [*history, {"role": "user", "content": user_message}]

    api_response = client.messages.create(
        model="claude-opus-4-5",
        max_tokens=1000,
        system=SYSTEM_PROMPT,
        messages=outgoing,
    )

    assistant_text = api_response.content[0].text

    # Return a new list that also records the assistant's turn.
    return assistant_text, [*outgoing, {"role": "assistant", "content": assistant_text}]
# Simple terminal loop
def run_chat():
    """Interactive terminal chat; type 'quit' to end the session."""
    conversation = []
    print("Chat with Claude (type 'quit' to exit)\n")
    # Walrus keeps read-strip-check on one line; loop ends on 'quit'.
    while (entry := input("You: ").strip()).lower() != "quit":
        answer, conversation = chat_with_claude(entry, conversation)
        print(f"\nClaude: {answer}\n")
        print(f"[Context: {len(conversation)} messages]\n")
Step 2: Build a Conversation Manager Class
For production use, encapsulate conversation state in a class:
from dataclasses import dataclass, field
from typing import Optional
import anthropic
import os
@dataclass
class Message:
    """One conversation turn, mirroring the Messages API wire format."""
    role: str  # "user" or "assistant"
    content: str  # plain text of the turn
class ConversationManager:
    """
    Manages a conversation with Claude including history, context limits, and metadata.

    The Messages API is stateless: the full history must be sent on every
    call, so this class owns the transcript, trims it to keep requests
    bounded, and tracks cumulative token usage.
    """
    def __init__(
        self,
        system_prompt: str,
        model: str = "claude-opus-4-5",
        max_tokens: int = 1000,
        max_history_messages: int = 20
    ):
        """
        Args:
            system_prompt: Instructions sent with every request.
            model: Claude model id used for replies.
            max_tokens: Per-response output token cap.
            max_history_messages: Max messages retained before trimming.
        """
        self.system_prompt = system_prompt
        self.model = model
        self.max_tokens = max_tokens
        self.max_history_messages = max_history_messages
        self.history: list[Message] = []
        self.client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
        self.total_tokens_used = 0  # cumulative input + output tokens

    def send(self, user_message: str) -> str:
        """Send a message and return Claude's reply, updating the history."""
        # Add user message
        self.history.append(Message(role="user", content=user_message))
        # Trim history if needed (before the API call, so the request is bounded)
        self._trim_history()
        # Format for API
        messages = [{"role": m.role, "content": m.content} for m in self.history]
        # Call API
        response = self.client.messages.create(
            model=self.model,
            max_tokens=self.max_tokens,
            system=self.system_prompt,
            messages=messages
        )
        reply = response.content[0].text
        self.total_tokens_used += response.usage.input_tokens + response.usage.output_tokens
        # Add assistant reply
        self.history.append(Message(role="assistant", content=reply))
        return reply

    def _trim_history(self) -> None:
        """Keep only the most recent messages to avoid hitting context limits."""
        if len(self.history) > self.max_history_messages:
            # Keep the most recent messages (always drop from the beginning)
            self.history = self.history[-self.max_history_messages:]
            # BUG FIX: the Messages API requires the first message to have
            # role "user". A blind tail-slice can leave an assistant message
            # first (e.g. odd-length history cut to an even window), which
            # the API rejects — drop any leading assistant turns.
            while self.history and self.history[0].role != "user":
                self.history.pop(0)

    def reset(self) -> None:
        """Clear conversation history."""
        self.history = []

    def get_summary(self) -> str:
        """Ask the model for a 3-bullet summary of the conversation so far."""
        if not self.history:
            return "No conversation yet."
        history_text = "\n".join([f"{m.role}: {m.content}" for m in self.history])
        response = self.client.messages.create(
            model=self.model,
            max_tokens=300,
            messages=[{
                "role": "user",
                "content": f"Summarize this conversation in 3 bullet points:\n\n{history_text}"
            }]
        )
        return response.content[0].text

    @property
    def message_count(self) -> int:
        """Number of messages currently held in the transcript."""
        return len(self.history)
# Usage: a short onboarding conversation — history carries across send() calls.
manager = ConversationManager(
    system_prompt="You are an onboarding assistant for a B2B SaaS product. Help new users get set up.",
    max_history_messages=16
)
print(manager.send("Hi, I just signed up. Where do I start?"))
print(manager.send("What integrations do you support?"))
print(manager.send("Can I connect it to Slack?"))  # relies on earlier turns for context
print(f"\nTotal tokens used: {manager.total_tokens_used}")
Step 3: Implement Context Summarization for Long Conversations
For conversations that need to go long (customer onboarding, research sessions), summarize old context instead of dropping it:
class LongConversationManager(ConversationManager):
    """
    Extended conversation manager that summarizes old context instead of
    dropping it, so long sessions (onboarding, research) keep earlier facts.
    """
    def __init__(self, *args, summarize_threshold: int = 16, **kwargs):
        """
        Args:
            summarize_threshold: History length that triggers summarization.
        """
        super().__init__(*args, **kwargs)
        self.summarize_threshold = summarize_threshold
        self.summary = ""  # Accumulated summary of past context
        # BUG FIX: keep the untouched base prompt so the summary section can
        # be rebuilt on each trim. The original appended to self.system_prompt
        # every time, so repeated trims stacked duplicate copies of earlier
        # summaries and the system prompt grew without bound.
        self._base_system_prompt = self.system_prompt

    def _trim_history(self) -> None:
        """Summarize the oldest half of the history instead of dropping it."""
        if len(self.history) <= self.summarize_threshold:
            return
        # Take the oldest half of messages and summarize them
        split_point = len(self.history) // 2
        old_messages = self.history[:split_point]
        self.history = self.history[split_point:]
        # Generate summary of old messages
        old_text = "\n".join(f"{m.role}: {m.content}" for m in old_messages)
        summary_response = self.client.messages.create(
            # BUG FIX: "claude-haiku-20240307" is not a valid model id; the
            # Haiku model id is "claude-3-haiku-20240307". Cheap model for summaries.
            model="claude-3-haiku-20240307",
            max_tokens=300,
            messages=[{
                "role": "user",
                "content": f"Summarize the key points from this conversation excerpt in 3-5 bullet points:\n\n{old_text}"
            }]
        )
        new_summary = summary_response.content[0].text
        # Update cumulative summary
        if self.summary:
            self.summary = f"Previous context summary:\n{self.summary}\n\nMore recent context:\n{new_summary}"
        else:
            self.summary = new_summary
        # Rebuild (not append to) the system prompt so repeated trims never
        # duplicate earlier summaries.
        self.system_prompt = (
            f"{self._base_system_prompt}\n\n---\nConversation context from earlier:\n{self.summary}"
        )
Step 4: Persist Conversations to Disk
import json
from pathlib import Path
from datetime import datetime
def save_conversation(manager: "ConversationManager", session_id: str, save_dir: str = "conversations") -> str:
    """
    Save conversation history and metadata to ``<save_dir>/<session_id>.json``.

    Args:
        manager: Conversation whose state should be persisted.
        session_id: File stem for the saved JSON file.
        save_dir: Directory to save into (created if missing).

    Returns:
        The path of the written file, as a string.
    """
    # parents=True so nested save directories (e.g. "data/conversations") work.
    save_path = Path(save_dir)
    save_path.mkdir(parents=True, exist_ok=True)
    data = {
        "session_id": session_id,
        "saved_at": datetime.now().isoformat(),
        "system_prompt": manager.system_prompt,
        "messages": [{"role": m.role, "content": m.content} for m in manager.history],
        "total_tokens": manager.total_tokens_used
    }
    path = save_path / f"{session_id}.json"
    # Explicit UTF-8 + ensure_ascii=False keeps non-ASCII conversation text
    # readable on disk regardless of the platform's default encoding.
    path.write_text(json.dumps(data, indent=2, ensure_ascii=False), encoding="utf-8")
    return str(path)
def load_conversation(session_id: str, save_dir: str = "conversations") -> ConversationManager:
    """Rebuild a ConversationManager from a previously saved JSON session file."""
    saved = json.loads((Path(save_dir) / f"{session_id}.json").read_text())
    restored = ConversationManager(system_prompt=saved["system_prompt"])
    restored.history = [
        Message(role=entry["role"], content=entry["content"])
        for entry in saved["messages"]
    ]
    # Older save files may predate token tracking; default to zero.
    restored.total_tokens_used = saved.get("total_tokens", 0)
    return restored
What to Build Next
- Add user identity tracking so conversations can resume across sessions with the right context
- Build a conversation analysis layer that extracts action items and follow-ups automatically
- Combine with function calling to create an agent that takes actions mid-conversation
Related Reading
- How to Set Up Anthropic Claude with System Prompts - The system prompt foundation that conversation managers use
- How to Stream AI Responses in Real-Time - Stream conversation replies for better user experience
- How to Create Your First AI Chatbot with OpenAI - Same conversation pattern with OpenAI
Want this system built for your business?
Get a free assessment. We will map every system your business needs and show you the ROI.
Get Your Free Assessment