How to Build an AI Assistant with Memory
Implement persistent memory so your AI assistant remembers past conversations.
Jay Banlasan
The AI Systems Guy
Most AI assistants forget everything the moment a session ends. To build an AI assistant with conversation memory that actually sticks, you need to persist context outside the model itself. I build this pattern into every client assistant I deploy because without it, users repeat themselves endlessly and trust in the tool drops fast.
The business case is simple: a support assistant that remembers a customer's plan tier, past complaints, and preferences handles issues in one turn instead of five. That difference compounds across thousands of interactions. Memory is not a nice-to-have, it is what separates a tool from a system.
What You Need Before Starting
- Python 3.10+
- The anthropic library (pip install anthropic)
- sqlite3 (built into the Python standard library)
- An Anthropic API key in your environment as ANTHROPIC_API_KEY
- Basic familiarity with making Claude API calls (see tutorial 001)
Step 1: Design Your Memory Schema
Memory has two layers. Short-term memory is the conversation turns in the current session. Long-term memory is facts and summaries that persist across sessions. Store them separately.
Create a SQLite database with two tables:
import sqlite3
def init_db(db_path: str = "assistant_memory.db") -> None:
    """Create the long-term and short-term memory tables if they do not exist.

    Safe to call on every startup (CREATE TABLE IF NOT EXISTS is idempotent).

    Args:
        db_path: Path to the SQLite database file.
    """
    conn = sqlite3.connect(db_path)
    try:
        cur = conn.cursor()
        # Long-term facts and summaries per user
        cur.execute("""
            CREATE TABLE IF NOT EXISTS long_term_memory (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                user_id TEXT NOT NULL,
                memory_type TEXT NOT NULL, -- 'fact', 'preference', 'summary'
                content TEXT NOT NULL,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)
        # Session conversation history
        cur.execute("""
            CREATE TABLE IF NOT EXISTS conversation_history (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                session_id TEXT NOT NULL,
                user_id TEXT NOT NULL,
                role TEXT NOT NULL, -- 'user' or 'assistant'
                content TEXT NOT NULL,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)
        conn.commit()
    finally:
        # Close even if table creation raises, so the handle is never leaked
        conn.close()
Run init_db() once at startup. The memory_type field lets you query specific kinds of context without pulling everything.
Step 2: Build Memory Read and Write Functions
You need two operations: load relevant memory before each API call, and write new facts after each exchange.
import sqlite3
from datetime import datetime
def load_user_memory(user_id: str, db_path: str = "assistant_memory.db") -> str:
    """Render the user's stored long-term memories as a prompt-ready text block.

    Pulls the 20 most recently updated rows for this user. Returns an empty
    string when nothing is stored, so callers can test truthiness directly.
    """
    connection = sqlite3.connect(db_path)
    cursor = connection.cursor()
    cursor.execute(
        """
        SELECT memory_type, content FROM long_term_memory
        WHERE user_id = ?
        ORDER BY updated_at DESC
        LIMIT 20
        """,
        (user_id,),
    )
    records = cursor.fetchall()
    connection.close()
    if not records:
        return ""
    bullet_lines = [f"- [{kind}] {text}" for kind, text in records]
    return "\n".join(["Known facts about this user:"] + bullet_lines)
def save_memory(user_id: str, memory_type: str, content: str, db_path: str = "assistant_memory.db") -> None:
    """Persist one long-term memory row.

    memory_type is one of 'fact', 'preference', or 'summary' by convention.
    """
    connection = sqlite3.connect(db_path)
    connection.execute(
        """
        INSERT INTO long_term_memory (user_id, memory_type, content)
        VALUES (?, ?, ?)
        """,
        (user_id, memory_type, content),
    )
    connection.commit()
    connection.close()
def load_recent_history(session_id: str, limit: int = 10, db_path: str = "assistant_memory.db") -> list:
    """Return the last `limit` turns of a session, oldest first, as Claude
    `messages` dicts: {"role": ..., "content": ...}.

    Orders by the autoincrement id rather than created_at: CURRENT_TIMESTAMP
    has one-second resolution, so several turns saved within the same second
    would otherwise come back in an arbitrary order.
    """
    conn = sqlite3.connect(db_path)
    try:
        cur = conn.cursor()
        cur.execute(
            """
            SELECT role, content FROM conversation_history
            WHERE session_id = ?
            ORDER BY id DESC
            LIMIT ?
            """,
            (session_id, limit),
        )
        rows = cur.fetchall()
    finally:
        conn.close()
    # The fetch was newest-first; reverse into chronological order
    return [{"role": role, "content": content} for role, content in reversed(rows)]
def save_turn(session_id: str, user_id: str, role: str, content: str, db_path: str = "assistant_memory.db") -> None:
    """Append one conversation turn ('user' or 'assistant') to session history."""
    connection = sqlite3.connect(db_path)
    connection.execute(
        """
        INSERT INTO conversation_history (session_id, user_id, role, content)
        VALUES (?, ?, ?, ?)
        """,
        (session_id, user_id, role, content),
    )
    connection.commit()
    connection.close()
Step 3: Build the Memory-Aware System Prompt
Inject long-term memory into the system prompt dynamically. This is how the model "knows" facts about the user without them repeating themselves.
def build_system_prompt(user_id: str) -> str:
    """Assemble the system prompt, appending the user's stored long-term memory.

    Returns the bare base prompt when nothing is stored for this user.
    """
    base_prompt = """You are a helpful business assistant. Be concise and direct.
When you learn something important about the user (their role, preferences, ongoing projects),
note it in your response using this format on a new line: [MEMORY: fact|<the fact to remember>]
Only use this tag for genuinely useful persistent facts, not for conversational details."""
    memory_context = load_user_memory(user_id)
    if not memory_context:
        return base_prompt
    return f"{base_prompt}\n\n{memory_context}"
The [MEMORY: fact|...] tag is a lightweight protocol to extract facts from responses without a separate extraction call.
Step 4: Build the Main Chat Function
Wire everything together into a single function that handles loading history, calling Claude, saving the exchange, and extracting any new memories.
import anthropic
import re
import uuid
# Module-level shared client; the anthropic SDK reads ANTHROPIC_API_KEY from the environment
client = anthropic.Anthropic()
def chat(user_id: str, user_message: str, session_id: str | None = None) -> tuple[str, str]:
    """Run one memory-aware exchange with Claude.

    Args:
        user_id: Stable identifier used to key long-term memory.
        user_message: The user's new message for this turn.
        session_id: Session to continue; a fresh UUID is generated when None.

    Returns:
        (clean_reply, session_id): the reply with [MEMORY: ...] tags stripped,
        and the session id actually used — callers must keep passing it back
        to preserve short-term history. (The original annotation said `-> str`,
        but a 2-tuple was always returned.)
    """
    if session_id is None:
        session_id = str(uuid.uuid4())
    # Short-term memory: prior turns from this session, oldest first
    history = load_recent_history(session_id)
    history.append({"role": "user", "content": user_message})
    # Long-term memory is injected via the system prompt
    system_prompt = build_system_prompt(user_id)
    response = client.messages.create(
        model="claude-opus-4-5",
        max_tokens=1024,
        system=system_prompt,
        messages=history
    )
    assistant_reply = response.content[0].text
    # Persist both sides of the exchange for future history loads
    save_turn(session_id, user_id, "user", user_message)
    save_turn(session_id, user_id, "assistant", assistant_reply)
    # Harvest [MEMORY: type|content] tags the model emitted
    memory_tags = re.findall(r'\[MEMORY: (\w+)\|(.+?)\]', assistant_reply)
    for memory_type, content in memory_tags:
        save_memory(user_id, memory_type, content.strip())
    # Strip the protocol tags before showing the reply to the user
    clean_reply = re.sub(r'\[MEMORY: .+?\]', '', assistant_reply).strip()
    return clean_reply, session_id
Step 5: Add a Session Summary for Long Conversations
After 20+ turns, conversations get long and expensive. Summarize old turns and compress them into long-term memory.
def summarize_and_compress(session_id: str, user_id: str, db_path: str = "assistant_memory.db") -> None:
    """Compress the oldest 20 turns of a session into a long-term summary.

    No-op when the session has fewer than 20 stored turns. The summarized
    turns are deleted afterwards so the rolling history stays short and cheap.

    Args:
        session_id: Session whose history should be compressed.
        user_id: Owner of the long-term memory the summary is written to.
        db_path: Path to the SQLite database file (new, defaulted parameter
            for consistency with the other memory functions).
    """
    conn = sqlite3.connect(db_path)
    try:
        cur = conn.cursor()
        # Order by id, not created_at: the timestamp has one-second resolution,
        # so turns saved in the same second would select in arbitrary order and
        # the DELETE below could then remove a different set of rows.
        cur.execute(
            """
            SELECT role, content FROM conversation_history
            WHERE session_id = ?
            ORDER BY id ASC
            LIMIT 20
            """,
            (session_id,),
        )
        rows = cur.fetchall()
    finally:
        conn.close()
    if len(rows) < 20:
        return  # Not long enough to compress
    conversation_text = "\n".join(f"{role}: {content}" for role, content in rows)
    # Haiku is cheap and sufficient for summarization; no strong reasoning needed
    summary_response = client.messages.create(
        model="claude-haiku-4-5",
        max_tokens=300,
        messages=[{
            "role": "user",
            "content": f"Summarize the key facts and decisions from this conversation in 3-5 bullet points:\n\n{conversation_text}"
        }]
    )
    summary = summary_response.content[0].text
    save_memory(user_id, "summary", summary, db_path)
    # Delete exactly the rows that were summarized (same id ordering as above)
    conn = sqlite3.connect(db_path)
    try:
        conn.execute(
            """
            DELETE FROM conversation_history
            WHERE session_id = ? AND id IN (
                SELECT id FROM conversation_history
                WHERE session_id = ?
                ORDER BY id ASC
                LIMIT 20
            )
            """,
            (session_id, session_id),
        )
        conn.commit()
    finally:
        conn.close()
Use claude-haiku-4-5 for summarization. It costs a fraction of Opus and the task does not need strong reasoning.
Step 6: Wire It Into a Simple CLI to Test
def main():
    """Interactive CLI loop for exercising the memory-aware assistant."""
    init_db()
    user_id = "user_001"
    session_id = str(uuid.uuid4())
    print(f"Session: {session_id}")
    print("Type 'quit' to exit\n")
    while True:
        line = input("You: ").strip()
        if not line:
            continue
        if line.lower() == "quit":
            break
        reply, session_id = chat(user_id, line, session_id)
        print(f"Assistant: {reply}\n")
        # Compress once the session accumulates 20+ stored turns
        connection = sqlite3.connect("assistant_memory.db")
        cursor = connection.cursor()
        cursor.execute("SELECT COUNT(*) FROM conversation_history WHERE session_id = ?", (session_id,))
        turn_count = cursor.fetchone()[0]
        connection.close()
        if turn_count >= 20:
            summarize_and_compress(session_id, user_id)


if __name__ == "__main__":
    main()
Run the script, have a conversation, quit, then run it again. The assistant will remember what it learned in the first session.
What to Build Next
- Add a vector store (pgvector or Chroma) to retrieve semantically relevant memories instead of just the 20 most recent
- Build a memory management UI so users can view and delete what the assistant knows about them
- Add memory categories specific to your use case (e.g., "crm_contact", "project_status", "billing_tier")
Related Reading
- Building an AI-Powered Sales Assistant - ai powered sales assistant
- Building an AI Meeting Assistant - ai meeting assistant setup
- Creating an AI Research Assistant - ai research assistant guide
Want this system built for your business?
Get a free assessment. We will map every system your business needs and show you the ROI.
Get Your Free Assessment