How to Use OpenAI Embeddings for Text Search
Generate and store text embeddings for semantic search applications.
Jay Banlasan
The AI Systems Guy
OpenAI embeddings turn text into numbers in a way that captures meaning. That is what makes an OpenAI embeddings text-search setup useful: you can search by concept, not just by keyword. A keyword search for "payment issue" misses "I can't complete my purchase." An embeddings-based search finds both because the vectors are close in meaning. I use this for building internal search tools over knowledge bases, finding similar support tickets, and matching leads to relevant case studies.
The core workflow is: generate embeddings for your documents once, store them, then at search time generate an embedding for the query and find the closest document vectors. Cosine similarity is your friend here.
What You Need Before Starting
- OpenAI API key configured
- openai and numpy packages installed
- Some text data to index (support tickets, product docs, FAQs, etc.)
- sqlite3 (built into Python) or another storage option
Step 1: Generate Your First Embedding
# Stdlib + numpy, the OpenAI SDK, and python-dotenv for local secrets.
import os
import numpy as np
from openai import OpenAI
from dotenv import load_dotenv
# Pull OPENAI_API_KEY (and friends) from a local .env file into the environment.
load_dotenv()
# One shared client instance for all embedding calls below.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
def get_embedding(text: str, model: str = "text-embedding-3-small") -> list[float]:
    """
    Embed *text* into a vector using the OpenAI embeddings API.

    Args:
        text: Text to embed (both models accept roughly 8,000 tokens max).
        model: "text-embedding-3-small" (cost-efficient) or
            "text-embedding-3-large" (better quality).

    Returns:
        The embedding as a list of floats.
    """
    # Newlines are collapsed to spaces before embedding.
    cleaned = text.replace("\n", " ")
    result = client.embeddings.create(input=[cleaned], model=model)
    return result.data[0].embedding
# Test it: embed one phrase and inspect the resulting vector.
sample_vector = get_embedding("How do I reset my password?")
print(f"Embedding dimensions: {len(sample_vector)}")  # 1536 for small, 3072 for large
print(f"First 5 values: {sample_vector[:5]}")
Step 2: Calculate Similarity Between Two Texts
def cosine_similarity(vec1: list[float], vec2: list[float]) -> float:
    """Return the cosine of the angle between two embedding vectors."""
    u = np.array(vec1)
    v = np.array(vec2)
    denom = np.linalg.norm(u) * np.linalg.norm(v)
    return np.dot(u, v) / denom
# Example: How similar are these phrases?
texts = [
    "How do I reset my password?",
    "I forgot my login credentials",
    "What time does your store close?",
    "How can I change my account password?"
]
embeddings = [get_embedding(t) for t in texts]

# Treat the first phrase as the query and score every other phrase against it.
query_embedding = embeddings[0]
print(f"Query: '{texts[0]}'\n")
for phrase, vec in zip(texts[1:], embeddings[1:]):
    print(f" '{phrase}': {cosine_similarity(query_embedding, vec):.3f}")
You will see that the two password-related phrases score high (above 0.9), while the store hours question scores low (below 0.5).
Step 3: Build a Local Embeddings Store with SQLite
# Persistence layer: SQLite for storage, JSON to serialize embedding vectors.
import sqlite3
import json
from pathlib import Path  # NOTE(review): Path is never used below — candidate for removal
class EmbeddingsStore:
"""
Simple local store for text embeddings using SQLite.
Good for up to 100,000 documents.
"""
def __init__(self, db_path: str = "embeddings.db"):
self.db_path = db_path
self.conn = sqlite3.connect(db_path)
self._create_table()
def _create_table(self):
self.conn.execute("""
CREATE TABLE IF NOT EXISTS documents (
id INTEGER PRIMARY KEY AUTOINCREMENT,
content TEXT NOT NULL,
metadata TEXT,
embedding TEXT NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
""")
self.conn.commit()
def add(self, content: str, metadata: dict = None) -> int:
"""Add a document and its embedding to the store."""
embedding = get_embedding(content)
cursor = self.conn.execute(
"INSERT INTO documents (content, metadata, embedding) VALUES (?, ?, ?)",
(content, json.dumps(metadata or {}), json.dumps(embedding))
)
self.conn.commit()
return cursor.lastrowid
def add_many(self, documents: list[dict]) -> list[int]:
"""Add multiple documents at once. Each dict needs 'content' key, optional 'metadata'."""
ids = []
for doc in documents:
id_ = self.add(doc["content"], doc.get("metadata", {}))
ids.append(id_)
return ids
def search(self, query: str, top_k: int = 5) -> list[dict]:
"""
Find the most semantically similar documents to the query.
Returns:
List of dicts with content, metadata, and similarity score
"""
query_embedding = get_embedding(query)
query_vec = np.array(query_embedding)
# Load all embeddings (for large datasets, use a vector DB instead)
cursor = self.conn.execute("SELECT id, content, metadata, embedding FROM documents")
rows = cursor.fetchall()
results = []
for row_id, content, metadata_str, embedding_str in rows:
doc_vec = np.array(json.loads(embedding_str))
similarity = np.dot(query_vec, doc_vec) / (np.linalg.norm(query_vec) * np.linalg.norm(doc_vec))
results.append({
"id": row_id,
"content": content,
"metadata": json.loads(metadata_str),
"similarity": float(similarity)
})
# Sort by similarity descending
results.sort(key=lambda x: x["similarity"], reverse=True)
return results[:top_k]
def close(self):
self.conn.close()
Step 4: Index a Knowledge Base and Search It
# Example: Index an FAQ knowledge base
store = EmbeddingsStore("faq_store.db")

faq_documents = [
    {"content": "To reset your password, click 'Forgot Password' on the login page and enter your email.",
     "metadata": {"category": "account", "id": "faq-001"}},
    {"content": "You can cancel your subscription at any time from Settings > Billing > Cancel Plan.",
     "metadata": {"category": "billing", "id": "faq-002"}},
    {"content": "We accept Visa, Mastercard, American Express, and PayPal.",
     "metadata": {"category": "billing", "id": "faq-003"}},
    {"content": "Our API rate limit is 1,000 requests per hour on the Pro plan and 5,000 on Enterprise.",
     "metadata": {"category": "api", "id": "faq-004"}},
    {"content": "To export your data, go to Settings > Data > Export and choose CSV or JSON format.",
     "metadata": {"category": "data", "id": "faq-005"}},
    {"content": "Business hours are Monday through Friday, 9am to 6pm EST. Support tickets answered within 24 hours.",
     "metadata": {"category": "support", "id": "faq-006"}},
]

print("Indexing FAQ documents...")
ids = store.add_many(faq_documents)
print(f"Indexed {len(ids)} documents\n")

# Search it: each query should surface the matching FAQ entry by meaning.
for query in [
    "I forgot my password",
    "How do I stop my plan?",
    "What payment methods do you take?"
]:
    print(f"Query: '{query}'")
    for r in store.search(query, top_k=2):
        print(f" [{r['similarity']:.2f}] {r['content'][:80]}...")
    print()
Step 5: Build a Smart FAQ Bot Using Embeddings + Claude
import anthropic
# Claude client used to turn retrieved FAQ snippets into a natural-language answer.
claude = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
def smart_faq_bot(user_question: str, store: EmbeddingsStore, threshold: float = 0.75) -> str:
    """
    Answer questions by finding relevant FAQ entries and using Claude to compose a response.

    Args:
        user_question: The user's question
        store: Initialized EmbeddingsStore with indexed documents
        threshold: Minimum similarity score to use a document as context

    Returns:
        Claude's composed answer, or a fallback message when no FAQ entry
        scores at or above *threshold*.
    """
    # Find relevant FAQ entries
    results = store.search(user_question, top_k=3)
    relevant = [r for r in results if r["similarity"] >= threshold]
    if not relevant:
        return "I don't have specific information about that. Please contact our support team."
    # Build context from relevant FAQ entries
    context = "\n\n".join([f"FAQ: {r['content']}" for r in relevant])
    # Use Claude to compose a natural response
    response = claude.messages.create(
        # BUGFIX: "claude-haiku-20240307" is not a valid model id; the
        # Claude 3 Haiku id is "claude-3-haiku-20240307". Cheap for simple Q&A.
        model="claude-3-haiku-20240307",
        max_tokens=300,
        system="""You are a helpful support agent. Answer the customer's question using only the provided FAQ context.
If the context doesn't fully answer the question, say so and offer to connect them with support.
Be concise and friendly.""",
        messages=[{
            "role": "user",
            "content": f"Customer question: {user_question}\n\nRelevant FAQ context:\n{context}"
        }]
    )
    return response.content[0].text
# Test the bot with a mix of covered and uncovered questions.
for q in [
    "How do I change my password?",
    "Can I pay with PayPal?",
    "Do you offer refunds?"
]:
    print(f"Q: {q}")
    print(f"A: {smart_faq_bot(q, store)}\n")
What to Build Next
- Migrate to a dedicated vector database (Pinecone, Qdrant, or Chroma) when your document count exceeds 50,000
- Add document metadata filtering so searches only return results from specific categories
- Build a "similar support tickets" feature to help your support team see how past issues were resolved
Related Reading
- How to Build a Multi-Turn Conversation with Claude - Combine embeddings search with conversation context
- How to Configure Claude for JSON Output Mode - Use structured output to return search results with confidence scores
- How to Handle AI API Rate Limits Gracefully - Batch embedding generation hits rate limits fast
Want this system built for your business?
Get a free assessment. We will map every system your business needs and show you the ROI.
Get Your Free Assessment