How to Use OpenAI Embeddings for Text Search
Generate and store text embeddings for semantic search applications.
Jay Banlasan
The AI Systems Guy
OpenAI embeddings turn text into numbers in a way that captures meaning. That is what makes an OpenAI embeddings text-search setup useful: you can search by concept, not just by keyword. A keyword search for "payment issue" misses "I can't complete my purchase." An embeddings-based search finds both because the vectors are close in meaning. I use this for building internal search tools over knowledge bases, finding similar support tickets, and matching leads to relevant case studies.
The core workflow is: generate embeddings for your documents once, store them, then at search time generate an embedding for the query and find the closest document vectors. Cosine similarity is your friend here.
What You Need Before Starting
- OpenAI API key configured
- openai and numpy packages installed
- Some text data to index (support tickets, product docs, FAQs, etc.)
- sqlite3 (built into Python) or another storage option
Step 1: Generate Your First Embedding
# Stdlib + numpy, the OpenAI SDK, and python-dotenv for local secrets.
import os
import numpy as np
from openai import OpenAI
from dotenv import load_dotenv
# Pull OPENAI_API_KEY (and friends) from a local .env file into the environment.
load_dotenv()
# One shared client instance for all embedding calls below.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
def get_embedding(text: str, model: str = "text-embedding-3-small") -> list[float]:
    """
    Embed *text* into a vector using the OpenAI embeddings API.

    Args:
        text: Text to embed (both models accept roughly 8,000 tokens max).
        model: "text-embedding-3-small" (cost-efficient) or
            "text-embedding-3-large" (better quality).

    Returns:
        The embedding as a list of floats.
    """
    # Newlines are collapsed to spaces before embedding.
    cleaned = text.replace("\n", " ")
    result = client.embeddings.create(input=[cleaned], model=model)
    return result.data[0].embedding
# Test it: embed one phrase and inspect the resulting vector.
sample_vector = get_embedding("How do I reset my password?")
print(f"Embedding dimensions: {len(sample_vector)}")  # 1536 for small, 3072 for large
print(f"First 5 values: {sample_vector[:5]}")
Step 2: Calculate Similarity Between Two Texts
def cosine_similarity(vec1: list[float], vec2: list[float]) -> float:
    """Return the cosine of the angle between two embedding vectors."""
    u = np.array(vec1)
    v = np.array(vec2)
    denom = np.linalg.norm(u) * np.linalg.norm(v)
    return np.dot(u, v) / denom
# Example: How similar are these phrases?
texts = [
    "How do I reset my password?",
    "I forgot my login credentials",
    "What time does your store close?",
    "How can I change my account password?"
]
embeddings = [get_embedding(t) for t in texts]

# Treat the first phrase as the query and score every other phrase against it.
query_embedding = embeddings[0]
print(f"Query: '{texts[0]}'\n")
for phrase, vec in zip(texts[1:], embeddings[1:]):
    print(f" '{phrase}': {cosine_similarity(query_embedding, vec):.3f}")
You will see that the two password-related phrases score high (above 0.9), while the store hours question scores low (below 0.5).
Step 3: Build a Local Embeddings Store with SQLite
# Persistence layer: SQLite for storage, JSON to serialize embedding vectors.
import sqlite3
import json
from pathlib import Path  # NOTE(review): Path is never used below — candidate for removal
class EmbeddingsStore:
"""
Simple local store for text embeddings using SQLite.
Good for up to 100,000 documents.
"""
def __init__(self, db_path: str = "embeddings.db"):
self.db_path = db_path
self.conn = sqlite3.connect(db_path)
self._create_table()
def _create_table(self):
self.conn.execute("""
CREATE TABLE IF NOT EXISTS documents (
id INTEGER PRIMARY KEY AUTOINCREMENT,
content TEXT NOT NULL,
metadata TEXT,
embedding TEXT NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
""")
self.conn.commit()
def add(self, content: str, metadata: dict = None) -> int:
"""Add a document and its embedding to the store."""
embedding = get_embedding(content)
cursor = self.conn.execute(
"INSERT INTO documents (content, metadata, embedding) VALUES (?, ?, ?)",
(content, json.dumps(metadata or {}), json.dumps(embedding))
)
self.conn.commit()
return cursor.lastrowid
def add_many(self, documents: list[dict]) -> list[int]:
"""Add multiple documents at once. Each dict needs 'content' key, optional 'metadata'."""
ids = []
for doc in documents:
id_ = self.add(doc["content"], doc.get("metadata", {}))
ids.append(id_)
return ids
def search(self, query: str, top_k: int = 5) -> list[dict]:
"""
Find the most semantically similar documents to the query.
Returns:
List of dicts with content, metadata, and similarity score
"""
query_embedding = get_embedding(query)
query_vec = np.array(query_embedding)
# Load all embeddings (for large datasets, use a vector DB instead)
cursor = self.conn.execute("SELECT id, content, metadata, embedding FROM documents")
rows = cursor.fetchall()
results = []
for row_id, content, metadata_str, embedding_str in rows:
doc_vec = np.array(json.loads(embedding_str))
similarity = np.dot(query_vec, doc_vec) / (np.linalg.norm(query_vec) * np.linalg.norm(doc_vec))
results.append({
"id": row_id,
"content": content,
"metadata": json.loads(metadata_str),
"similarity": float(similarity)
})
# Sort by similarity descending
results.sort(key=lambda x: x["similarity"], reverse=True)
return results[:top_k]
def close(self):
self.conn.close()
Step 4: Index a Knowledge Base and Search It
# Example: Index an FAQ knowledge base
store = EmbeddingsStore("faq_store.db")

faq_documents = [
    {"content": "To reset your password, click 'Forgot Password' on the login page and enter your email.",
     "metadata": {"category": "account", "id": "faq-001"}},
    {"content": "You can cancel your subscription at any time from Settings > Billing > Cancel Plan.",
     "metadata": {"category": "billing", "id": "faq-002"}},
    {"content": "We accept Visa, Mastercard, American Express, and PayPal.",
     "metadata": {"category": "billing", "id": "faq-003"}},
    {"content": "Our API rate limit is 1,000 requests per hour on the Pro plan and 5,000 on Enterprise.",
     "metadata": {"category": "api", "id": "faq-004"}},
    {"content": "To export your data, go to Settings > Data > Export and choose CSV or JSON format.",
     "metadata": {"category": "data", "id": "faq-005"}},
    {"content": "Business hours are Monday through Friday, 9am to 6pm EST. Support tickets answered within 24 hours.",
     "metadata": {"category": "support", "id": "faq-006"}},
]

print("Indexing FAQ documents...")
ids = store.add_many(faq_documents)
print(f"Indexed {len(ids)} documents\n")

# Search it: each query should surface the matching FAQ entry by meaning.
for query in [
    "I forgot my password",
    "How do I stop my plan?",
    "What payment methods do you take?"
]:
    print(f"Query: '{query}'")
    for r in store.search(query, top_k=2):
        print(f" [{r['similarity']:.2f}] {r['content'][:80]}...")
    print()
Step 5: Build a Smart FAQ Bot Using Embeddings + Claude
import anthropic
# Claude client used to turn retrieved FAQ snippets into a natural-language answer.
claude = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
def smart_faq_bot(user_question: str, store: EmbeddingsStore, threshold: float = 0.75) -> str:
    """
    Answer questions by finding relevant FAQ entries and using Claude to compose a response.

    Args:
        user_question: The user's question
        store: Initialized EmbeddingsStore with indexed documents
        threshold: Minimum similarity score to use a document as context

    Returns:
        Claude's composed answer, or a fallback message when no FAQ entry
        scores at or above *threshold*.
    """
    # Find relevant FAQ entries
    results = store.search(user_question, top_k=3)
    relevant = [r for r in results if r["similarity"] >= threshold]
    if not relevant:
        return "I don't have specific information about that. Please contact our support team."
    # Build context from relevant FAQ entries
    context = "\n\n".join([f"FAQ: {r['content']}" for r in relevant])
    # Use Claude to compose a natural response
    response = claude.messages.create(
        # BUGFIX: "claude-haiku-20240307" is not a valid model id; the
        # Claude 3 Haiku id is "claude-3-haiku-20240307". Cheap for simple Q&A.
        model="claude-3-haiku-20240307",
        max_tokens=300,
        system="""You are a helpful support agent. Answer the customer's question using only the provided FAQ context.
If the context doesn't fully answer the question, say so and offer to connect them with support.
Be concise and friendly.""",
        messages=[{
            "role": "user",
            "content": f"Customer question: {user_question}\n\nRelevant FAQ context:\n{context}"
        }]
    )
    return response.content[0].text
# Test the bot with a mix of covered and uncovered questions.
for q in [
    "How do I change my password?",
    "Can I pay with PayPal?",
    "Do you offer refunds?"
]:
    print(f"Q: {q}")
    print(f"A: {smart_faq_bot(q, store)}\n")
What to Build Next
- Migrate to a dedicated vector database (Pinecone, Qdrant, or Chroma) when your document count exceeds 50,000
- Add document metadata filtering so searches only return results from specific categories
- Build a "similar support tickets" feature to help your support team see how past issues were resolved
Related Reading
- How to Build a Multi-Turn Conversation with Claude - Combine embeddings search with conversation context
- How to Configure Claude for JSON Output Mode - Use structured output to return search results with confidence scores
- How to Handle AI API Rate Limits Gracefully - Batch embedding generation hits rate limits fast
Want this system built for your business?
Get a free assessment. We will map every system your business needs and show you the ROI.
Get Your Free Assessment