Systems Library / AI Capabilities / How to Build Hybrid Search for RAG Systems
AI Capabilities rag knowledge

How to Build Hybrid Search for RAG Systems

Combine keyword and semantic search for more reliable RAG retrieval.

Jay Banlasan

Jay Banlasan

The AI Systems Guy

A hybrid search RAG system that combines keyword and semantic retrieval catches what either approach misses alone. I build these because pure semantic search fails on exact terms like product codes, policy numbers, and technical jargon, while pure keyword search fails on intent. Combining both gives you the best of each.

When someone searches "PO-2847 refund status," keyword search nails the document by PO number while semantic search finds related refund policy context. Together, the answer is complete.

What You Need Before Starting

Step 1: Set Up BM25 Keyword Search

from rank_bm25 import BM25Okapi
import re

class KeywordIndex:
    """BM25 keyword index over documents shaped like ``{"id": ..., "content": ...}``."""

    def __init__(self):
        self.documents = []
        self.doc_ids = []
        # Stays None until index() has been given at least one document.
        self.bm25 = None

    def index(self, documents):
        """Build (or rebuild) the BM25 index from ``documents``.

        Args:
            documents: Iterable of dicts with "id" and "content" keys.
        """
        # Materialise once so a generator can be passed and so ids, contents
        # and BM25 rows are all derived from the same snapshot.
        self.documents = list(documents)
        self.doc_ids = [doc["id"] for doc in self.documents]
        if not self.documents:
            # NOTE(review): rank_bm25 appears to divide by the corpus size when
            # computing the average document length, so an empty corpus is
            # kept out of BM25Okapi entirely — verify against the installed version.
            self.bm25 = None
            return
        tokenized = [self._tokenize(doc["content"]) for doc in self.documents]
        self.bm25 = BM25Okapi(tokenized)

    def search(self, query, top_k=10):
        """Return up to ``top_k`` documents with a positive BM25 score, best first.

        Args:
            query: Free-text query; tokenized the same way as the documents.
            top_k: Maximum number of hits to return.

        Returns:
            List of ``{"id", "score", "content"}`` dicts. Empty when nothing
            has been indexed yet, when ``top_k`` is not positive, or when no
            document scores above zero.
        """
        if self.bm25 is None or top_k <= 0:
            return []
        scores = self.bm25.get_scores(self._tokenize(query))
        ranked = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)
        return [
            {
                "id": self.doc_ids[i],
                "score": float(scores[i]),
                "content": self.documents[i]["content"],
            }
            for i in ranked[:top_k]
            # Zero (or negative) scores mean no useful term overlap.
            if scores[i] > 0
        ]

    def _tokenize(self, text):
        """Lowercase ``text`` and split it into runs of word characters."""
        return re.findall(r'\w+', text.lower())

# Shared index instance; call keyword_index.index(documents) before searching.
keyword_index = KeywordIndex()

Step 2: Build the Hybrid Retriever

from sentence_transformers import SentenceTransformer

# Embedding model that hybrid_search uses to encode queries; created once at module load.
model = SentenceTransformer("all-MiniLM-L6-v2")

def hybrid_search(query, collection, keyword_index, top_k=5, keyword_weight=0.3, semantic_weight=0.7):
    """Rank documents by a weighted blend of semantic and keyword scores.

    Args:
        query: Search string.
        collection: Vector store whose ``query(query_embeddings=..., n_results=...)``
            returns parallel "ids", "distances", "documents" and "metadatas"
            lists (presumably a Chroma collection — confirm).
        keyword_index: Object whose ``search(query, top_k=...)`` returns dicts
            with "id", "score" and "content" keys.
        top_k: Number of results to return; each retriever is asked for twice
            as many candidates before merging.
        keyword_weight: Multiplier for the max-normalized keyword score.
        semantic_weight: Multiplier for the semantic similarity.

    Returns:
        Up to ``top_k`` dicts with "id", "score", "content" and "metadata",
        sorted by combined score, highest first.
    """
    candidate_count = top_k * 2

    # Semantic search
    query_embedding = model.encode(query).tolist()
    semantic_results = collection.query(query_embeddings=[query_embedding], n_results=candidate_count)

    combined = {}
    semantic_hits = zip(
        semantic_results["ids"][0],
        semantic_results["distances"][0],
        semantic_results["documents"][0],
        semantic_results["metadatas"][0],
    )
    for doc_id, distance, content, metadata in semantic_hits:
        combined[doc_id] = {
            # NOTE(review): `1 - distance` is only a similarity if the
            # collection uses cosine distance — confirm how it was created.
            "score": (1 - distance) * semantic_weight,
            "content": content,
            # Documents stored without metadata may come back as None; use {}
            # so semantic hits have the same shape as keyword-only hits.
            "metadata": metadata or {},
        }

    # Keyword search
    keyword_results = keyword_index.search(query, top_k=candidate_count)
    # Scale keyword scores into 0..1 relative to the best hit; fall back to 1
    # when there are no hits or the best score is zero, to avoid dividing by 0.
    max_kw = max((hit["score"] for hit in keyword_results), default=0) or 1

    for hit in keyword_results:
        keyword_score = (hit["score"] / max_kw) * keyword_weight
        entry = combined.get(hit["id"])
        if entry is not None:
            entry["score"] += keyword_score
        else:
            combined[hit["id"]] = {"score": keyword_score, "content": hit["content"], "metadata": {}}

    ranked = sorted(combined.items(), key=lambda item: item[1]["score"], reverse=True)
    return [{"id": doc_id, **data} for doc_id, data in ranked[:top_k]]

Step 3: Auto-Detect Query Type

Adjust the weights based on whether the query looks like a keyword lookup or a natural-language question:

# Matches identifiers such as "PO-2847", references such as "#123", or any
# standalone run of four or more digits.
_CODE_PATTERN = re.compile(r'[A-Z]{2,}-\d+|#\d+|\b\d{4,}\b')
_QUESTION_WORDS = frozenset({"how", "what", "why", "when", "where", "can"})


def detect_query_type(query):
    """Classify a query so the caller can pick keyword/semantic weights.

    Args:
        query: Raw user query string.

    Returns:
        "keyword_heavy" when the query contains a code-like token, or has at
        most three words and no question word; "semantic_heavy" when it has a
        question word and more than five words; otherwise "balanced".
    """
    has_code = bool(_CODE_PATTERN.search(query))
    # Tokenize on word characters so attached punctuation ("Why?", "what's")
    # does not hide a question word from the lookup.
    words = re.findall(r'\w+', query.lower())
    has_question_words = not _QUESTION_WORDS.isdisjoint(words)
    word_count = len(query.split())

    if has_code or (word_count <= 3 and not has_question_words):
        return "keyword_heavy"
    if has_question_words and word_count > 5:
        return "semantic_heavy"
    return "balanced"

# Keyword/semantic weight pairs, keyed by the label detect_query_type returns.
WEIGHT_PROFILES = {
    profile: {"keyword": keyword_share, "semantic": semantic_share}
    for profile, keyword_share, semantic_share in (
        ("keyword_heavy", 0.7, 0.3),
        ("semantic_heavy", 0.2, 0.8),
        ("balanced", 0.4, 0.6),
    )
}

Step 4: Generate Answers from Hybrid Results

import anthropic

# Module-level Anthropic client used by answer_hybrid.
# NOTE(review): no API key is passed here, so credentials presumably come from
# the environment (e.g. ANTHROPIC_API_KEY) — confirm.
client = anthropic.Anthropic()

def answer_hybrid(question, collection, keyword_index):
    """Answer ``question`` from passages retrieved by hybrid search.

    The query is classified with ``detect_query_type``, the matching entry of
    ``WEIGHT_PROFILES`` supplies the retrieval weights, and the retrieved
    passages are sent to the model as context.

    Args:
        question: User question.
        collection: Vector collection forwarded to ``hybrid_search``.
        keyword_index: Keyword index forwarded to ``hybrid_search``.

    Returns:
        The text of the first content block in the model response.
    """
    weights = WEIGHT_PROFILES[detect_query_type(question)]

    results = hybrid_search(
        question,
        collection,
        keyword_index,
        keyword_weight=weights["keyword"],
        semantic_weight=weights["semantic"],
    )

    # Prefix every passage with its document id: the system prompt asks the
    # model to cite sources, which it cannot do from unlabeled text.
    context = "\n\n".join(f"[{r['id']}] {r['content']}" for r in results)

    response = client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=500,
        system="Answer from the provided context only. Cite sources.",
        messages=[{"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}"}],
    )
    return response.content[0].text

Step 5: Evaluate Hybrid vs Pure Approaches

def compare_approaches(test_set, collection, keyword_index):
    """Report the percentage of test questions each approach answers correctly.

    An answer counts as correct when the test's "expected" text appears in it,
    ignoring case.

    Args:
        test_set: Sized collection of dicts with "question" and "expected" keys.
        collection: Vector collection forwarded to ``answer_hybrid``.
        keyword_index: Keyword index forwarded to ``answer_hybrid``.

    Returns:
        Dict mapping approach name to accuracy in percent, rounded to one
        decimal. Only "hybrid" is actually evaluated; "semantic_only" and
        "keyword_only" stay at 0.0 until answers for them are added to the
        scoring loop. An empty ``test_set`` yields 0.0 for every approach.
    """
    results = {"hybrid": 0, "semantic_only": 0, "keyword_only": 0}

    total = len(test_set)
    if total == 0:
        # Nothing to score; returning here avoids dividing by zero below.
        return {approach: 0.0 for approach in results}

    for test in test_set:
        expected = test["expected"].lower()
        # Add further (approach, answer) pairs here to score the pure approaches.
        answers = [("hybrid", answer_hybrid(test["question"], collection, keyword_index))]
        for approach, answer in answers:
            if expected in answer.lower():
                results[approach] += 1

    return {approach: round(hits / total * 100, 1) for approach, hits in results.items()}

What to Build Next

Add query expansion for the keyword component. When someone searches a specific term, automatically include synonyms and related terms. "PTO policy" should also match "vacation policy" and "time off policy" in keyword search.

Related Reading

Want this system built for your business?

Get a free assessment. We will map every system your business needs and show you the ROI.

Get Your Free Assessment

Related Systems