How to Build Hybrid Search for RAG Systems
Combine keyword and semantic search for more reliable RAG retrieval.
Jay Banlasan
The AI Systems Guy
A hybrid search RAG system combining keyword and semantic retrieval catches what either approach misses alone. I build these because pure semantic search fails on exact terms like product codes, policy numbers, and technical jargon. Pure keyword search fails on intent. Combining both gives you the best of each.
When someone searches "PO-2847 refund status," keyword search nails the document by PO number while semantic search finds related refund policy context. Together, the answer is complete.
What You Need Before Starting
- A working RAG system with vector search (see system 409)
- Python 3.8+ with chromadb, whoosh or elasticsearch, and rank_bm25
- Your document corpus indexed in both systems
- Test queries that exercise both keyword and semantic patterns
Step 1: Set Up BM25 Keyword Search
from rank_bm25 import BM25Okapi
import re
class KeywordIndex:
    """In-memory BM25 keyword index over a document corpus.

    Documents are dicts with at least ``"id"`` and ``"content"`` keys.
    Call ``index()`` once before ``search()``.
    """

    def __init__(self):
        self.documents = []
        self.doc_ids = []
        self.bm25 = None  # built lazily by index()

    def index(self, documents):
        """Build the BM25 index over ``documents`` (list of {"id", "content"} dicts)."""
        self.documents = documents
        self.doc_ids = [d["id"] for d in documents]
        tokenized = [self._tokenize(d["content"]) for d in documents]
        self.bm25 = BM25Okapi(tokenized)

    def search(self, query, top_k=10):
        """Return up to ``top_k`` scored matches for ``query``, best first.

        Each match is {"id", "score", "content"}. Documents with zero BM25
        score (no term overlap) are dropped. Returns [] when the index has
        not been built yet or the query tokenizes to nothing.
        """
        # Fix: original raised AttributeError when search() was called
        # before index(); an empty result is the safer contract here.
        if self.bm25 is None:
            return []
        tokens = self._tokenize(query)
        if not tokens:
            return []
        scores = self.bm25.get_scores(tokens)
        top_indices = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)[:top_k]
        return [
            {
                "id": self.doc_ids[i],
                "score": float(scores[i]),
                "content": self.documents[i]["content"],
            }
            for i in top_indices
            if scores[i] > 0  # keep only documents with actual term overlap
        ]

    def _tokenize(self, text):
        """Lowercase and split into word-character runs."""
        return re.findall(r'\w+', text.lower())
# Module-level keyword index; call keyword_index.index(documents) before searching.
keyword_index = KeywordIndex()
Step 2: Build the Hybrid Retriever
from sentence_transformers import SentenceTransformer
# Shared embedding model for the semantic leg of hybrid_search (384-dim MiniLM).
model = SentenceTransformer("all-MiniLM-L6-v2")
def hybrid_search(query, collection, keyword_index, top_k=5, keyword_weight=0.3, semantic_weight=0.7):
    """Blend vector-similarity and BM25 keyword retrieval into one ranked list.

    Both legs fetch ``top_k * 2`` candidates; scores are combined per document
    and the top ``top_k`` are returned as [{"id", "score", "content", "metadata"}].

    Args:
        query: The user's search string.
        collection: A chromadb collection exposing ``query()``.
        keyword_index: A KeywordIndex exposing ``search()``.
        top_k: Number of final results to return.
        keyword_weight: Multiplier applied to normalized BM25 scores.
        semantic_weight: Multiplier applied to vector similarity (1 - distance).
    """
    candidates = {}

    # Semantic leg: chroma returns distances, so similarity = 1 - distance.
    embedding = model.encode(query).tolist()
    sem = collection.query(query_embeddings=[embedding], n_results=top_k * 2)
    ids = sem["ids"][0]
    dists = sem["distances"][0]
    docs = sem["documents"][0]
    metas = sem["metadatas"][0]
    for doc_id, dist, doc, meta in zip(ids, dists, docs, metas):
        candidates[doc_id] = {
            "score": (1 - dist) * semantic_weight,
            "content": doc,
            "metadata": meta,
        }

    # Keyword leg: BM25 scores are unbounded, so normalize by the best hit.
    kw_hits = keyword_index.search(query, top_k=top_k * 2)
    best = max((hit["score"] for hit in kw_hits), default=1)
    for hit in kw_hits:
        contribution = (hit["score"] / best) * keyword_weight
        entry = candidates.get(hit["id"])
        if entry is not None:
            entry["score"] += contribution
        else:
            candidates[hit["id"]] = {"score": contribution, "content": hit["content"], "metadata": {}}

    ranked = sorted(candidates.items(), key=lambda item: item[1]["score"], reverse=True)
    return [{"id": doc_id, **payload} for doc_id, payload in ranked[:top_k]]
Step 3: Auto-Detect Query Type
Adjust weights based on whether the query looks like a keyword or natural language question:
def detect_query_type(query):
    """Classify a query for weight selection.

    Returns "keyword_heavy" for code-like or terse lookups, "semantic_heavy"
    for longer natural-language questions, and "balanced" otherwise.
    """
    words = query.split()
    # Code-like tokens: PO-2847 style IDs, #1234 tickets, or long digit runs.
    looks_like_code = re.search(r'[A-Z]{2,}-\d+|#\d+|\b\d{4,}\b', query) is not None
    question_words = {"how", "what", "why", "when", "where", "can"}
    asks_question = bool(question_words & set(query.lower().split()))

    if looks_like_code or (len(words) <= 3 and not asks_question):
        return "keyword_heavy"
    if asks_question and len(words) > 5:
        return "semantic_heavy"
    return "balanced"
# Retrieval weight presets keyed by detect_query_type() output; each profile
# supplies the keyword_weight / semantic_weight pair passed to hybrid_search().
WEIGHT_PROFILES = {
    "keyword_heavy": {"keyword": 0.7, "semantic": 0.3},
    "semantic_heavy": {"keyword": 0.2, "semantic": 0.8},
    "balanced": {"keyword": 0.4, "semantic": 0.6},
}
Step 4: Generate Answers from Hybrid Results
import anthropic
# Shared Anthropic client; reads the API key from the environment.
client = anthropic.Anthropic()
def answer_hybrid(question, collection, keyword_index):
    """Answer ``question`` with Claude, grounded in hybrid-retrieved context.

    Picks retrieval weights from the detected query shape, runs hybrid_search,
    and feeds the concatenated hits to the model as context.
    """
    profile = WEIGHT_PROFILES[detect_query_type(question)]
    hits = hybrid_search(
        question,
        collection,
        keyword_index,
        keyword_weight=profile["keyword"],
        semantic_weight=profile["semantic"],
    )
    context = "\n\n".join(hit["content"] for hit in hits)
    prompt = f"Context:\n{context}\n\nQuestion: {question}"
    response = client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=500,
        system="Answer from the provided context only. Cite sources.",
        messages=[{"role": "user", "content": prompt}],
    )
    return response.content[0].text
Step 5: Evaluate Hybrid vs Pure Approaches
def _answer_from_results(question, results):
    """Generate a grounded answer from already-retrieved results (shared helper)."""
    context = "\n\n".join(r["content"] for r in results)
    response = client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=500,
        system="Answer from the provided context only. Cite sources.",
        messages=[{"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}"}]
    )
    return response.content[0].text


def compare_approaches(test_set, collection, keyword_index):
    """Score hybrid vs pure-semantic vs pure-keyword retrieval on a test set.

    Each test is {"question", "expected"}; an approach scores a point when the
    expected substring appears (case-insensitively) in its generated answer.
    Returns accuracy percentages: {"hybrid", "semantic_only", "keyword_only"}.

    Fixes the original, which reported semantic_only and keyword_only as 0
    without ever evaluating them, and divided by zero on an empty test set.
    """
    if not test_set:
        return {"hybrid": 0.0, "semantic_only": 0.0, "keyword_only": 0.0}
    results = {"hybrid": 0, "semantic_only": 0, "keyword_only": 0}
    for test in test_set:
        question = test["question"]
        expected = test["expected"].lower()
        answers = {
            "hybrid": answer_hybrid(question, collection, keyword_index),
            # Pure approaches: drive hybrid_search with all weight on one leg.
            "semantic_only": _answer_from_results(
                question,
                hybrid_search(question, collection, keyword_index,
                              keyword_weight=0.0, semantic_weight=1.0)),
            "keyword_only": _answer_from_results(
                question,
                hybrid_search(question, collection, keyword_index,
                              keyword_weight=1.0, semantic_weight=0.0)),
        }
        for approach, answer in answers.items():
            if expected in answer.lower():
                results[approach] += 1
    return {k: round(v / len(test_set) * 100, 1) for k, v in results.items()}
What to Build Next
Add query expansion for the keyword component. When someone searches a specific term, automatically include synonyms and related terms. "PTO policy" should also match "vacation policy" and "time off policy" in keyword search.
Related Reading
- The Centralized Brain Concept - hybrid search as the brain's retrieval mechanism
- Data Flow Architecture for Non-Engineers - how search architecture fits in your data flow
- Why Simplicity Beats Complexity in AI - when hybrid search is worth the added complexity
Want this system built for your business?
Get a free assessment. We will map every system your business needs and show you the ROI.
Get Your Free Assessment