From 2f9bbfcb00080b140462406d8506a2f113a134e0 Mon Sep 17 00:00:00 2001
From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com>
Date: Sat, 28 Feb 2026 17:21:17 +0300
Subject: [PATCH] fix: consistent score calculation between search() and
 batch_search()

search() used raw FAISS distance as score while batch_search() used
1/(1+dist) normalization. Same distance value produced different scores
depending on which method was called (e.g., 0.1 vs 0.909 for dist=0.1).

Align search() to use the same normalization formula as batch_search().
---
 src/fuser/knowledge_base/faiss/faiss_retriever.py      |  2 +-
 .../fuser/knowledge_base/faiss/test_faiss_retriever.py | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/fuser/knowledge_base/faiss/faiss_retriever.py b/src/fuser/knowledge_base/faiss/faiss_retriever.py
index b2a8738e4..e59a97695 100644
--- a/src/fuser/knowledge_base/faiss/faiss_retriever.py
+++ b/src/fuser/knowledge_base/faiss/faiss_retriever.py
@@ -117,7 +117,7 @@ def search(self, query_embedding: np.ndarray, top_k: int = 5) -> list[Document]:
             if idx < 0 or idx >= len(self.documents):
                 continue
             doc = self.documents[idx]
-            score = float(dist)
+            score = float(1.0 / (1.0 + dist))
             results.append(
                 Document(text=doc.text, metadata=doc.metadata.copy(), score=score)
             )
diff --git a/tests/fuser/knowledge_base/faiss/test_faiss_retriever.py b/tests/fuser/knowledge_base/faiss/test_faiss_retriever.py
index 7128b8a3d..b22bf39fd 100644
--- a/tests/fuser/knowledge_base/faiss/test_faiss_retriever.py
+++ b/tests/fuser/knowledge_base/faiss/test_faiss_retriever.py
@@ -260,12 +260,11 @@ def test_index_is_loaded_on_init(self, mock_faiss_index):
             mock_load.assert_called_once()
 
     def test_score_calculation(self, mock_faiss_index):
-        """Test that score is calculated correctly from distance."""
+        """Test that score is calculated consistently with batch_search."""
         index_path, metadata_path, dim, _ = mock_faiss_index
         retriever = FAISSRetriever(index_path, metadata_path)
 
         with patch.object(retriever.index, "search") as mock_search:
-            # For IndexFlatIP, dist is already cosine similarity
             mock_search.return_value = (
                 np.array([[0.95, 0.85, 0.70]], dtype="float32"),
                 np.array([[0, 1, 2]], dtype="int64"),
@@ -274,9 +273,10 @@ def test_score_calculation(self, mock_faiss_index):
             query_embedding = np.random.randn(dim).astype("float32")
             results = retriever.search(query_embedding, top_k=3)
 
-            assert results[0].score == pytest.approx(0.95, rel=1e-5)
-            assert results[1].score == pytest.approx(0.85, rel=1e-5)
-            assert results[2].score == pytest.approx(0.70, rel=1e-5)
+            # Score uses 1/(1+dist) normalization, consistent with batch_search
+            assert results[0].score == pytest.approx(1.0 / (1.0 + 0.95), rel=1e-5)
+            assert results[1].score == pytest.approx(1.0 / (1.0 + 0.85), rel=1e-5)
+            assert results[2].score == pytest.approx(1.0 / (1.0 + 0.70), rel=1e-5)
 
     def test_batch_search_returns_independent_results(self, mock_faiss_index):
         """Test that batch search returns independent result sets."""