Skip to content
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion vector_search/encoders/qdrant_cloud.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging

import litellm
import tiktoken
from django.conf import settings
from qdrant_client import models
Expand All @@ -19,7 +20,9 @@ class QdrantCloudEncoder(BaseEncoder):
def __init__(self, model_name):
self.model_name = model_name
try:
self.token_encoding_name = tiktoken.encoding_name_for_model(model_name)
self.token_encoding_name = tiktoken.encoding_name_for_model(
self.model_short_name()
)
except KeyError:
msg = f"Model {model_name} not found in tiktoken. defaulting to None"
log.warning(msg)
Expand All @@ -42,3 +45,10 @@ def get_embedding(self, texts):
)
for text in texts
]

def dim(self):
    """
    Return the embedding vector dimensionality for this encoder's model.

    Looks the model up via litellm's model-info registry and reads the
    reported output vector size.
    """
    model_info = litellm.get_model_info(self.model_short_name())
    return model_info["output_vector_size"]
1 change: 1 addition & 0 deletions vector_search/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -900,6 +900,7 @@ def vector_search( # noqa: PLR0913
encoder_sparse = sparse_encoder()

search_filter = qdrant_query_conditions(params, collection_name=search_collection)

prefetch_multiplier = settings.VECTOR_HYBRID_SEARCH_PREFETCH_MULTIPLIER
prefetch_max_limit = settings.VECTOR_HYBRID_SEARCH_PREFETCH_MAX_LIMIT
prefetch_limit = min((offset + limit) * prefetch_multiplier, prefetch_max_limit)
Expand Down
Loading