Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion integrations/oracle/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ classifiers = [
"Programming Language :: Python :: Implementation :: CPython",
]
dependencies = [
"haystack-ai>=2.26.1",
"haystack-ai>=2.27.0",
"oracledb>=2.1.0,<3.0.0",
]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@
# SPDX-License-Identifier: Apache-2.0

from haystack_integrations.components.retrievers.oracle.embedding_retriever import OracleEmbeddingRetriever
from haystack_integrations.components.retrievers.oracle.keyword_retriever import OracleKeywordRetriever

__all__ = ["OracleEmbeddingRetriever"]
__all__ = ["OracleEmbeddingRetriever", "OracleKeywordRetriever"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

from typing import Any

from haystack import component, default_from_dict, default_to_dict
from haystack.dataclasses import Document
from haystack.document_stores.types import FilterPolicy
from haystack.document_stores.types.filter_policy import apply_filter_policy

from haystack_integrations.document_stores.oracle import OracleDocumentStore


@component
class OracleKeywordRetriever:
    """
    Retrieves documents from an OracleDocumentStore using keyword-based (BM25) similarity.

    Requires Oracle Database 23ai and an automatically created DBMS_SEARCH index.

    Use inside a Haystack pipeline::

        pipeline.add_component("retriever", OracleKeywordRetriever(document_store=store, top_k=5))
    """

    def __init__(
        self,
        *,
        document_store: OracleDocumentStore,
        filters: dict[str, Any] | None = None,
        top_k: int = 10,
        filter_policy: str | FilterPolicy = FilterPolicy.REPLACE,
    ) -> None:
        """
        Creates a new OracleKeywordRetriever.

        :param document_store:
            The OracleDocumentStore instance that performs the keyword retrieval.
        :param filters:
            Filters stored on the component and combined with runtime filters according to
            ``filter_policy``. ``None`` is stored as an empty dictionary.
        :param top_k:
            Default ``top_k`` forwarded to the document store when ``run`` / ``run_async``
            are called without an explicit ``top_k``.
        :param filter_policy:
            Policy passed to ``apply_filter_policy`` to combine constructor and runtime filters.
            May be given as a ``FilterPolicy`` member or as a string, which is converted with
            ``FilterPolicy.from_str``.
        :raises TypeError:
            If ``document_store`` is not an instance of OracleDocumentStore.
        """
        if not isinstance(document_store, OracleDocumentStore):
            msg = "document_store must be an instance of OracleDocumentStore"
            raise TypeError(msg)
        self.document_store = document_store
        self.filters = filters or {}
        self.top_k = top_k
        # Strings are accepted so that serialized values (see ``to_dict``) can be passed straight back in.
        self.filter_policy = FilterPolicy.from_str(filter_policy) if isinstance(filter_policy, str) else filter_policy

    def _retrieval_kwargs(self, filters: dict[str, Any] | None, top_k: int | None) -> dict[str, Any]:
        """
        Builds the keyword arguments shared by the sync and async retrieval calls.

        :param filters:
            Runtime filters, combined with the constructor filters according to ``filter_policy``.
        :param top_k:
            Runtime ``top_k``; the constructor value is used when this is ``None``.
        :returns:
            Dictionary with the resolved ``filters`` and ``top_k``.
        """
        return {
            "filters": apply_filter_policy(self.filter_policy, self.filters, filters),
            # Compare against None explicitly so an explicit ``top_k=0`` is forwarded, not replaced.
            "top_k": self.top_k if top_k is None else top_k,
        }

    @component.output_types(documents=list[Document])
    def run(
        self,
        query: str,
        filters: dict[str, Any] | None = None,
        top_k: int | None = None,
    ) -> dict[str, list[Document]]:
        """
        Retrieve documents by keyword search.

        :param query:
            The keyword query string.
        :param filters:
            Runtime filters, combined with constructor filters according to ``filter_policy``.
        :param top_k:
            Override the constructor ``top_k`` for this call.
        :returns:
            ``{"documents": [Document, ...]}``
        """
        docs = self.document_store._keyword_retrieval(query, **self._retrieval_kwargs(filters, top_k))
        return {"documents": docs}

    @component.output_types(documents=list[Document])
    async def run_async(
        self,
        query: str,
        filters: dict[str, Any] | None = None,
        top_k: int | None = None,
    ) -> dict[str, list[Document]]:
        """
        Async variant of :meth:`run`.

        :param query:
            The keyword query string.
        :param filters:
            Runtime filters, combined with constructor filters according to ``filter_policy``.
        :param top_k:
            Override the constructor ``top_k`` for this call.
        :returns:
            ``{"documents": [Document, ...]}``
        """
        docs = await self.document_store._keyword_retrieval_async(query, **self._retrieval_kwargs(filters, top_k))
        return {"documents": docs}

    def to_dict(self) -> dict[str, Any]:
        """
        Serializes the component to a dictionary.

        :returns:
            Dictionary with serialized data.
        """
        return default_to_dict(
            self,
            document_store=self.document_store.to_dict(),
            filters=self.filters,
            top_k=self.top_k,
            # Store the enum's value rather than the enum member itself.
            filter_policy=self.filter_policy.value,
        )

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "OracleKeywordRetriever":
        """
        Deserializes the component from a dictionary.

        The nested ``document_store`` and ``filter_policy`` entries of ``data["init_parameters"]``
        are replaced in place with their deserialized objects before the component is built.

        :param data:
            Dictionary to deserialize from.
        :returns:
            Deserialized component.
        """
        params = data.get("init_parameters", {})
        if "document_store" in params:
            params["document_store"] = OracleDocumentStore.from_dict(params["document_store"])
        if filter_policy := params.get("filter_policy"):
            params["filter_policy"] = FilterPolicy.from_str(filter_policy)
        return default_from_dict(cls, data)
Loading
Loading