diff --git a/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/VectorEmbeddingIntegrationIT.java b/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/VectorEmbeddingIntegrationIT.java index 7bd59513ac90..7f4e16749928 100644 --- a/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/VectorEmbeddingIntegrationIT.java +++ b/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/VectorEmbeddingIntegrationIT.java @@ -253,11 +253,11 @@ void testBatchFingerprintOperations() throws Exception { @Test void testEnsureHybridSearchPipelineCreatesAndUpdates() throws Exception { - vectorService.ensureHybridSearchPipeline(0.6, 0.4); + vectorService.ensureHybridSearchPipeline(0.4, 0.6); Map pipeline = getSearchPipeline(OpenSearchVectorService.HYBRID_PIPELINE_NAME); assertNotNull(pipeline, "Pipeline should exist after creation"); - assertWeightsInPipeline(pipeline, 0.6, 0.4); + assertWeightsInPipeline(pipeline, 0.4, 0.6); vectorService.ensureHybridSearchPipeline(0.3, 0.7); diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchRepository.java index 802031d4e06e..21e8b1558958 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchRepository.java @@ -453,9 +453,9 @@ public void ensureHybridSearchPipeline() { ElasticSearchConfiguration cfg = getSearchConfiguration(); NaturalLanguageSearchConfiguration nlConfig = cfg.getNaturalLanguageSearch(); - double keywordWeight = nlConfig.getKeywordWeight() != null ? nlConfig.getKeywordWeight() : 0.6; + double keywordWeight = nlConfig.getKeywordWeight() != null ? nlConfig.getKeywordWeight() : 0.4; double semanticWeight = - nlConfig.getSemanticWeight() != null ? nlConfig.getSemanticWeight() : 0.4; + nlConfig.getSemanticWeight() != null ? nlConfig.getSemanticWeight() : 0.6; try { SearchSettings ss = diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/vector/OpenSearchVectorService.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/vector/OpenSearchVectorService.java index c7f9a14d5774..70e967962f14 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/vector/OpenSearchVectorService.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/vector/OpenSearchVectorService.java @@ -79,7 +79,7 @@ public void ensureHybridSearchPipeline(double keywordWeight, double semanticWeig MAPPER .createObjectNode() .put("technique", "rrf") - .put("rank_constant", 60) + .put("rank_constant", 30) .set("parameters", MAPPER.createObjectNode().set("weights", weights)); var scoreRanker = MAPPER diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/search/SearchRepositoryBehaviorTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/search/SearchRepositoryBehaviorTest.java index b8178f50c6cc..eb3217c406fe 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/search/SearchRepositoryBehaviorTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/search/SearchRepositoryBehaviorTest.java @@ -1474,7 +1474,7 @@ void initializeVectorSearchServiceInitializesOpenSearchVectorSupport() throws Ex assertSame(embeddingClient, spyRepository.getEmbeddingClient()); assertSame(vectorService, spyRepository.getVectorIndexService()); assertNotNull(spyRepository.getVectorEmbeddingHandler()); - verify(vectorService).ensureHybridSearchPipeline(0.6, 0.4); + verify(vectorService).ensureHybridSearchPipeline(0.4, 0.6); } @Test diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/search/vector/OpenSearchVectorServiceTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/search/vector/OpenSearchVectorServiceTest.java index a4215e08e9af..e894287f0868 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/search/vector/OpenSearchVectorServiceTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/search/vector/OpenSearchVectorServiceTest.java @@ -438,7 +438,7 @@ void testEnsureHybridSearchPipelineSendsCorrectRequest() throws IOException { ArgumentCaptor captor = ArgumentCaptor.forClass(os.org.opensearch.client.opensearch.generic.Request.class); - vectorService.ensureHybridSearchPipeline(0.6, 0.4); + vectorService.ensureHybridSearchPipeline(0.4, 0.6); verify(mockGenericClient).execute(captor.capture()); os.org.opensearch.client.opensearch.generic.Request captured = captor.getValue(); @@ -448,9 +448,9 @@ void testEnsureHybridSearchPipelineSendsCorrectRequest() throws IOException { String body = new String(captured.getBody().get().bodyAsBytes(), java.nio.charset.StandardCharsets.UTF_8); - assertTrue(body.contains("\"weights\":[0.6,0.4]")); + assertTrue(body.contains("\"weights\":[0.4,0.6]")); assertTrue(body.contains("\"technique\":\"rrf\"")); - assertTrue(body.contains("\"rank_constant\":60")); + assertTrue(body.contains("\"rank_constant\":30")); assertTrue(body.contains("\"collapse\"")); assertTrue(body.contains("\"parentId\"")); } diff --git a/openmetadata-spec/src/main/resources/elasticsearch/indexMapping.json b/openmetadata-spec/src/main/resources/elasticsearch/indexMapping.json index ed99152da82c..3927077e0efc 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/indexMapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/indexMapping.json @@ -157,8 +157,7 @@ "parentAliases": [ "storageService", "all", - "dataAsset", - "dataAssetEmbeddings" + "dataAsset" ], "childAliases": [] }, @@ -169,8 +168,7 @@ "parentAliases": [ "driveService", "all", - "dataAsset", - "dataAssetEmbeddings" + "dataAsset" ], "childAliases": [ "file", @@ -185,8 +183,7 @@ "driveService", "directory", "all", - "dataAsset", - "dataAssetEmbeddings" + "dataAsset" ], "childAliases": [] }, @@ -198,8 +195,7 @@ "driveService", "directory", "all", - "dataAsset", - "dataAssetEmbeddings" + "dataAsset" ], "childAliases": [ "worksheet" @@ -213,8 +209,7 @@ "driveService", "spreadsheet", "all", - "dataAsset", - "dataAssetEmbeddings" + "dataAsset" ], "childAliases": [] }, @@ -331,8 +326,7 @@ "parentAliases": [ "classification", "all", - "dataAsset", - "dataAssetEmbeddings" + "dataAsset" ], "childAliases": [] }, diff --git a/openmetadata-spec/src/main/resources/json/schema/configuration/elasticSearchConfiguration.json b/openmetadata-spec/src/main/resources/json/schema/configuration/elasticSearchConfiguration.json index 29284acc31b8..38ca4c3f5779 100644 --- a/openmetadata-spec/src/main/resources/json/schema/configuration/elasticSearchConfiguration.json +++ b/openmetadata-spec/src/main/resources/json/schema/configuration/elasticSearchConfiguration.json @@ -141,12 +141,12 @@ "keywordWeight": { "description": "Weight for BM25 keyword search results in hybrid RRF pipeline (0.0-1.0)", "type": "number", - "default": 0.6 + "default": 0.4 }, "semanticWeight": { "description": "Weight for semantic vector search results in hybrid RRF pipeline (0.0-1.0)", "type": "number", - "default": 0.4 + "default": 0.6 }, "embeddingProvider": { "description": "The provider to use for generating vector embeddings (e.g., bedrock, openai).", diff --git a/openmetadata-spec/src/main/resources/json/schema/configuration/searchSettings.json b/openmetadata-spec/src/main/resources/json/schema/configuration/searchSettings.json index 03ed88fa16d8..bc9cd6393270 100644 --- a/openmetadata-spec/src/main/resources/json/schema/configuration/searchSettings.json +++ b/openmetadata-spec/src/main/resources/json/schema/configuration/searchSettings.json @@ -56,12 +56,12 @@ "keywordWeight": { "description": "Weight for BM25 keyword search in hybrid RRF pipeline (0.0-1.0)", "type": "number", - "default": 0.6 + "default": 0.4 }, "semanticWeight": { "description": "Weight for semantic vector search in hybrid RRF pipeline (0.0-1.0)", "type": "number", - "default": 0.4 + "default": 0.6 } }, "additionalProperties": false