diff --git a/ingestion/src/metadata/data_quality/api/models.py b/ingestion/src/metadata/data_quality/api/models.py index 33bbaa2c186a..56336726a697 100644 --- a/ingestion/src/metadata/data_quality/api/models.py +++ b/ingestion/src/metadata/data_quality/api/models.py @@ -22,7 +22,7 @@ from metadata.config.common import ConfigModel from metadata.generated.schema.api.tests.createTestSuite import CreateTestSuiteRequest -from metadata.generated.schema.entity.data.table import Table +from metadata.generated.schema.entity.data.table import Table, TableData from metadata.generated.schema.entity.services.databaseService import DatabaseConnection from metadata.generated.schema.tests.basic import TestCaseResult from metadata.generated.schema.tests.testCase import TestCase, TestCaseParameterValue @@ -51,6 +51,9 @@ class TestSuiteProcessorConfig(ConfigModel): class TestCaseResultResponse(BaseModel): testCaseResult: TestCaseResult testCase: TestCase + failedRowsSample: Optional[TableData] = None + inspectionQuery: Optional[str] = None + validateColumns: bool = True class TableAndTests(BaseModel): diff --git a/ingestion/src/metadata/data_quality/interface/test_suite_interface.py b/ingestion/src/metadata/data_quality/interface/test_suite_interface.py index 3f82f09dd63a..f309111eee2e 100644 --- a/ingestion/src/metadata/data_quality/interface/test_suite_interface.py +++ b/ingestion/src/metadata/data_quality/interface/test_suite_interface.py @@ -17,6 +17,7 @@ from abc import ABC, abstractmethod from typing import Optional, Set, Type +from metadata.data_quality.api.models import TestCaseResultResponse from metadata.data_quality.builders.validator_builder import ValidatorBuilder from metadata.data_quality.validations.base_test_handler import BaseTestValidator from metadata.data_quality.validations.runtime_param_setter.param_setter import ( @@ -27,7 +28,7 @@ ) from metadata.generated.schema.entity.data.table import Table from metadata.generated.schema.entity.services.databaseService 
import DatabaseConnection -from metadata.generated.schema.tests.basic import TestCaseResult, TestCaseStatus +from metadata.generated.schema.tests.basic import TestCaseStatus from metadata.generated.schema.tests.testCase import TestCase from metadata.generated.schema.tests.testDefinition import TestDefinition from metadata.ingestion.ometa.ometa_api import OpenMetadata @@ -108,7 +109,7 @@ def _set_runtime_params_setter_fact( """ cls.runtime_params_setter_fact = class_fact - def run_test_case(self, test_case: TestCase) -> Optional[TestCaseResult]: + def run_test_case(self, test_case: TestCase) -> Optional[TestCaseResultResponse]: """run column data quality tests""" runtime_params_setter_fact: RuntimeParameterSetterFactory = ( self._get_runtime_params_setter_fact() @@ -132,17 +133,25 @@ def run_test_case(self, test_case: TestCase) -> Optional[TestCaseResult]: validator_builder.set_runtime_params(runtime_params_setters) validator: BaseTestValidator = validator_builder.validator try: - return validator.run_validation() + test_result = validator.run_validation() + response = TestCaseResultResponse( + testCaseResult=test_result, testCase=test_case + ) + validator.result_with_failed_samples(response) + return response except Exception as err: message = ( f"Error executing {test_case.testDefinition.fullyQualifiedName} - {err}" ) logger.exception(message) - return validator.get_test_case_result_object( - validator.execution_date, - TestCaseStatus.Aborted, - message, - [], + return TestCaseResultResponse( + testCase=test_case, + testCaseResult=validator.get_test_case_result_object( + validator.execution_date, + TestCaseStatus.Aborted, + message, + [], + ), ) def _get_table_config(self): diff --git a/ingestion/src/metadata/data_quality/runner/core.py b/ingestion/src/metadata/data_quality/runner/core.py index 914e57d8b974..ec487a337be0 100644 --- a/ingestion/src/metadata/data_quality/runner/core.py +++ b/ingestion/src/metadata/data_quality/runner/core.py @@ -14,7 +14,6 @@ """ 
-from metadata.data_quality.api.models import TestCaseResultResponse from metadata.data_quality.interface.test_suite_interface import TestSuiteInterface from metadata.generated.schema.tests.testCase import TestCase from metadata.utils.logger import test_suite_logger @@ -34,12 +33,8 @@ def run_and_handle(self, test_case: TestCase): f"Executing test case {test_case.name.root} " f"for entity {self.test_runner_interface.table_entity.fullyQualifiedName.root}" ) - test_result = self.test_runner_interface.run_test_case( + result = self.test_runner_interface.run_test_case( test_case, ) - if test_result: - return TestCaseResultResponse( - testCaseResult=test_result, testCase=test_case - ) - return None + return result diff --git a/ingestion/src/metadata/data_quality/validations/base_test_handler.py b/ingestion/src/metadata/data_quality/validations/base_test_handler.py index 37c9a865741e..f1b4a57e2fee 100644 --- a/ingestion/src/metadata/data_quality/validations/base_test_handler.py +++ b/ingestion/src/metadata/data_quality/validations/base_test_handler.py @@ -32,6 +32,7 @@ from pydantic import BaseModel +from metadata.data_quality.api.models import TestCaseResultResponse from metadata.data_quality.validations import utils from metadata.data_quality.validations.impact_score import ( DEFAULT_TOP_DIMENSIONS, @@ -188,6 +189,13 @@ def run_validation(self) -> TestCaseResult: return test_result + def result_with_failed_samples(self, result: TestCaseResultResponse) -> None: + """Hook for failed row sampling. No-op by default. + + Overridden by FailedSampleValidatorMixin to fetch and stash + failed row samples on the validator instance. 
+ """ + @abstractmethod def _run_validation(self) -> TestCaseResult: """Execute the specific test validation logic diff --git a/ingestion/src/metadata/data_quality/validations/column/pandas/columnValueLengthsToBeBetween.py b/ingestion/src/metadata/data_quality/validations/column/pandas/columnValueLengthsToBeBetween.py index facd5a603d4b..18a0aac2ec1b 100644 --- a/ingestion/src/metadata/data_quality/validations/column/pandas/columnValueLengthsToBeBetween.py +++ b/ingestion/src/metadata/data_quality/validations/column/pandas/columnValueLengthsToBeBetween.py @@ -28,10 +28,17 @@ BaseColumnValueLengthsToBeBetweenValidator, ) from metadata.data_quality.validations.impact_score import calculate_impact_score_pandas +from metadata.data_quality.validations.mixins.failed_row_sampler_mixin import ( + PandasFailedRowSamplerMixin, +) +from metadata.data_quality.validations.mixins.failed_sample_validator_mixin import ( + FailedSampleValidatorMixin, +) from metadata.data_quality.validations.mixins.pandas_validator_mixin import ( PandasValidatorMixin, aggregate_others_statistical_pandas, ) +from metadata.generated.schema.entity.data.table import TableData from metadata.generated.schema.tests.dimensionResult import DimensionResult from metadata.profiler.metrics.registry import Metrics from metadata.utils.logger import test_suite_logger @@ -41,7 +48,10 @@ class ColumnValueLengthsToBeBetweenValidator( - BaseColumnValueLengthsToBeBetweenValidator, PandasValidatorMixin + FailedSampleValidatorMixin, + BaseColumnValueLengthsToBeBetweenValidator, + PandasValidatorMixin, + PandasFailedRowSamplerMixin, ): """Validator for column value lengths to be between test case""" @@ -238,3 +248,21 @@ def compute_row_count(self, column: SQALikeColumn, min_bound: int, max_bound: in ) return row_count, failed_rows + + def filter(self): + min_bound = self.get_min_bound("minLength") + max_bound = self.get_max_bound("maxLength") + filters = [] + if min_bound is not None and min_bound > float("-inf"): + 
filters.append( + f"{self.get_column().name}.astype('str').str.len() < {min_bound}" + ) + if max_bound is not None and max_bound < float("inf"): + filters.append( + f"{self.get_column().name}.astype('str').str.len() > {max_bound}" + ) + return " or ".join(filters) + + def fetch_failed_rows_sample(self): + cols, rows = self._get_failed_rows_sample() + return TableData(columns=cols, rows=rows) diff --git a/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeBetween.py b/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeBetween.py index 9aa2fc6d1015..6bddcae468f8 100644 --- a/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeBetween.py +++ b/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeBetween.py @@ -14,6 +14,7 @@ """ from collections import defaultdict +from datetime import datetime from typing import List, Optional, cast import pandas as pd @@ -27,20 +28,32 @@ BaseColumnValuesToBeBetweenValidator, ) from metadata.data_quality.validations.impact_score import calculate_impact_score_pandas +from metadata.data_quality.validations.mixins.failed_row_sampler_mixin import ( + PandasFailedRowSamplerMixin, +) +from metadata.data_quality.validations.mixins.failed_sample_validator_mixin import ( + FailedSampleValidatorMixin, +) from metadata.data_quality.validations.mixins.pandas_validator_mixin import ( PandasValidatorMixin, aggregate_others_statistical_pandas, ) +from metadata.generated.schema.entity.data.table import TableData from metadata.generated.schema.tests.dimensionResult import DimensionResult from metadata.profiler.metrics.registry import Metrics +from metadata.profiler.orm.registry import is_date_time from metadata.utils.logger import test_suite_logger from metadata.utils.sqa_like_column import SQALikeColumn +from metadata.utils.time_utils import convert_timestamp logger = test_suite_logger() class ColumnValuesToBeBetweenValidator( - 
BaseColumnValuesToBeBetweenValidator, PandasValidatorMixin + FailedSampleValidatorMixin, + BaseColumnValuesToBeBetweenValidator, + PandasValidatorMixin, + PandasFailedRowSamplerMixin, ): """Validator for column values to be between test case""" @@ -237,3 +250,34 @@ def compute_row_count(self, column: SQALikeColumn, min_bound: int, max_bound: in ) return row_count, failed_rows + + def filter(self): + column = self.get_column() + if is_date_time(column.type): + min_bound = self.get_test_case_param_value( + self.test_case.parameterValues, + "minValue", + type_=datetime.fromtimestamp, + default=datetime.min, + pre_processor=convert_timestamp, + ) + max_bound = self.get_test_case_param_value( + self.test_case.parameterValues, + "maxValue", + type_=datetime.fromtimestamp, + default=datetime.max, + pre_processor=convert_timestamp, + ) + else: + min_bound = self.get_min_bound("minValue") + max_bound = self.get_max_bound("maxValue") + filters = [] + if min_bound is not None: + filters.append(f"{column.name} < {min_bound}") + if max_bound is not None: + filters.append(f"{column.name} > {max_bound}") + return " or ".join(filters) + + def fetch_failed_rows_sample(self): + cols, rows = self._get_failed_rows_sample() + return TableData(columns=cols, rows=rows) diff --git a/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeInSet.py b/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeInSet.py index 504a23e41463..d85ba621f152 100644 --- a/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeInSet.py +++ b/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeInSet.py @@ -13,6 +13,7 @@ Validator for column value to be in set test case """ +from ast import literal_eval from collections import defaultdict from typing import List, Optional, cast @@ -27,10 +28,17 @@ BaseColumnValuesToBeInSetValidator, ) from metadata.data_quality.validations.impact_score import 
calculate_impact_score_pandas +from metadata.data_quality.validations.mixins.failed_row_sampler_mixin import ( + PandasFailedRowSamplerMixin, +) +from metadata.data_quality.validations.mixins.failed_sample_validator_mixin import ( + FailedSampleValidatorMixin, +) from metadata.data_quality.validations.mixins.pandas_validator_mixin import ( PandasValidatorMixin, aggregate_others_pandas, ) +from metadata.generated.schema.entity.data.table import TableData from metadata.generated.schema.tests.dimensionResult import DimensionResult from metadata.profiler.metrics.core import add_props from metadata.profiler.metrics.registry import Metrics @@ -41,7 +49,10 @@ class ColumnValuesToBeInSetValidator( - BaseColumnValuesToBeInSetValidator, PandasValidatorMixin + FailedSampleValidatorMixin, + BaseColumnValuesToBeInSetValidator, + PandasValidatorMixin, + PandasFailedRowSamplerMixin, ): """Validator for column value to be in set test case""" @@ -196,3 +207,15 @@ def compute_row_count(self, column: SQALikeColumn): NotImplementedError: """ return self._compute_row_count(self.runner, column) + + def filter(self): + items = self.get_test_case_param_value( + self.test_case.parameterValues, + "allowedValues", + literal_eval, + ) + return f"~{self.get_column().name}.isin({items})" + + def fetch_failed_rows_sample(self): + cols, rows = self._get_failed_rows_sample() + return TableData(columns=cols, rows=rows) diff --git a/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeNotInSet.py b/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeNotInSet.py index 60ce1696e5aa..5f51bd622655 100644 --- a/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeNotInSet.py +++ b/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeNotInSet.py @@ -13,6 +13,7 @@ Validator for column value to be not in set test case """ +from ast import literal_eval from collections import defaultdict from typing import 
List, Optional, cast @@ -27,10 +28,17 @@ BaseColumnValuesToBeNotInSetValidator, ) from metadata.data_quality.validations.impact_score import calculate_impact_score_pandas +from metadata.data_quality.validations.mixins.failed_row_sampler_mixin import ( + PandasFailedRowSamplerMixin, +) +from metadata.data_quality.validations.mixins.failed_sample_validator_mixin import ( + FailedSampleValidatorMixin, +) from metadata.data_quality.validations.mixins.pandas_validator_mixin import ( PandasValidatorMixin, aggregate_others_pandas, ) +from metadata.generated.schema.entity.data.table import TableData from metadata.generated.schema.tests.dimensionResult import DimensionResult from metadata.profiler.metrics.core import add_props from metadata.profiler.metrics.registry import Metrics @@ -41,7 +49,10 @@ class ColumnValuesToBeNotInSetValidator( - BaseColumnValuesToBeNotInSetValidator, PandasValidatorMixin + FailedSampleValidatorMixin, + BaseColumnValuesToBeNotInSetValidator, + PandasValidatorMixin, + PandasFailedRowSamplerMixin, ): """Validator for column value to be not in set test case""" @@ -182,3 +193,15 @@ def compute_row_count(self, column: SQALikeColumn): NotImplementedError: """ return self._compute_row_count(self.runner, column) + + def filter(self): + items = self.get_test_case_param_value( + self.test_case.parameterValues, + "forbiddenValues", + literal_eval, + ) + return f"{self.get_column().name}.isin({items})" + + def fetch_failed_rows_sample(self): + cols, rows = self._get_failed_rows_sample() + return TableData(columns=cols, rows=rows) diff --git a/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeNotNull.py b/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeNotNull.py index d350966b2186..aab7629432e3 100644 --- a/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeNotNull.py +++ b/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeNotNull.py @@ -27,10 
+27,17 @@ BaseColumnValuesToBeNotNullValidator, ) from metadata.data_quality.validations.impact_score import calculate_impact_score_pandas +from metadata.data_quality.validations.mixins.failed_row_sampler_mixin import ( + PandasFailedRowSamplerMixin, +) +from metadata.data_quality.validations.mixins.failed_sample_validator_mixin import ( + FailedSampleValidatorMixin, +) from metadata.data_quality.validations.mixins.pandas_validator_mixin import ( PandasValidatorMixin, aggregate_others_pandas, ) +from metadata.generated.schema.entity.data.table import TableData from metadata.generated.schema.tests.dimensionResult import DimensionResult from metadata.profiler.metrics.registry import Metrics from metadata.utils.logger import test_suite_logger @@ -40,7 +47,10 @@ class ColumnValuesToBeNotNullValidator( - BaseColumnValuesToBeNotNullValidator, PandasValidatorMixin + FailedSampleValidatorMixin, + BaseColumnValuesToBeNotNullValidator, + PandasValidatorMixin, + PandasFailedRowSamplerMixin, ): """Validator for column values to be not null test case""" @@ -173,3 +183,10 @@ def compute_row_count(self, column: SQALikeColumn): NotImplementedError: """ return self._compute_row_count(self.runner, column) + + def filter(self): + return f"{self.get_column().name}.isnull()" + + def fetch_failed_rows_sample(self): + cols, rows = self._get_failed_rows_sample() + return TableData(columns=cols, rows=rows) diff --git a/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeUnique.py b/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeUnique.py index ac66976033fb..20758009ac87 100644 --- a/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeUnique.py +++ b/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeUnique.py @@ -28,10 +28,17 @@ BaseColumnValuesToBeUniqueValidator, ) from metadata.data_quality.validations.impact_score import calculate_impact_score_pandas +from 
metadata.data_quality.validations.mixins.failed_row_sampler_mixin import ( + PandasFailedRowSamplerMixin, +) +from metadata.data_quality.validations.mixins.failed_sample_validator_mixin import ( + FailedSampleValidatorMixin, +) from metadata.data_quality.validations.mixins.pandas_validator_mixin import ( PandasValidatorMixin, aggregate_others_statistical_pandas, ) +from metadata.generated.schema.entity.data.table import TableData from metadata.generated.schema.tests.dimensionResult import DimensionResult from metadata.profiler.metrics.registry import Metrics from metadata.utils.sqa_like_column import SQALikeColumn @@ -42,7 +49,10 @@ class ColumnValuesToBeUniqueValidator( - BaseColumnValuesToBeUniqueValidator, PandasValidatorMixin + FailedSampleValidatorMixin, + BaseColumnValuesToBeUniqueValidator, + PandasValidatorMixin, + PandasFailedRowSamplerMixin, ): """Validator for column values to be unique test case""" @@ -210,3 +220,13 @@ def calculate_failed_count_from_metrics( logger.debug("Full error details: ", exc_info=True) return dimension_results + + def filter(self): + vcs = [df[self.get_column().name].value_counts() for df in self.runner] + sums = sum(vcs) + non_unique = sums[sums > 1].index.tolist() + return f"{self.get_column().name}.isin({non_unique})" + + def fetch_failed_rows_sample(self): + cols, rows = self._get_failed_rows_sample() + return TableData(columns=cols, rows=rows) diff --git a/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToMatchRegex.py b/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToMatchRegex.py index 910c690afcab..eaf00026fab3 100644 --- a/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToMatchRegex.py +++ b/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToMatchRegex.py @@ -27,10 +27,17 @@ BaseColumnValuesToMatchRegexValidator, ) from metadata.data_quality.validations.impact_score import calculate_impact_score_pandas +from 
metadata.data_quality.validations.mixins.failed_row_sampler_mixin import ( + PandasFailedRowSamplerMixin, +) +from metadata.data_quality.validations.mixins.failed_sample_validator_mixin import ( + FailedSampleValidatorMixin, +) from metadata.data_quality.validations.mixins.pandas_validator_mixin import ( PandasValidatorMixin, aggregate_others_pandas, ) +from metadata.generated.schema.entity.data.table import TableData from metadata.generated.schema.tests.dimensionResult import DimensionResult from metadata.profiler.metrics.core import add_props from metadata.profiler.metrics.registry import Metrics @@ -41,7 +48,10 @@ class ColumnValuesToMatchRegexValidator( - BaseColumnValuesToMatchRegexValidator, PandasValidatorMixin + FailedSampleValidatorMixin, + BaseColumnValuesToMatchRegexValidator, + PandasValidatorMixin, + PandasFailedRowSamplerMixin, ): """Validator for column values to match regex test case""" @@ -200,3 +210,15 @@ def compute_row_count(self, column: SQALikeColumn): NotImplementedError: """ return self._compute_row_count(self.runner, column) + + def filter(self): + expression = self.get_test_case_param_value( + self.test_case.parameterValues, + "regex", + str, + ) + return f"~{self.get_column().name}.astype('str').str.contains('{expression}')" + + def fetch_failed_rows_sample(self): + cols, rows = self._get_failed_rows_sample() + return TableData(columns=cols, rows=rows) diff --git a/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToNotMatchRegex.py b/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToNotMatchRegex.py index 89f105c39c36..489a462e564c 100644 --- a/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToNotMatchRegex.py +++ b/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToNotMatchRegex.py @@ -27,10 +27,17 @@ BaseColumnValuesToNotMatchRegexValidator, ) from metadata.data_quality.validations.impact_score import calculate_impact_score_pandas +from 
metadata.data_quality.validations.mixins.failed_row_sampler_mixin import ( + PandasFailedRowSamplerMixin, +) +from metadata.data_quality.validations.mixins.failed_sample_validator_mixin import ( + FailedSampleValidatorMixin, +) from metadata.data_quality.validations.mixins.pandas_validator_mixin import ( PandasValidatorMixin, aggregate_others_pandas, ) +from metadata.generated.schema.entity.data.table import TableData from metadata.generated.schema.tests.dimensionResult import DimensionResult from metadata.profiler.metrics.core import add_props from metadata.profiler.metrics.registry import Metrics @@ -41,7 +48,10 @@ class ColumnValuesToNotMatchRegexValidator( - BaseColumnValuesToNotMatchRegexValidator, PandasValidatorMixin + FailedSampleValidatorMixin, + BaseColumnValuesToNotMatchRegexValidator, + PandasValidatorMixin, + PandasFailedRowSamplerMixin, ): """Validator for column values to not match regex test case""" @@ -184,3 +194,15 @@ def compute_row_count(self, column: SQALikeColumn): NotImplementedError: """ return self._compute_row_count(self.runner, column) + + def filter(self): + expression = self.get_test_case_param_value( + self.test_case.parameterValues, + "forbiddenRegex", + str, + ) + return f"{self.get_column().name}.astype('str').str.contains('{expression}')" + + def fetch_failed_rows_sample(self): + cols, rows = self._get_failed_rows_sample() + return TableData(columns=cols, rows=rows) diff --git a/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValueLengthsToBeBetween.py b/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValueLengthsToBeBetween.py index 62ec7f42dfc7..d5a2bc961993 100644 --- a/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValueLengthsToBeBetween.py +++ b/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValueLengthsToBeBetween.py @@ -24,9 +24,16 @@ from metadata.data_quality.validations.column.base.columnValueLengthsToBeBetween import ( 
BaseColumnValueLengthsToBeBetweenValidator, ) +from metadata.data_quality.validations.mixins.failed_row_sampler_mixin import ( + SQARowSamplerMixin, +) +from metadata.data_quality.validations.mixins.failed_sample_validator_mixin import ( + FailedSampleValidatorMixin, +) from metadata.data_quality.validations.mixins.sqa_validator_mixin import ( SQAValidatorMixin, ) +from metadata.generated.schema.entity.data.table import TableData from metadata.generated.schema.tests.dimensionResult import DimensionResult from metadata.profiler.metrics.registry import Metrics from metadata.profiler.orm.functions.length import LenFn @@ -36,7 +43,10 @@ class ColumnValueLengthsToBeBetweenValidator( - BaseColumnValueLengthsToBeBetweenValidator, SQAValidatorMixin + FailedSampleValidatorMixin, + BaseColumnValueLengthsToBeBetweenValidator, + SQAValidatorMixin, + SQARowSamplerMixin, ): """Validator for column value length to be between test case""" @@ -147,3 +157,20 @@ def _execute_dimensional_validation( logger.debug("Full error details: ", exc_info=True) return dimension_results + + def filter(self): + min_bound = self.get_min_bound("minLength") + max_bound = self.get_max_bound("maxLength") + filters = [] + if min_bound is not None and min_bound > float("-inf"): + filters.append((LenFn(self.get_column()), "lt", min_bound)) + if max_bound is not None and max_bound < float("inf"): + filters.append((LenFn(self.get_column()), "gt", max_bound)) + return { + "filters": filters, + "or_filter": True, + } + + def fetch_failed_rows_sample(self): + cols, rows = self._get_failed_rows_sample() + return TableData(columns=cols, rows=rows) diff --git a/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeBetween.py b/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeBetween.py index 046813890d81..2b74348d866a 100644 --- a/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeBetween.py +++ 
b/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeBetween.py @@ -13,6 +13,7 @@ Validator for column values to be between test case """ import math +from datetime import datetime from typing import List, Optional from sqlalchemy import Column @@ -24,18 +25,30 @@ from metadata.data_quality.validations.column.base.columnValuesToBeBetween import ( BaseColumnValuesToBeBetweenValidator, ) +from metadata.data_quality.validations.mixins.failed_row_sampler_mixin import ( + SQARowSamplerMixin, +) +from metadata.data_quality.validations.mixins.failed_sample_validator_mixin import ( + FailedSampleValidatorMixin, +) from metadata.data_quality.validations.mixins.sqa_validator_mixin import ( SQAValidatorMixin, ) +from metadata.generated.schema.entity.data.table import TableData from metadata.generated.schema.tests.dimensionResult import DimensionResult from metadata.profiler.metrics.registry import Metrics +from metadata.profiler.orm.registry import is_date_time from metadata.utils.logger import test_suite_logger +from metadata.utils.time_utils import convert_timestamp logger = test_suite_logger() class ColumnValuesToBeBetweenValidator( - BaseColumnValuesToBeBetweenValidator, SQAValidatorMixin + FailedSampleValidatorMixin, + BaseColumnValuesToBeBetweenValidator, + SQAValidatorMixin, + SQARowSamplerMixin, ): """Validator for column values to be between test case""" @@ -146,3 +159,38 @@ def compute_row_count(self, column: Column, min_bound: int, max_bound: int): ) return row_count, failed_rows + + def filter(self): + column = self.get_column() + if is_date_time(column.type): + min_bound = self.get_test_case_param_value( + self.test_case.parameterValues, # type: ignore + "minValue", + type_=datetime.fromtimestamp, + default=datetime.min, + pre_processor=convert_timestamp, + ) + max_bound = self.get_test_case_param_value( + self.test_case.parameterValues, # type: ignore + "maxValue", + type_=datetime.fromtimestamp, + default=datetime.max, + 
pre_processor=convert_timestamp, + ) + else: + min_bound = self.get_min_bound("minValue") + max_bound = self.get_max_bound("maxValue") + + filters = [] + if min_bound is not None: + filters.append((column, "lt", min_bound)) + if max_bound is not None: + filters.append((column, "gt", max_bound)) + return { + "filters": filters, + "or_filter": True, + } + + def fetch_failed_rows_sample(self): + cols, rows = self._get_failed_rows_sample() + return TableData(columns=cols, rows=rows) diff --git a/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeInSet.py b/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeInSet.py index 9f0f8157b8dd..30b0c92068d3 100644 --- a/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeInSet.py +++ b/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeInSet.py @@ -13,6 +13,7 @@ Validator for column value to be in set test case """ +from ast import literal_eval from typing import List, Optional from sqlalchemy import Column, literal @@ -24,9 +25,16 @@ from metadata.data_quality.validations.column.base.columnValuesToBeInSet import ( BaseColumnValuesToBeInSetValidator, ) +from metadata.data_quality.validations.mixins.failed_row_sampler_mixin import ( + SQARowSamplerMixin, +) +from metadata.data_quality.validations.mixins.failed_sample_validator_mixin import ( + FailedSampleValidatorMixin, +) from metadata.data_quality.validations.mixins.sqa_validator_mixin import ( SQAValidatorMixin, ) +from metadata.generated.schema.entity.data.table import TableData from metadata.generated.schema.tests.dimensionResult import DimensionResult from metadata.profiler.metrics.registry import Metrics from metadata.utils.logger import test_suite_logger @@ -35,7 +43,10 @@ class ColumnValuesToBeInSetValidator( - BaseColumnValuesToBeInSetValidator, SQAValidatorMixin + FailedSampleValidatorMixin, + BaseColumnValuesToBeInSetValidator, + 
SQAValidatorMixin, + SQARowSamplerMixin, ): """Validator for column value to be in set test case""" @@ -133,3 +144,24 @@ def compute_row_count(self, column: Column): NotImplementedError: """ return self._compute_row_count(self.runner, column) + + def filter(self): + items = self.get_test_case_param_value( + self.test_case.parameterValues, # type: ignore + "allowedValues", + literal_eval, + ) + return { + "filters": [ + ( + self.get_column(), + "notin", + items, + ) + ], + "or_filter": False, + } + + def fetch_failed_rows_sample(self): + cols, rows = self._get_failed_rows_sample() + return TableData(columns=cols, rows=rows) diff --git a/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeNotInSet.py b/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeNotInSet.py index db8cc526f98b..364bd7896814 100644 --- a/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeNotInSet.py +++ b/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeNotInSet.py @@ -13,6 +13,7 @@ Validator for column value to be not in set test case """ +from ast import literal_eval from typing import List, Optional from sqlalchemy import Column @@ -24,9 +25,16 @@ from metadata.data_quality.validations.column.base.columnValuesToBeNotInSet import ( BaseColumnValuesToBeNotInSetValidator, ) +from metadata.data_quality.validations.mixins.failed_row_sampler_mixin import ( + SQARowSamplerMixin, +) +from metadata.data_quality.validations.mixins.failed_sample_validator_mixin import ( + FailedSampleValidatorMixin, +) from metadata.data_quality.validations.mixins.sqa_validator_mixin import ( SQAValidatorMixin, ) +from metadata.generated.schema.entity.data.table import TableData from metadata.generated.schema.tests.dimensionResult import DimensionResult from metadata.profiler.metrics.registry import Metrics from metadata.utils.logger import test_suite_logger @@ -35,7 +43,10 @@ class 
ColumnValuesToBeNotInSetValidator( - BaseColumnValuesToBeNotInSetValidator, SQAValidatorMixin + FailedSampleValidatorMixin, + BaseColumnValuesToBeNotInSetValidator, + SQAValidatorMixin, + SQARowSamplerMixin, ): """Validator for column value to be not in set test case""" @@ -121,3 +132,18 @@ def compute_row_count(self, column: Column): NotImplementedError: """ return self._compute_row_count(self.runner, column) + + def filter(self): + items = self.get_test_case_param_value( + self.test_case.parameterValues, # type: ignore + "forbiddenValues", + literal_eval, + ) + return { + "filters": [(self.get_column(), "in", items)], + "or_filter": False, + } + + def fetch_failed_rows_sample(self): + cols, rows = self._get_failed_rows_sample() + return TableData(columns=cols, rows=rows) diff --git a/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeNotNull.py b/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeNotNull.py index 014b42a70c71..7e5a0ce679b1 100644 --- a/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeNotNull.py +++ b/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeNotNull.py @@ -24,9 +24,16 @@ from metadata.data_quality.validations.column.base.columnValuesToBeNotNull import ( BaseColumnValuesToBeNotNullValidator, ) +from metadata.data_quality.validations.mixins.failed_row_sampler_mixin import ( + SQARowSamplerMixin, +) +from metadata.data_quality.validations.mixins.failed_sample_validator_mixin import ( + FailedSampleValidatorMixin, +) from metadata.data_quality.validations.mixins.sqa_validator_mixin import ( SQAValidatorMixin, ) +from metadata.generated.schema.entity.data.table import TableData from metadata.generated.schema.tests.dimensionResult import DimensionResult from metadata.profiler.metrics.registry import Metrics from metadata.utils.logger import test_suite_logger @@ -35,7 +42,10 @@ class ColumnValuesToBeNotNullValidator( - 
BaseColumnValuesToBeNotNullValidator, SQAValidatorMixin + FailedSampleValidatorMixin, + BaseColumnValuesToBeNotNullValidator, + SQAValidatorMixin, + SQARowSamplerMixin, ): """Validator for column values to be not null test case""" @@ -116,3 +126,13 @@ def compute_row_count(self, column: Column): NotImplementedError: """ return self._compute_row_count(self.runner, column) + + def filter(self): + return { + "filters": [(self.get_column(), "eq", None)], + "or_filter": False, + } + + def fetch_failed_rows_sample(self): + cols, rows = self._get_failed_rows_sample() + return TableData(columns=cols, rows=rows) diff --git a/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeUnique.py b/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeUnique.py index 4a292baf7d6c..c6939019abdc 100644 --- a/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeUnique.py +++ b/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeUnique.py @@ -14,9 +14,9 @@ """ import logging -from typing import List, Optional +from typing import List, Optional, cast -from sqlalchemy import Column, case, func, literal_column, select +from sqlalchemy import Column, case, func, inspect, literal_column, select from sqlalchemy.exc import SQLAlchemyError from metadata.data_quality.validations.base_test_handler import ( @@ -26,19 +26,30 @@ from metadata.data_quality.validations.column.base.columnValuesToBeUnique import ( BaseColumnValuesToBeUniqueValidator, ) +from metadata.data_quality.validations.mixins.failed_row_sampler_mixin import ( + SQARowSamplerMixin, +) +from metadata.data_quality.validations.mixins.failed_sample_validator_mixin import ( + FailedSampleValidatorMixin, +) from metadata.data_quality.validations.mixins.sqa_validator_mixin import ( SQAValidatorMixin, ) +from metadata.generated.schema.entity.data.table import TableData from metadata.generated.schema.tests.dimensionResult 
import DimensionResult from metadata.profiler.metrics.registry import Metrics from metadata.profiler.orm.functions.unique_count import _unique_count_dimensional_cte from metadata.profiler.orm.registry import Dialects +from metadata.profiler.processor.runner import QueryRunner logger = logging.getLogger(__name__) class ColumnValuesToBeUniqueValidator( - BaseColumnValuesToBeUniqueValidator, SQAValidatorMixin + FailedSampleValidatorMixin, + BaseColumnValuesToBeUniqueValidator, + SQAValidatorMixin, + SQARowSamplerMixin, ): """Validator for column values to be unique test case""" @@ -217,3 +228,25 @@ def build_others_metric_expressions(others_source): } return build_others_metric_expressions + + def filter(self): + self.runner = cast(QueryRunner, self.runner) + col = self.get_column_from_list( + self.test_case.entityLink.root, + inspect(self.runner.dataset).c, + ) + filters = [ + ( + col, + "in", + (self.runner._build_query(col).group_by(col).having(func.count() > 1)), + ) + ] + return { + "filters": filters, + "or_filter": False, + } + + def fetch_failed_rows_sample(self): + cols, rows = self._get_failed_rows_sample() + return TableData(columns=cols, rows=rows) diff --git a/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToMatchRegex.py b/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToMatchRegex.py index 433c3abaaa5f..534e73e0bba5 100644 --- a/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToMatchRegex.py +++ b/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToMatchRegex.py @@ -15,7 +15,7 @@ from typing import List, Optional, Tuple -from sqlalchemy import Column +from sqlalchemy import Column, not_ from sqlalchemy.exc import CompileError, SQLAlchemyError from metadata.data_quality.validations.base_test_handler import ( @@ -25,9 +25,16 @@ from metadata.data_quality.validations.column.base.columnValuesToMatchRegex import ( 
BaseColumnValuesToMatchRegexValidator, ) +from metadata.data_quality.validations.mixins.failed_row_sampler_mixin import ( + SQARowSamplerMixin, +) +from metadata.data_quality.validations.mixins.failed_sample_validator_mixin import ( + FailedSampleValidatorMixin, +) from metadata.data_quality.validations.mixins.sqa_validator_mixin import ( SQAValidatorMixin, ) +from metadata.generated.schema.entity.data.table import TableData from metadata.generated.schema.tests.dimensionResult import DimensionResult from metadata.profiler.metrics.core import add_props from metadata.profiler.metrics.registry import Metrics @@ -37,7 +44,10 @@ class ColumnValuesToMatchRegexValidator( - BaseColumnValuesToMatchRegexValidator, SQAValidatorMixin + FailedSampleValidatorMixin, + BaseColumnValuesToMatchRegexValidator, + SQAValidatorMixin, + SQARowSamplerMixin, ): """Validator for column values to match regex test case""" @@ -157,3 +167,15 @@ def compute_row_count(self, column: Column): NotImplementedError: """ return self._compute_row_count(self.runner, column) + + def filter(self): + expression = self.get_test_case_param_value( + self.test_case.parameterValues, # type: ignore + "regex", + str, + ) + return not_(self.get_column().regexp_match(expression)) + + def fetch_failed_rows_sample(self): + cols, rows = self._get_failed_rows_sample() + return TableData(columns=cols, rows=rows) diff --git a/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToNotMatchRegex.py b/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToNotMatchRegex.py index dc059ef975ac..17d8eb483ac1 100644 --- a/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToNotMatchRegex.py +++ b/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToNotMatchRegex.py @@ -25,9 +25,16 @@ from metadata.data_quality.validations.column.base.columnValuesToNotMatchRegex import ( BaseColumnValuesToNotMatchRegexValidator, ) +from 
metadata.data_quality.validations.mixins.failed_row_sampler_mixin import ( + SQARowSamplerMixin, +) +from metadata.data_quality.validations.mixins.failed_sample_validator_mixin import ( + FailedSampleValidatorMixin, +) from metadata.data_quality.validations.mixins.sqa_validator_mixin import ( SQAValidatorMixin, ) +from metadata.generated.schema.entity.data.table import TableData from metadata.generated.schema.tests.dimensionResult import DimensionResult from metadata.profiler.metrics.core import add_props from metadata.profiler.metrics.registry import Metrics @@ -37,7 +44,10 @@ class ColumnValuesToNotMatchRegexValidator( - BaseColumnValuesToNotMatchRegexValidator, SQAValidatorMixin + FailedSampleValidatorMixin, + BaseColumnValuesToNotMatchRegexValidator, + SQAValidatorMixin, + SQARowSamplerMixin, ): """Validator for column values to not match regex test case""" @@ -130,3 +140,18 @@ def compute_row_count(self, column: Column): NotImplementedError: """ return self._compute_row_count(self.runner, column) + + def filter(self): + expression = self.get_test_case_param_value( + self.test_case.parameterValues, # type: ignore + "forbiddenRegex", + str, + ) + return { + "filters": [(self.get_column(), "regexp_match", expression)], + "or_filter": False, + } + + def fetch_failed_rows_sample(self): + cols, rows = self._get_failed_rows_sample() + return TableData(columns=cols, rows=rows) diff --git a/ingestion/src/metadata/data_quality/validations/mixins/failed_row_sampler_mixin.py b/ingestion/src/metadata/data_quality/validations/mixins/failed_row_sampler_mixin.py new file mode 100644 index 000000000000..bda676694d34 --- /dev/null +++ b/ingestion/src/metadata/data_quality/validations/mixins/failed_row_sampler_mixin.py @@ -0,0 +1,85 @@ +# Copyright 2025 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Mixins for fetching failed row samples from test case validations. + +SQARowSamplerMixin: SQLAlchemy-based row sampling (builds query, captures compiled SQL) +PandasFailedRowSamplerMixin: DataFrame-based row sampling (filters chunks via df.query()) +""" + +from typing import Any, List, Tuple, cast + +from sqlalchemy import inspect + +from metadata.profiler.processor.runner import QueryRunner + +FAILED_ROW_SAMPLE_SIZE = 50 + + +class PandasFailedRowSamplerMixin: + """Mixin to fetch failed row samples from Pandas DataFrames""" + + def _get_failed_rows_sample(self) -> Tuple[List[str], List[List[Any]]]: + cols = None + rows = [] + for chunk in self.runner(): + if cols is None: + cols = chunk.columns.tolist() + prepared_chunk = chunk[cols] + _filter = self.filter() + + if isinstance(_filter, str): + # Backwards-compatible path: string expression evaluated via DataFrame.query + filtered_chunk = prepared_chunk.query(_filter) + else: + # New path: support boolean masks, callables, or pre-filtered DataFrames + criteria = _filter(prepared_chunk) if callable(_filter) else _filter + + if criteria is None: + # No filtering; keep full chunk + filtered_chunk = prepared_chunk + else: + # Try treating the criteria as a mask for boolean indexing first. + # If that fails, assume it is already a filtered DataFrame-like object. 
+ try: + filtered_chunk = prepared_chunk[criteria] + except Exception: # pylint: disable=broad-except + filtered_chunk = criteria + + chunk_rows = filtered_chunk.values.tolist() + rows.extend(chunk_rows[:FAILED_ROW_SAMPLE_SIZE]) + if len(rows) >= FAILED_ROW_SAMPLE_SIZE: + break + + return cols or [], rows + + +class SQARowSamplerMixin: + """Mixin to fetch failed row samples from SQLAlchemy queries""" + + def _get_failed_rows_sample(self) -> Tuple[List[str], List[List[Any]]]: + # pylint: disable=protected-access + self.runner = cast(QueryRunner, self.runner) + cols = list(inspect(self.runner.dataset).c) + _filter = self.filter() + if isinstance(_filter, dict): + query = self.runner._select_from_sample(*cols, query_filter_=_filter) + else: + query = self.runner._select_from_sample(*cols) + query = query.filter(_filter) + + self._inspection_query = str( + query.statement.compile(compile_kwargs={"literal_binds": True}) + ) + + rows = query.limit(FAILED_ROW_SAMPLE_SIZE).all() + return [col.name for col in cols], [list(row) for row in rows] diff --git a/ingestion/src/metadata/data_quality/validations/mixins/failed_sample_validator_mixin.py b/ingestion/src/metadata/data_quality/validations/mixins/failed_sample_validator_mixin.py new file mode 100644 index 000000000000..f6845e45cc2f --- /dev/null +++ b/ingestion/src/metadata/data_quality/validations/mixins/failed_sample_validator_mixin.py @@ -0,0 +1,77 @@ +# Copyright 2025 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Mixin that orchestrates failed row sampling for test case validators.
+
+When a test case has computePassedFailedRowCount=True and the result is Failed,
+this mixin fetches a sample of failed rows and the inspection query (for SQL
+sources), attaching them to the TestCaseResultResponse instance.
+
+TestSuiteInterface.run_test_case() calls validator.result_with_failed_samples()
+after a successful validation; this mixin provides the implementation.
+"""
+
+import traceback
+from abc import ABC, abstractmethod
+from typing import Optional
+
+from metadata.data_quality.api.models import TestCaseResultResponse
+from metadata.generated.schema.entity.data.table import TableData
+from metadata.generated.schema.tests.basic import TestCaseStatus
+from metadata.utils.logger import test_suite_logger
+
+logger = test_suite_logger()
+
+
+class FailedSampleValidatorMixin(ABC):
+    """ABC mixin providing failed row sampling orchestration.
+
+    Concrete validators must implement:
+    - fetch_failed_rows_sample() -> TableData
+    - filter() -> filter expression (dict for SQA, string for Pandas)
+    """
+
+    def get_inspection_query(self) -> Optional[str]:
+        return getattr(self, "_inspection_query", None)
+
+    @abstractmethod
+    def fetch_failed_rows_sample(self) -> TableData:
+        raise NotImplementedError
+
+    def result_with_failed_samples(self, result: TestCaseResultResponse) -> None:
+        """Fetch failed row samples and attach them to the result.
+
+        Called by TestSuiteInterface.run_test_case() after validation completes.
+        Only fetches samples when:
+        - test_case.computePassedFailedRowCount is True
+        - result.testCaseResult.testCaseStatus is Failed
+
+        Attaches failedRowsSample and inspectionQuery directly on the
+        TestCaseResultResponse instance for the runner/sink to pick up.
+ """ + if not ( + getattr(result.testCase, "computePassedFailedRowCount", False) + and result.testCaseResult.testCaseStatus == TestCaseStatus.Failed + ): + return + + try: + result.failedRowsSample = self.fetch_failed_rows_sample() + except Exception: + logger.debug(traceback.format_exc()) + logger.error("Failed to fetch failed rows sample") + + try: + result.inspectionQuery = self.get_inspection_query() + except Exception: + logger.debug(traceback.format_exc()) + logger.error("Failed to get inspection query") diff --git a/ingestion/src/metadata/data_quality/validations/table/sqlalchemy/tableCustomSQLQuery.py b/ingestion/src/metadata/data_quality/validations/table/sqlalchemy/tableCustomSQLQuery.py index 0adc458e0650..25a146346908 100644 --- a/ingestion/src/metadata/data_quality/validations/table/sqlalchemy/tableCustomSQLQuery.py +++ b/ingestion/src/metadata/data_quality/validations/table/sqlalchemy/tableCustomSQLQuery.py @@ -13,7 +13,8 @@ Validator for table custom SQL Query test case """ -from typing import Optional, Tuple, cast +import traceback +from typing import Any, List, Optional, Tuple, cast import sqlparse from sqlalchemy import text @@ -21,6 +22,13 @@ from sqlparse.sql import Statement, Token, Where from sqlparse.tokens import Keyword +from metadata.data_quality.api.models import TestCaseResultResponse +from metadata.data_quality.validations.mixins.failed_row_sampler_mixin import ( + FAILED_ROW_SAMPLE_SIZE, +) +from metadata.data_quality.validations.mixins.failed_sample_validator_mixin import ( + FailedSampleValidatorMixin, +) from metadata.data_quality.validations.mixins.sqa_validator_mixin import ( SQAValidatorMixin, ) @@ -31,7 +39,8 @@ BaseTableCustomSQLQueryValidator, Strategy, ) -from metadata.generated.schema.tests.basic import TestCaseResult +from metadata.generated.schema.entity.data.table import TableData +from metadata.generated.schema.tests.basic import TestCaseResult, TestCaseStatus from metadata.profiler.metrics.registry import Metrics from 
metadata.profiler.orm.functions.table_metric_computer import TableMetricComputer from metadata.profiler.processor.runner import QueryRunner @@ -41,7 +50,9 @@ logger = ingestion_logger() -class TableCustomSQLQueryValidator(BaseTableCustomSQLQueryValidator, SQAValidatorMixin): +class TableCustomSQLQueryValidator( + FailedSampleValidatorMixin, BaseTableCustomSQLQueryValidator, SQAValidatorMixin +): """Validator for table custom SQL Query test case""" def _replace_where_clause( @@ -368,3 +379,54 @@ def compute_row_count(self) -> Optional[int]: if row: return row._asdict().get(Metrics.rowCount.name) return None + + def _get_strategy(self): + return self.get_test_case_param_value( + self.test_case.parameterValues, # type: ignore + "strategy", + Strategy, + ) + + def fetch_failed_rows_sample(self): + cols, rows = self._get_custom_sql_failed_rows() + return TableData(columns=cols, rows=rows) + + def _get_custom_sql_failed_rows(self) -> Tuple[List[str], List[List[Any]]]: + sql_expression = self.get_test_case_param_value( + self.test_case.parameterValues, # type: ignore + "sqlExpression", + str, + ) + rows = self._run_results(sql_expression, self._get_strategy())[ + :FAILED_ROW_SAMPLE_SIZE + ] + if len(rows) == 0: + return [], [] + return [str(col) for col in rows[0]._fields], [list(row) for row in rows] + + def get_inspection_query(self): + return self.get_test_case_param_value( + self.test_case.parameterValues, # type: ignore + "sqlExpression", + str, + ) + + def result_with_failed_samples(self, result: TestCaseResultResponse) -> None: + """Override: tableCustomSQLQuery uses ROWS strategy check instead of + computePassedFailedRowCount, and sets validateColumns=False.""" + if ( + result.testCaseResult.testCaseStatus == TestCaseStatus.Failed + and self._get_strategy() == Strategy.ROWS + ): + result.validateColumns = False + try: + result.failedRowsSample = self.fetch_failed_rows_sample() + except Exception: + logger.debug(traceback.format_exc()) + logger.error("Failed to 
fetch failed rows sample") + + try: + result.inspectionQuery = self.get_inspection_query() + except Exception: + logger.debug(traceback.format_exc()) + logger.error("Failed to get inspection query") diff --git a/ingestion/src/metadata/ingestion/ometa/mixins/tests_mixin.py b/ingestion/src/metadata/ingestion/ometa/mixins/tests_mixin.py index f799789279b1..beef67f4d620 100644 --- a/ingestion/src/metadata/ingestion/ometa/mixins/tests_mixin.py +++ b/ingestion/src/metadata/ingestion/ometa/mixins/tests_mixin.py @@ -394,6 +394,36 @@ def ingest_failed_rows_sample( return None + def get_failed_rows_sample(self, test_case: TestCase) -> Optional[TableData]: + """ + GET failed row sample data for a test case. + + :param test_case: The test case to retrieve sample data for + """ + resp = None + try: + resp = self.client.get( + f"{self.get_suffix(TestCase)}/{test_case.id.root}/failedRowsSample", + ) + except Exception as exc: + logger.debug(traceback.format_exc()) + logger.warning( + f"Error trying to GET failed rows sample for " + f"{test_case.fullyQualifiedName.root}: {exc}" + ) + + if resp: + try: + return TableData(**resp) + except Exception as exc: + logger.debug(traceback.format_exc()) + logger.warning( + f"Error parsing failed rows sample for " + f"{test_case.fullyQualifiedName.root}: {exc}" + ) + + return None + def ingest_inspection_query( self, test_case: TestCase, inspection_query: str ) -> Optional[TestCase]: diff --git a/ingestion/src/metadata/ingestion/sink/metadata_rest.py b/ingestion/src/metadata/ingestion/sink/metadata_rest.py index 803541ba0e07..3e091840a696 100644 --- a/ingestion/src/metadata/ingestion/sink/metadata_rest.py +++ b/ingestion/src/metadata/ingestion/sink/metadata_rest.py @@ -732,8 +732,42 @@ def write_test_case_results(self, record: TestCaseResultResponse): logger.debug( f"Successfully ingested test case results for test case {record.testCase.name.root}" ) + self._ingest_failed_rows_sample(record) return Either(right=res) + def 
_ingest_failed_rows_sample(self, record: TestCaseResultResponse): + """Ingest failed row sample and inspection query if present on the record.""" + if record.failedRowsSample is not None: + try: + self.metadata.ingest_failed_rows_sample( + record.testCase, + record.failedRowsSample, + validate=record.validateColumns, + ) + logger.debug( + f"Successfully ingested failed rows sample for {record.testCase.name.root}" + ) + except Exception: + logger.debug(traceback.format_exc()) + logger.error( + f"Failed to ingest failed rows sample for {record.testCase.name.root}" + ) + + if record.inspectionQuery is not None: + try: + self.metadata.ingest_inspection_query( + record.testCase, + record.inspectionQuery, + ) + logger.debug( + f"Successfully ingested inspection query for {record.testCase.name.root}" + ) + except Exception: + logger.debug(traceback.format_exc()) + logger.error( + f"Failed to ingest inspection query for {record.testCase.name.root}" + ) + @_run_dispatch.register def write_test_case_resolution_status( self, record: OMetaTestCaseResolutionStatus @@ -951,6 +985,7 @@ def write_test_case_result_list(self, record: TestCaseResults): test_results=result.testCaseResult, test_case_fqn=result.testCase.fullyQualifiedName.root, ) + self._ingest_failed_rows_sample(result) self.status.scanned(result) return Either(right=record) diff --git a/ingestion/src/metadata/sdk/data_quality/dataframes/dataframe_validation_engine.py b/ingestion/src/metadata/sdk/data_quality/dataframes/dataframe_validation_engine.py index bfeb4ee6faed..ef18f3d2b224 100644 --- a/ingestion/src/metadata/sdk/data_quality/dataframes/dataframe_validation_engine.py +++ b/ingestion/src/metadata/sdk/data_quality/dataframes/dataframe_validation_engine.py @@ -84,7 +84,8 @@ def _execute_single_test( ) try: - return validator.run_validation() + result = validator.run_validation() + return result except Exception as err: message = ( f"Error executing {test_case.testDefinition.fullyQualifiedName} - {err}" diff --git 
a/ingestion/tests/integration/data_quality/test_failed_row_samples.py b/ingestion/tests/integration/data_quality/test_failed_row_samples.py new file mode 100644 index 000000000000..d02620d00558 --- /dev/null +++ b/ingestion/tests/integration/data_quality/test_failed_row_samples.py @@ -0,0 +1,403 @@ +""" +Integration tests for failed row sampling. + +Runs data quality tests against a PostgreSQL database and asserts that +failed row samples are published for failing tests and not for passing tests. +""" + +from typing import List, Optional + +import pandas as pd +import pytest + +from _openmetadata_testutils.helpers.assumption import Assumption, assume +from metadata.data_quality.api.models import ( + TestCaseDefinition, + TestSuiteProcessorConfig, +) +from metadata.generated.schema.entity.data.table import Table +from metadata.generated.schema.entity.services.databaseService import DatabaseService +from metadata.generated.schema.metadataIngestion.testSuitePipeline import ( + TestSuiteConfigType, +) +from metadata.generated.schema.tests.basic import TestCaseStatus +from metadata.generated.schema.tests.testCase import TestCase, TestCaseParameterValue +from metadata.ingestion.models.custom_pydantic import BaseModel +from metadata.ingestion.ometa.ometa_api import OpenMetadata +from metadata.utils.constants import SAMPLE_DATA_DEFAULT_COUNT +from metadata.workflow.data_quality import TestSuiteWorkflow +from metadata.workflow.metadata import MetadataWorkflow + + +class SampleDataParameters(BaseModel): + class Config: + arbitrary_types_allowed = True + + test_case_definition: TestCaseDefinition + assumptions: List[Assumption] + table: str = "customer" + expected_query: Optional[str] = None + + def __init__(self, *args, **kwargs): + if args: + field_names = list(self.__annotations__.keys()) + kwargs.update(dict(zip(field_names, args))) + super().__init__(**kwargs) + + +class HasColumn(Assumption): + def __init__(self, column_name: str): + super().__init__() + 
self.column_name = column_name + + def assume_positive(self, df): + assert self.column_name in df.columns + + +class HasExactlyColumns(Assumption): + def __init__(self, num_columns: int): + super().__init__() + self.num_columns = num_columns + + def assume_positive(self, df): + assert len(df.columns) == self.num_columns + + +DEFAULT_COLUMNS = [ + "customer_id", + "store_id", + "first_name", + "last_name", + "email", + "address_id", + "activebool", + "create_date", + "last_update", + "active", + "json_field", +] +DEFAULT_COLUMN_ASSUMPTIONS = [HasColumn(column) for column in DEFAULT_COLUMNS] + [ + HasExactlyColumns(len(DEFAULT_COLUMNS)) +] + + +FAILING_TEST_PARAMS = [ + pytest.param( + SampleDataParameters(*t), + id=t[0].name, + ) + for t in [ + ( + TestCaseDefinition( + name="email_is_not_null", + testDefinitionName="columnValuesToBeNotNull", + columnName="email", + computePassedFailedRowCount=True, + ), + [ + ~assume.notnull("email"), + assume.arbitrary("customer_id", lambda x: x % 10 == 0), + *DEFAULT_COLUMN_ASSUMPTIONS, + ], + "bad_data_customer", + ), + ( + TestCaseDefinition( + name="customer_id_between_0_100", + testDefinitionName="columnValuesToBeBetween", + columnName="customer_id", + computePassedFailedRowCount=True, + parameterValues=[ + TestCaseParameterValue(name="minValue", value="0"), + TestCaseParameterValue(name="maxValue", value="100"), + ], + ), + [~assume.between("customer_id", 0, 100), *DEFAULT_COLUMN_ASSUMPTIONS], + ), + ( + TestCaseDefinition( + name="there_is_a_jonnny", + testDefinitionName="columnValuesToBeInSet", + columnName="first_name", + computePassedFailedRowCount=True, + parameterValues=[ + TestCaseParameterValue(name="allowedValues", value=str(["Jonnny"])), + ], + ), + [~assume.column_values_in("first_name", ["Jonnny"])], + ), + ( + TestCaseDefinition( + name="assume_there_is_no_tim", + testDefinitionName="columnValuesToBeNotInSet", + columnName="first_name", + computePassedFailedRowCount=True, + parameterValues=[ + 
TestCaseParameterValue(name="forbiddenValues", value=str(["Tim"])), + ], + ), + [ + assume.column_values_in("first_name", ["Tim"]), + *DEFAULT_COLUMN_ASSUMPTIONS, + ], + ), + ( + TestCaseDefinition( + name="names_up_to_three_chars", + testDefinitionName="columnValueLengthsToBeBetween", + columnName="first_name", + computePassedFailedRowCount=True, + parameterValues=[ + TestCaseParameterValue(name="minLength", value="0"), + TestCaseParameterValue(name="maxLength", value="3"), + ], + ), + [ + ~assume.length_between("first_name", 0, 3), + *DEFAULT_COLUMN_ASSUMPTIONS, + ], + ), + ( + TestCaseDefinition( + name="name_starts_with_j", + testDefinitionName="columnValuesToMatchRegex", + columnName="first_name", + computePassedFailedRowCount=True, + parameterValues=[ + TestCaseParameterValue(name="regex", value="^J"), + ], + ), + [ + ~assume.match_regex("first_name", "^J"), + *DEFAULT_COLUMN_ASSUMPTIONS, + ], + ), + ( + TestCaseDefinition( + name="name_does_not_start_with_j", + testDefinitionName="columnValuesToNotMatchRegex", + columnName="first_name", + computePassedFailedRowCount=True, + parameterValues=[ + TestCaseParameterValue(name="forbiddenRegex", value="^J"), + ], + ), + [ + assume.match_regex("first_name", "^J"), + *DEFAULT_COLUMN_ASSUMPTIONS, + ], + ), + ( + TestCaseDefinition( + name="unique_value_test", + testDefinitionName="columnValuesToBeUnique", + columnName="first_name", + computePassedFailedRowCount=True, + ), + [ + ~assume.unique("first_name"), + *DEFAULT_COLUMN_ASSUMPTIONS, + ], + ), + ( + TestCaseDefinition( + name="custom_sql_test", + testDefinitionName="tableCustomSQLQuery", + parameterValues=[ + TestCaseParameterValue( + name="sqlExpression", + value="SELECT *, 'extra_value' as extra_column FROM customer", + ), + TestCaseParameterValue(name="strategy", value="ROWS"), + ], + ), + [ + *[HasColumn(column) for column in DEFAULT_COLUMNS], + HasExactlyColumns(len(DEFAULT_COLUMNS) + 1), + HasColumn("extra_column"), + ], + ), + ] +] + + 
+@pytest.fixture(scope="module") +def extra_sql_commands(): + return [] + + +@pytest.fixture(scope="module") +def sql_commands(extra_sql_commands): + return [ + "CREATE TABLE IF NOT EXISTS bad_data_customer AS SELECT * FROM customer;", + "UPDATE public.bad_data_customer SET email = NULL WHERE MOD(customer_id, 10) = 0;", + "UPDATE public.bad_data_customer SET first_name = 'Steveo' WHERE first_name = 'Steve';", + ] + extra_sql_commands + + +@pytest.fixture(scope="module") +def prepare_postgres(postgres_container, sql_commands): + """Execute SQL commands to set up test data in the dvdrental database.""" + from sqlalchemy import create_engine, text + from sqlalchemy.engine.url import make_url + + engine = create_engine( + make_url(postgres_container.get_connection_url()).set(database="dvdrental") + ) + with engine.begin() as conn: + for command in sql_commands: + conn.execute(text(command)) + + +@pytest.fixture(scope="module") +def ingest_postgres_metadata( + prepare_postgres, + postgres_service, + metadata: OpenMetadata, + sink_config, + workflow_config, + run_workflow, +): + """Ingest metadata after preparing the database with test data.""" + wf_config = { + "source": { + "type": postgres_service.connection.config.type.value.lower(), + "serviceName": postgres_service.fullyQualifiedName.root, + "serviceConnection": postgres_service.connection.model_copy( + update={ + "config": postgres_service.connection.config.model_copy( + update={"ingestAllDatabases": True} + ) + } + ), + "sourceConfig": { + "config": { + "schemaFilterPattern": {"excludes": ["information_schema"]}, + } + }, + }, + "sink": sink_config, + "workflowConfig": workflow_config, + } + run_workflow(MetadataWorkflow, wf_config) + + +def _run_test_suite( + metadata: OpenMetadata, + db_service: DatabaseService, + table: Table, + test_case_definitions: list, + sink_config: dict, + workflow_config: dict, + run_workflow, +): + config = { + "source": { + "type": db_service.connection.config.type.value.lower(), + 
"serviceName": f"MyTestSuite_{db_service.name.root}", + "sourceConfig": { + "config": { + "type": TestSuiteConfigType.TestSuite.value, + "entityFullyQualifiedName": table.fullyQualifiedName.root, + } + }, + }, + "processor": { + "type": "orm-test-runner", + "config": TestSuiteProcessorConfig( + testCases=test_case_definitions + ).model_dump(), + }, + "sink": sink_config, + "workflowConfig": workflow_config, + } + run_workflow(TestSuiteWorkflow, config) + + +@pytest.mark.parametrize("parameters", FAILING_TEST_PARAMS) +def test_failing_tests_publish_failed_samples( + postgres_service: DatabaseService, + ingest_postgres_metadata, + patch_passwords_for_db_services, + metadata: OpenMetadata, + parameters: SampleDataParameters, + cleanup_fqns, + run_workflow, + sink_config, + workflow_config, +): + table: Table = metadata.get_by_name( + Table, + f"{postgres_service.fullyQualifiedName.root}.dvdrental.public.{parameters.table}", + nullable=False, + ) + _run_test_suite( + metadata, + postgres_service, + table, + [parameters.test_case_definition], + sink_config, + workflow_config, + run_workflow, + ) + test_fqn_parts = [table.fullyQualifiedName.root] + if parameters.test_case_definition.columnName: + test_fqn_parts.append(parameters.test_case_definition.columnName) + test_fqn_parts.append(parameters.test_case_definition.name) + test_case_entity: TestCase = metadata.get_by_name( + entity=TestCase, + fqn=".".join(test_fqn_parts), + fields=["*"], + nullable=False, + ) + cleanup_fqns(TestCase, test_case_entity.fullyQualifiedName.root) + assert test_case_entity.testCaseResult.testCaseStatus == TestCaseStatus.Failed + failed_sample = metadata.get_failed_rows_sample(test_case_entity) + assert failed_sample is not None + df = pd.DataFrame( + failed_sample.rows, columns=[c.root for c in failed_sample.columns] + ) + assert len(df) <= SAMPLE_DATA_DEFAULT_COUNT + assert len(df) > 0 + for assumption in parameters.assumptions: + assumption.validate(df) + assert 
test_case_entity.inspectionQuery is not None + + +def test_passing_tests_dont_publish( + postgres_service: DatabaseService, + ingest_postgres_metadata, + patch_passwords_for_db_services, + metadata: OpenMetadata, + cleanup_fqns, + run_workflow, + sink_config, + workflow_config, +): + table: Table = metadata.get_by_name( + Table, + f"{postgres_service.fullyQualifiedName.root}.dvdrental.public.customer", + nullable=False, + ) + test_case_definition = TestCaseDefinition( + name="name_is_not_null", + testDefinitionName="columnValuesToBeNotNull", + columnName="first_name", + computePassedFailedRowCount=True, + ) + _run_test_suite( + metadata, + postgres_service, + table, + [test_case_definition], + sink_config, + workflow_config, + run_workflow, + ) + test_case_entity: TestCase = metadata.get_or_create_test_case( + f"{table.fullyQualifiedName.root}.{test_case_definition.columnName}.{test_case_definition.name}" + ) + cleanup_fqns(TestCase, test_case_entity.fullyQualifiedName.root) + assert test_case_entity.testCaseResult.testCaseStatus == TestCaseStatus.Success + failed_sample = metadata.get_failed_rows_sample(test_case_entity) + assert failed_sample is None diff --git a/ingestion/tests/unit/data_quality/validations/test_failed_sample_mixin.py b/ingestion/tests/unit/data_quality/validations/test_failed_sample_mixin.py new file mode 100644 index 000000000000..78e8c7a6a433 --- /dev/null +++ b/ingestion/tests/unit/data_quality/validations/test_failed_sample_mixin.py @@ -0,0 +1,281 @@ +""" +Unit tests for the FailedSampleValidatorMixin and row sampler mixins. 
+ +Tests the orchestration logic of result_with_failed_samples(): + - Only fetches samples when computePassedFailedRowCount=True AND status=Failed + - Handles exceptions gracefully + - Stashes data on the result object +""" + +from unittest.mock import MagicMock, patch + +from metadata.data_quality.api.models import TestCaseResultResponse +from metadata.data_quality.validations.mixins.failed_row_sampler_mixin import ( + FAILED_ROW_SAMPLE_SIZE, + PandasFailedRowSamplerMixin, + SQARowSamplerMixin, +) +from metadata.data_quality.validations.mixins.failed_sample_validator_mixin import ( + FailedSampleValidatorMixin, +) +from metadata.generated.schema.entity.data.table import TableData +from metadata.generated.schema.tests.basic import TestCaseResult, TestCaseStatus + + +class ConcreteValidator(FailedSampleValidatorMixin): + """Minimal concrete implementation for testing the mixin.""" + + def __init__(self, sample_data=None, inspection_query=None, raise_on_fetch=False): + self._sample_data = sample_data + self._inspection_query_val = inspection_query + self._raise_on_fetch = raise_on_fetch + + def fetch_failed_rows_sample(self): + if self._raise_on_fetch: + raise RuntimeError("fetch error") + return self._sample_data + + def get_inspection_query(self): + return self._inspection_query_val + + def filter(self): + return {} + + +def _make_test_case(compute_row_count=True): + tc = MagicMock() + tc.computePassedFailedRowCount = compute_row_count + return tc + + +def _make_test_case_result(status=TestCaseStatus.Failed): + result = MagicMock(spec=TestCaseResult) + result.testCaseStatus = status + return result + + +def _make_response(compute_row_count=True, status=TestCaseStatus.Failed): + response = MagicMock(spec=TestCaseResultResponse) + response.testCase = _make_test_case(compute_row_count) + response.testCaseResult = _make_test_case_result(status) + response.failedRowsSample = None + response.inspectionQuery = None + return response + + +class 
TestFailedSampleValidatorMixin: + def test_samples_fetched_when_failed_and_flag_set(self): + sample = TableData(columns=["a", "b"], rows=[["1", "2"]]) + validator = ConcreteValidator(sample_data=sample, inspection_query="SELECT 1") + response = _make_response(compute_row_count=True, status=TestCaseStatus.Failed) + + validator.result_with_failed_samples(response) + + assert response.failedRowsSample == sample + assert response.inspectionQuery == "SELECT 1" + + def test_no_samples_when_status_is_success(self): + sample = TableData(columns=["a"], rows=[["1"]]) + validator = ConcreteValidator(sample_data=sample) + response = _make_response(compute_row_count=True, status=TestCaseStatus.Success) + + validator.result_with_failed_samples(response) + + assert response.failedRowsSample is None + + def test_no_samples_when_flag_is_false(self): + sample = TableData(columns=["a"], rows=[["1"]]) + validator = ConcreteValidator(sample_data=sample) + response = _make_response(compute_row_count=False, status=TestCaseStatus.Failed) + + validator.result_with_failed_samples(response) + + assert response.failedRowsSample is None + + def test_no_samples_when_flag_is_none(self): + validator = ConcreteValidator(sample_data=TableData(columns=[], rows=[])) + response = _make_response(status=TestCaseStatus.Failed) + response.testCase.computePassedFailedRowCount = None + + validator.result_with_failed_samples(response) + + assert response.failedRowsSample is None + + def test_fetch_error_does_not_propagate(self): + validator = ConcreteValidator(raise_on_fetch=True) + response = _make_response(compute_row_count=True, status=TestCaseStatus.Failed) + + validator.result_with_failed_samples(response) + + assert response.failedRowsSample is None + + def test_inspection_query_none_by_default(self): + sample = TableData(columns=["a"], rows=[["1"]]) + validator = ConcreteValidator(sample_data=sample, inspection_query=None) + response = _make_response(compute_row_count=True, 
status=TestCaseStatus.Failed) + + validator.result_with_failed_samples(response) + + assert response.failedRowsSample == sample + assert response.inspectionQuery is None + + +class TestPandasFailedRowSamplerMixin: + def test_respects_sample_size_limit(self): + import pandas as pd + + large_df = pd.DataFrame( + {"col1": range(100), "col2": [f"val_{i}" for i in range(100)]} + ) + + class TestValidator(PandasFailedRowSamplerMixin): + def runner(self_inner): + def gen(): + yield large_df + + return gen() + + def filter(self_inner): + return "col1 >= 0" + + validator = TestValidator() + cols, rows = validator._get_failed_rows_sample() + + assert len(rows) <= FAILED_ROW_SAMPLE_SIZE + assert cols == ["col1", "col2"] + + def test_empty_result(self): + import pandas as pd + + df = pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]}) + + class TestValidator(PandasFailedRowSamplerMixin): + def runner(self_inner): + def gen(): + yield df + + return gen() + + def filter(self_inner): + return "col1 > 100" + + validator = TestValidator() + cols, rows = validator._get_failed_rows_sample() + + assert cols == ["col1", "col2"] + assert len(rows) == 0 + + def test_multiple_chunks(self): + import pandas as pd + + chunk1 = pd.DataFrame({"a": [1, 2], "b": ["x", "y"]}) + chunk2 = pd.DataFrame({"a": [3, 4], "b": ["z", "w"]}) + + class TestValidator(PandasFailedRowSamplerMixin): + def runner(self_inner): + def gen(): + yield chunk1 + yield chunk2 + + return gen() + + def filter(self_inner): + return "a >= 1" + + validator = TestValidator() + cols, rows = validator._get_failed_rows_sample() + + assert cols == ["a", "b"] + assert len(rows) == 4 + + +class TestSQARowSamplerMixin: + def test_dict_filter_uses_select_from_sample(self): + mock_runner = MagicMock() + mock_col = MagicMock() + mock_col.name = "test_col" + mock_inspect = MagicMock() + mock_inspect.c = [mock_col] + + mock_query = MagicMock() + mock_runner._select_from_sample.return_value = mock_query + 
mock_query.limit.return_value.all.return_value = [ + (1, "a"), + (2, "b"), + ] + mock_query.statement.compile.return_value = "SELECT ..." + + class TestValidator(SQARowSamplerMixin): + pass + + validator = TestValidator() + validator.runner = mock_runner + + with patch( + "metadata.data_quality.validations.mixins.failed_row_sampler_mixin.inspect", + return_value=mock_inspect, + ): + validator.filter = lambda: {"filters": [], "or_filter": False} + cols, rows = validator._get_failed_rows_sample() + + assert cols == ["test_col"] + assert len(rows) == 2 + mock_runner._select_from_sample.assert_called_once() + + def test_non_dict_filter_uses_query_filter(self): + mock_runner = MagicMock() + mock_col = MagicMock() + mock_col.name = "test_col" + mock_inspect = MagicMock() + mock_inspect.c = [mock_col] + + mock_query = MagicMock() + mock_runner._select_from_sample.return_value = mock_query + filtered_query = MagicMock() + mock_query.filter.return_value = filtered_query + filtered_query.limit.return_value.all.return_value = [(1,)] + filtered_query.statement.compile.return_value = "SELECT ..." + + class TestValidator(SQARowSamplerMixin): + pass + + validator = TestValidator() + validator.runner = mock_runner + + sqa_filter = MagicMock() + with patch( + "metadata.data_quality.validations.mixins.failed_row_sampler_mixin.inspect", + return_value=mock_inspect, + ): + validator.filter = lambda: sqa_filter + cols, rows = validator._get_failed_rows_sample() + + mock_query.filter.assert_called_once_with(sqa_filter) + assert cols == ["test_col"] + + def test_captures_inspection_query(self): + mock_runner = MagicMock() + mock_col = MagicMock() + mock_col.name = "id" + mock_inspect = MagicMock() + mock_inspect.c = [mock_col] + + mock_query = MagicMock() + mock_runner._select_from_sample.return_value = mock_query + mock_query.limit.return_value.all.return_value = [] + mock_query.statement.compile.return_value = "SELECT id FROM table WHERE ..." 
+ + class TestValidator(SQARowSamplerMixin): + pass + + validator = TestValidator() + validator.runner = mock_runner + + with patch( + "metadata.data_quality.validations.mixins.failed_row_sampler_mixin.inspect", + return_value=mock_inspect, + ): + validator.filter = lambda: {"filters": [], "or_filter": False} + validator._get_failed_rows_sample() + + assert validator._inspection_query == "SELECT id FROM table WHERE ..." diff --git a/openmetadata-ui/src/main/resources/ui/package.json b/openmetadata-ui/src/main/resources/ui/package.json index f4e059d199cb..553f7d20de81 100644 --- a/openmetadata-ui/src/main/resources/ui/package.json +++ b/openmetadata-ui/src/main/resources/ui/package.json @@ -298,4 +298,4 @@ "lodash-es": ">=4.17.23", "markdown-it": ">=14.1.1" } -} \ No newline at end of file +} diff --git a/openmetadata-ui/src/main/resources/ui/playwright/e2e/Features/FailedTestCaseSampleData.spec.ts b/openmetadata-ui/src/main/resources/ui/playwright/e2e/Features/FailedTestCaseSampleData.spec.ts new file mode 100644 index 000000000000..1b7d776079ca --- /dev/null +++ b/openmetadata-ui/src/main/resources/ui/playwright/e2e/Features/FailedTestCaseSampleData.spec.ts @@ -0,0 +1,128 @@ +/* + * Copyright 2025 Collate. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import { expect, test } from '@playwright/test'; +import { PLAYWRIGHT_INGESTION_TAG_OBJ } from '../../constant/config'; +import { TableClass } from '../../support/entity/TableClass'; +import { getApiContext, redirectToHomePage, uuid } from '../../utils/common'; +import { getFailedRowsData, visitDataQualityTab } from '../../utils/testCases'; + +// use the admin user to login +test.use({ + storageState: 'playwright/.auth/admin.json', +}); + +test.beforeEach(async ({ page }) => { + await redirectToHomePage(page); +}); + +const createTestCaseWithSampleData = async ( + apiContext: Awaited>['apiContext'], + table: TableClass +) => { + const columnName = table.entity.columns[0].name; + const tableFqn = table.entityResponseData?.fullyQualifiedName; + + // Create test case + const testCase = await apiContext + .post('/api/v1/dataQuality/testCases', { + data: { + name: `pw_column_value_max_to_be_between_${uuid()}`, + entityLink: `<#E::table::${tableFqn}::columns::${columnName}>`, + parameterValues: [ + { name: 'minValueForMaxInCol', value: 90001 }, + { name: 'maxValueForMaxInCol', value: 96162 }, + ], + testDefinition: 'columnValueMaxToBeBetween', + }, + }) + .then((res) => res.json()); + + // Add failed result + await apiContext.post( + `/api/v1/dataQuality/testCases/testCaseResults/${encodeURIComponent( + testCase.fullyQualifiedName + )}`, + { + data: { + result: + 'Found min=10001, max=27809 vs. 
the expected min=90001, max=96162.', + testCaseStatus: 'Failed', + testResultValue: [ + { name: 'minValueForMaxInCol', value: '10001' }, + { name: 'maxValueForMaxInCol', value: '27809' }, + ], + timestamp: Date.now(), + }, + } + ); + + // Add failed rows sample + await apiContext.put( + `/api/v1/dataQuality/testCases/${testCase.id}/failedRowsSample`, + { data: getFailedRowsData(table) } + ); + + return testCase; +}; + +test( + 'FailedTestCaseSampleData', + PLAYWRIGHT_INGESTION_TAG_OBJ, + async ({ page }) => { + const { apiContext } = await getApiContext(page); + const table = new TableClass(); + await table.create(apiContext); + + const testCase = await createTestCaseWithSampleData(apiContext, table); + const testCaseName = testCase.name; + + await test.step('Highlight the failed test case sample data', async () => { + await visitDataQualityTab(page, table); + + await page.click( + `[data-testid="${testCaseName}"] >> text=${testCaseName}` + ); + + await page.waitForSelector( + '[data-testid="test-case-result-tab-container"]' + ); + + await expect( + page.locator('.failed-sample-data-column').first() + ).toBeVisible(); + + const columns = await page.$$('.failed-sample-data-column'); + + expect(columns).toHaveLength(3); + }); + + await test.step('Delete sample data', async () => { + await page.click('[data-testid="sample-data-manage-button"]'); + await page.click('[data-testid="delete-button"]'); + await page.waitForSelector('.ant-modal-body'); + await page.fill('[data-testid="confirmation-text-input"]', 'DELETE'); + const deleteSampleData = page.waitForResponse( + '/api/v1/dataQuality/testCases/*/failedRowsSample' + ); + await page.click('[data-testid="confirm-button"]'); + await deleteSampleData; + await page.waitForSelector('[data-testid="sample-data-manage-button"]', { + state: 'hidden', + }); + }); + + // Cleanup + await table.delete(apiContext); + } +); diff --git 
a/openmetadata-ui/src/main/resources/ui/src/components/DataQuality/IncidentManager/FailedTestCaseSampleData/FailedTestCaseSampleData.component.tsx b/openmetadata-ui/src/main/resources/ui/src/components/DataQuality/IncidentManager/FailedTestCaseSampleData/FailedTestCaseSampleData.component.tsx new file mode 100644 index 000000000000..d4b0cf31b6cf --- /dev/null +++ b/openmetadata-ui/src/main/resources/ui/src/components/DataQuality/IncidentManager/FailedTestCaseSampleData/FailedTestCaseSampleData.component.tsx @@ -0,0 +1,292 @@ +/* + * Copyright 2025 Collate. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import { Button, Dropdown, Space, Table, Tooltip, Typography } from 'antd'; +import { ItemType } from 'antd/lib/menu/hooks/useItems'; +import { AxiosError } from 'axios'; +import classNames from 'classnames'; +import { isUndefined } from 'lodash'; +import { useEffect, useMemo, useState } from 'react'; +import { useTranslation } from 'react-i18next'; +import { Link, useParams } from 'react-router-dom'; + +import { ReactComponent as IconDelete } from '../../../../assets/svg/ic-delete.svg'; +import { ReactComponent as IconDropdown } from '../../../../assets/svg/menu.svg'; +import { usePermissionProvider } from '../../../../context/PermissionProvider/PermissionProvider'; +import { ResourceEntity } from '../../../../context/PermissionProvider/PermissionProvider.interface'; +import { EntityType } from '../../../../enums/entity.enum'; +import { Operation } from '../../../../generated/entity/policies/policy'; +import { TableData } from '../../../../generated/tests/testCase'; +import { TestCasePageTabs } from '../../../../pages/IncidentManager/IncidentManager.interface'; +import { + deleteTestCaseFailedSampleData, + getTestCaseFailedSampleData, +} from '../../../../rest/testAPI'; +import { getEntityDeleteMessage } from '../../../../utils/CommonUtils'; +import { getColumnNameFromEntityLink } from '../../../../utils/EntityUtils'; +import { checkPermission } from '../../../../utils/PermissionsUtils'; +import { getTestCaseDetailPagePath } from '../../../../utils/RouterUtils'; +import { showErrorToast } from '../../../../utils/ToastUtils'; +import Loader from '../../../common/Loader/Loader'; +import { ManageButtonItemLabel } from '../../../common/ManageButtonContentItem/ManageButtonContentItem.component'; +import { RowData } from '../../../Database/SampleDataTable/RowData'; +import { + SampleData, + SampleDataType, +} from '../../../Database/SampleDataTable/SampleData.interface'; +import EntityDeleteModal from '../../../Modals/EntityDeleteModal/EntityDeleteModal'; 
+import './failed-test-case-sample-data.less'; +import { FailedTestCaseSampleDataProps } from './FailedTestCaseSampleData.interface'; + +const DIFF_TYPE = 'diffType'; +const DIFF_TYPE_VALUES = { + ADD: '+', + REMOVE: '-', + NOT_EQUAL: '!=', +}; + +const FailedTestCaseSampleData = ({ + testCaseData, +}: FailedTestCaseSampleDataProps) => { + const { t } = useTranslation(); + const [sampleData, setSampleData] = useState(); + const [isLoading, setIsLoading] = useState(false); + const [isDeleteModalOpen, setIsDeleteModalOpen] = useState(false); + const [showActions, setShowActions] = useState(false); + const { permissions } = usePermissionProvider(); + const { version } = useParams<{ version: string }>(); + const isVersionPage = !isUndefined(version); + const columnName = useMemo( + () => + testCaseData?.entityLink + ? getColumnNameFromEntityLink(testCaseData?.entityLink) + : undefined, + [testCaseData] + ); + const hasViewSampleDataPermission = useMemo(() => { + return checkPermission( + Operation.ViewSampleData, + ResourceEntity.TEST_CASE, + permissions + ); + }, [permissions]); + + const hasEditPermission = useMemo(() => { + return isVersionPage + ? false + : checkPermission( + Operation.EditAll, + ResourceEntity.TEST_CASE, + permissions + ); + }, [permissions, isVersionPage]); + + const handleDeleteModal = () => { + setIsDeleteModalOpen((prev) => !prev); + }; + + const manageButtonContent: ItemType[] = [ + { + label: ( + + ), + key: 'delete-button', + onClick: (e) => { + e.domEvent.stopPropagation(); + setShowActions(false); + handleDeleteModal(); + }, + }, + ]; + const getSampleDataWithType = (sampleData: TableData) => { + const updatedColumns = sampleData?.columns?.map((column) => { + return { + name: column, + title: + column === DIFF_TYPE ? ( + '' + ) : ( +
+ {column} +
+ ), + dataIndex: column, + key: column, + accessor: column, + width: column === DIFF_TYPE ? undefined : 210, + render: (data: SampleDataType) => ({ + props: { + className: classNames({ + 'failed-sample-data-column': column === columnName, + 'diff-type-sample-data-column': column === DIFF_TYPE, + }), + }, + children: , + }), + }; + }); + + const data = (sampleData?.rows ?? []).map((item) => { + const dataObject: Record = {}; + (sampleData?.columns ?? []).forEach((col, index) => { + dataObject[col] = item[index]; + }); + + return dataObject; + }); + + return { + columns: updatedColumns, + rows: data, + }; + }; + + const fetchFailedTestCaseSampleData = async () => { + if (testCaseData?.id) { + setIsLoading(true); + try { + const response = await getTestCaseFailedSampleData(testCaseData.id); + setSampleData(getSampleDataWithType(response)); + } catch { + setSampleData(undefined); + } finally { + setIsLoading(false); + } + } + + return; + }; + + const handleDeleteSampleData = async () => { + if (testCaseData?.id) { + try { + await deleteTestCaseFailedSampleData(testCaseData.id); + handleDeleteModal(); + fetchFailedTestCaseSampleData(); + } catch (error) { + showErrorToast( + error as AxiosError, + t('server.delete-entity-error', { + entity: t('label.sample-data'), + }) + ); + } + } + + return; + }; + + useEffect(() => { + if (hasViewSampleDataPermission) { + fetchFailedTestCaseSampleData(); + } + }, [testCaseData?.id, hasViewSampleDataPermission]); + + if (!hasViewSampleDataPermission) { + return <>; + } + + if (isLoading) { + return ; + } + + if (isUndefined(sampleData)) { + return <>; + } + + return ( +
+ + + {t('label.sample-data')} + +
+ {testCaseData?.inspectionQuery && !isVersionPage && ( + + + + )} + {hasEditPermission && ( + + + + + + )} +
+
+ { + const type = record?.diffType; + + return classNames({ + 'remove-sample-data': type === DIFF_TYPE_VALUES.REMOVE, + 'add-sample-data': type === DIFF_TYPE_VALUES.ADD, + 'not-equal-sample-data': type === DIFF_TYPE_VALUES.NOT_EQUAL, + }); + }} + rowKey="name" + scroll={{ x: 'max-content' }} + size="small" + /> + {isDeleteModalOpen && ( + + )} + + ); +}; + +export default FailedTestCaseSampleData; diff --git a/openmetadata-ui/src/main/resources/ui/src/components/DataQuality/IncidentManager/FailedTestCaseSampleData/FailedTestCaseSampleData.interface.ts b/openmetadata-ui/src/main/resources/ui/src/components/DataQuality/IncidentManager/FailedTestCaseSampleData/FailedTestCaseSampleData.interface.ts new file mode 100644 index 000000000000..296cb38cdd5f --- /dev/null +++ b/openmetadata-ui/src/main/resources/ui/src/components/DataQuality/IncidentManager/FailedTestCaseSampleData/FailedTestCaseSampleData.interface.ts @@ -0,0 +1,18 @@ +/* + * Copyright 2025 Collate. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import { TestCase } from '../../../../generated/tests/testCase'; + +export interface FailedTestCaseSampleDataProps { + testCaseData?: TestCase; +} diff --git a/openmetadata-ui/src/main/resources/ui/src/components/DataQuality/IncidentManager/FailedTestCaseSampleData/failed-test-case-sample-data.less b/openmetadata-ui/src/main/resources/ui/src/components/DataQuality/IncidentManager/FailedTestCaseSampleData/failed-test-case-sample-data.less new file mode 100644 index 000000000000..cf526bfa93b0 --- /dev/null +++ b/openmetadata-ui/src/main/resources/ui/src/components/DataQuality/IncidentManager/FailedTestCaseSampleData/failed-test-case-sample-data.less @@ -0,0 +1,63 @@ +/* + * Copyright 2025 Collate. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +@import (reference) '../../../../styles/variables.less'; + +.ant-table-cell.failed-sample-data-column, +.ant-table-row:hover .ant-table-cell.failed-sample-data-column { + background-color: fade(@red-3, 5%); + + .ant-typography { + color: @red-3; + } +} + +.ant-table-wrapper + .ant-table-bordered + .ant-table-container + .ant-table-content + > table + tbody + > tr + td.ant-table-cell.diff-type-sample-data-column { + border-right: 1px solid rgba(0, 0, 0, 0.06); + padding-right: 16px; +} + +.add-sample-data { + background: fade(@green-3, 5%); + + .diff-type-sample-data-column { + .ant-typography { + color: @green-3; + } + } +} +.remove-sample-data { + background: fade(@red-3, 5%); + + .diff-type-sample-data-column { + .ant-typography { + color: @red-3; + } + } +} +.not-equal-sample-data { + background: #f8f8f8; + + .diff-type-sample-data-column { + .ant-typography { + color: @grey-4; + } + } +} diff --git a/openmetadata-ui/src/main/resources/ui/src/components/DataQuality/IncidentManager/SqlQueryTab/AddSqlQueryFormModal/AddSqlQueryFormModal.component.tsx b/openmetadata-ui/src/main/resources/ui/src/components/DataQuality/IncidentManager/SqlQueryTab/AddSqlQueryFormModal/AddSqlQueryFormModal.component.tsx new file mode 100644 index 000000000000..a4985f4994c6 --- /dev/null +++ b/openmetadata-ui/src/main/resources/ui/src/components/DataQuality/IncidentManager/SqlQueryTab/AddSqlQueryFormModal/AddSqlQueryFormModal.component.tsx @@ -0,0 +1,203 @@ +/* + * Copyright 2025 Collate. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { Form, FormProps, Input, Modal } from 'antd'; +import { AxiosError } from 'axios'; +import { useEffect, useState } from 'react'; +import { useTranslation } from 'react-i18next'; + +import { HTTP_STATUS_CODE } from '../../../../../constants/Auth.constants'; +import { NO_PERMISSION_FOR_ACTION } from '../../../../../constants/HelperTextUtil'; +import { usePermissionProvider } from '../../../../../context/PermissionProvider/PermissionProvider'; +import { CSMode } from '../../../../../enums/codemirror.enum'; +import { EntityType, FqnPart } from '../../../../../enums/entity.enum'; +import { OwnerType } from '../../../../../enums/user.enum'; +import { CreateQuery } from '../../../../../generated/api/data/createQuery'; +import { Table } from '../../../../../generated/entity/data/table'; +import { useApplicationStore } from '../../../../../hooks/useApplicationStore'; +import { useTestCaseStore } from '../../../../../pages/IncidentManager/IncidentManagerDetailPage/useTestCase.store'; +import { postQuery } from '../../../../../rest/queryAPI'; +import { getTableDetailsByFQN } from '../../../../../rest/tableAPI'; +import { getPartialNameFromTableFQN } from '../../../../../utils/CommonUtils'; +import { getCurrentMillis } from '../../../../../utils/date-time/DateTimeUtils'; +import { + showErrorToast, + showSuccessToast, +} from '../../../../../utils/ToastUtils'; +import Loader from '../../../../common/Loader/Loader'; +import RichTextEditor from '../../../../common/RichTextEditor/RichTextEditor'; +import SchemaEditor from '../../../../Database/SchemaEditor/SchemaEditor'; +import { AddSqlQueryFormModalProps } from './AddSqlQueryFormModal.interface'; + +const AddSqlQueryFormModal = ({ + open, + onCancel, +}: AddSqlQueryFormModalProps) => { + const [form] = Form.useForm(); + const { t } = useTranslation(); + const { permissions } = usePermissionProvider(); + 
const { currentUser } = useApplicationStore(); + + const { testCase } = useTestCaseStore(); + const [isLoading, setIsLoading] = useState(true); + const [isSaving, setIsSaving] = useState(false); + const [table, setTable] = useState
(); + + const fetchTableData = async (entityFQN: string) => { + setIsLoading(true); + const tableFQN = getPartialNameFromTableFQN( + entityFQN, + [FqnPart.Service, FqnPart.Database, FqnPart.Schema, FqnPart.Table], + '.' + ); + try { + const response = await getTableDetailsByFQN(tableFQN); + form.setFieldsValue({ + table: response.fullyQualifiedName ?? tableFQN, + }); + setTable(response); + } catch (error) { + showErrorToast(error as AxiosError); + } finally { + setIsLoading(false); + } + }; + + const handleSubmit: FormProps['onFinish'] = async (values): Promise => { + setIsSaving(true); + const updatedValues: CreateQuery = { + description: values.description, + query: values.query ?? testCase?.inspectionQuery, + owners: [ + { + id: currentUser?.id ?? '', + type: OwnerType.USER, + }, + ], + queryUsedIn: [ + { + id: table?.id ?? '', + type: EntityType.TABLE, + }, + ], + queryDate: getCurrentMillis(), + service: getPartialNameFromTableFQN( + table?.fullyQualifiedName ?? testCase?.fullyQualifiedName ?? '', + [FqnPart.Service] + ), + }; + + try { + await postQuery(updatedValues); + showSuccessToast( + t('server.create-entity-success', { entity: t('label.query') }) + ); + onCancel(); + } catch (error) { + if ( + (error as AxiosError).response?.status === HTTP_STATUS_CODE.CONFLICT + ) { + showErrorToast( + t('server.entity-already-exist-message-without-name', { + entity: t('label.query'), + entityPlural: t('label.query-lowercase-plural'), + }) + ); + } else { + showErrorToast( + t('server.create-entity-error', { + entity: t('label.query-plural'), + }) + ); + } + } finally { + setIsSaving(false); + } + }; + + useEffect(() => { + if (testCase) { + fetchTableData(testCase?.entityFQN ?? ''); + form.setFieldsValue({ + query: testCase.inspectionQuery, + }); + } + }, [testCase]); + + return ( + + {isLoading ? ( + + ) : ( +
+ + + + + + + + + + + )} +
+ ); +}; + +export default AddSqlQueryFormModal; diff --git a/openmetadata-ui/src/main/resources/ui/src/components/DataQuality/IncidentManager/SqlQueryTab/AddSqlQueryFormModal/AddSqlQueryFormModal.interface.ts b/openmetadata-ui/src/main/resources/ui/src/components/DataQuality/IncidentManager/SqlQueryTab/AddSqlQueryFormModal/AddSqlQueryFormModal.interface.ts new file mode 100644 index 000000000000..ddefd6326ae2 --- /dev/null +++ b/openmetadata-ui/src/main/resources/ui/src/components/DataQuality/IncidentManager/SqlQueryTab/AddSqlQueryFormModal/AddSqlQueryFormModal.interface.ts @@ -0,0 +1,17 @@ +/* + * Copyright 2025 Collate. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +export interface AddSqlQueryFormModalProps { + open: boolean; + onCancel: () => void; +} diff --git a/openmetadata-ui/src/main/resources/ui/src/components/DataQuality/IncidentManager/SqlQueryTab/SqlQueryTab.component.tsx b/openmetadata-ui/src/main/resources/ui/src/components/DataQuality/IncidentManager/SqlQueryTab/SqlQueryTab.component.tsx new file mode 100644 index 000000000000..ce95feedbbfa --- /dev/null +++ b/openmetadata-ui/src/main/resources/ui/src/components/DataQuality/IncidentManager/SqlQueryTab/SqlQueryTab.component.tsx @@ -0,0 +1,106 @@ +/* + * Copyright 2025 Collate. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { Button, Col, Row } from 'antd'; +import { isEmpty, isUndefined } from 'lodash'; +import { useMemo, useState } from 'react'; +import { useTranslation } from 'react-i18next'; +import { useParams } from 'react-router-dom'; +import { ChangeDescription } from '../../../../generated/tests/testCase'; + +import { usePermissionProvider } from '../../../../context/PermissionProvider/PermissionProvider'; +import { useTestCaseStore } from '../../../../pages/IncidentManager/IncidentManagerDetailPage/useTestCase.store'; +import { + getChangedEntityNewValue, + getChangedEntityOldValue, + getChangedEntityStatus, + getDiffByFieldName, + getDiffDisplayValue, +} from '../../../../utils/EntityVersionUtils'; +import Loader from '../../../common/Loader/Loader'; +import QueryViewer from '../../../common/QueryViewer/QueryViewer.component'; +import '../TestCaseResultTab/test-case-result-tab.style.less'; +import AddSqlQueryFormModal from './AddSqlQueryFormModal/AddSqlQueryFormModal.component'; + +const SqlQueryTab = () => { + const { testCase, isLoading } = useTestCaseStore(); + const { version } = useParams<{ version: string }>(); + const isVersionPage = !isUndefined(version); + const { permissions } = usePermissionProvider(); + const { t } = useTranslation(); + const [isOpen, setIsOpen] = useState(false); + + const versionDiffInspectionQuery = useMemo(() => { + const diff = getDiffByFieldName( + 'inspectionQuery', + testCase?.changeDescription as ChangeDescription, + true + ); + + let oldValue = getChangedEntityOldValue(diff) ?? 
''; + let newValue = getChangedEntityNewValue(diff) ?? ''; + if (isEmpty(oldValue) && isEmpty(newValue)) { + oldValue = testCase?.inspectionQuery ?? ''; + newValue = testCase?.inspectionQuery ?? ''; + } + const status = getChangedEntityStatus(oldValue, newValue); + + return ( +
+ {getDiffDisplayValue({ + oldValue, + newValue, + status, + })} +
+ ); + }, [testCase?.changeDescription, testCase?.inspectionQuery]); + + if (isLoading) { + return ; + } + + return ( + + {permissions.query?.Create && !isVersionPage && ( +
+ + + )} + + {isVersionPage ? ( + versionDiffInspectionQuery + ) : ( + + )} + + {isOpen && ( + + { + setIsOpen(false); + }} + /> + + )} + + ); +}; + +export default SqlQueryTab; diff --git a/openmetadata-ui/src/main/resources/ui/src/components/DataQuality/IncidentManager/TestCaseResultTab/TestCaseResultTabClassBase.ts b/openmetadata-ui/src/main/resources/ui/src/components/DataQuality/IncidentManager/TestCaseResultTab/TestCaseResultTabClassBase.ts index 66d1eeb18b6d..833d47562cbd 100644 --- a/openmetadata-ui/src/main/resources/ui/src/components/DataQuality/IncidentManager/TestCaseResultTab/TestCaseResultTabClassBase.ts +++ b/openmetadata-ui/src/main/resources/ui/src/components/DataQuality/IncidentManager/TestCaseResultTab/TestCaseResultTabClassBase.ts @@ -11,6 +11,7 @@ * limitations under the License. */ import { TestCase } from '../../../../generated/tests/testCase'; +import FailedTestCaseSampleData from '../FailedTestCaseSampleData/FailedTestCaseSampleData.component'; export interface AdditionalComponentInterface { id: string; @@ -21,7 +22,7 @@ class TestCaseResultTabClassBase { public getAdditionalComponents( _testCaseData?: TestCase ): Array { - return []; + return [{ id: 'failed-sample-data', Component: FailedTestCaseSampleData }]; } public getAlertBanner(): React.FC | null { diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/ar-sa.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/ar-sa.json index 874d66450392..a1e36ab257bc 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/ar-sa.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/ar-sa.json @@ -63,6 +63,7 @@ "add-row": "إضافة صف", "add-suggestion": "إضافة اقتراح", "add-term-boost": "إضافة تعزيز مصطلح", + "add-to-table": "إضافة إلى الجدول", "add-widget-plural": "إضافة أدوات", "add-workflow-agent": "إضافة وكيل {{workflow}}", "add-workflow-ingestion": "إضافة استيعاب {{workflow}}", @@ -773,6 +774,7 @@ "explore-domain": "استكشاف النطاق", 
"explore-metric-plural": "استكشاف المقاييس", "explore-now": "استكشاف الآن", + "explore-with-query": "استكشاف بالاستعلام", "export": "تصدير", "export-as-type": "تصدير كـ {{type}}", "export-entity": "تصدير {{entity}}", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/de-de.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/de-de.json index 0306c1630106..277ddcdc650a 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/de-de.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/de-de.json @@ -63,6 +63,7 @@ "add-row": "Zeile hinzufügen", "add-suggestion": "Vorschlag hinzufügen", "add-term-boost": "Begriffsverstärkung hinzufügen", + "add-to-table": "Zur Tabelle hinzufügen", "add-widget-plural": "Widgets hinzufügen", "add-workflow-agent": "{{workflow}} Agent hinzufügen", "add-workflow-ingestion": "Ingestion für {{workflow}} hinzufügen", @@ -773,6 +774,7 @@ "explore-domain": "Domäne erkunden", "explore-metric-plural": "Metriken Erkunden", "explore-now": "Jetzt erkunden", + "explore-with-query": "Mit Abfrage erkunden", "export": "Exportieren", "export-as-type": "Exportieren als {{type}}", "export-entity": "{{entity}} exportieren", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/en-us.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/en-us.json index 4b1973c6faec..22dbaa9cebb7 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/en-us.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/en-us.json @@ -63,6 +63,7 @@ "add-row": "Add Row", "add-suggestion": "Add suggestion", "add-term-boost": "Add Term Boost", + "add-to-table": "Add to Table", "add-widget-plural": "Add Widgets", "add-workflow-agent": "Add {{workflow}} Agent", "add-workflow-ingestion": "Add {{workflow}} Ingestion", @@ -773,6 +774,7 @@ "explore-domain": "Explore Domain", "explore-metric-plural": "Explore Metrics", "explore-now": "Explore Now", + 
"explore-with-query": "Explore with Query", "export": "Export", "export-as-type": "Export as {{type}}", "export-entity": "Export {{entity}}", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/es-es.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/es-es.json index cafbcdb5781b..010c261ed172 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/es-es.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/es-es.json @@ -63,6 +63,7 @@ "add-row": "Añadir Fila", "add-suggestion": "Añadir sugerencia", "add-term-boost": "Agregar Impulso de Término", + "add-to-table": "Agregar a la tabla", "add-widget-plural": "Agregar widgets", "add-workflow-agent": "Agregar {{workflow}} Agente", "add-workflow-ingestion": "Añadir proceso de {{workflow}}", @@ -773,6 +774,7 @@ "explore-domain": "Explorar dominio", "explore-metric-plural": "Explorar Métricas", "explore-now": "Explorar ahora", + "explore-with-query": "Explorar con consulta", "export": "Exportar", "export-as-type": "Exportar como {{type}}", "export-entity": "Exportar {{entity}}", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/fr-fr.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/fr-fr.json index b63553a48b82..567e2b6ab630 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/fr-fr.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/fr-fr.json @@ -63,6 +63,7 @@ "add-row": "Ajouter une Ligne", "add-suggestion": "Ajouter une suggestion", "add-term-boost": "Ajouter un Boost de Terme", + "add-to-table": "Ajouter à la table", "add-widget-plural": "Ajouter des widgets", "add-workflow-agent": "Ajouter l'Agent {{workflow}}", "add-workflow-ingestion": "Ajouter l'ingestion de {{workflow}}", @@ -773,6 +774,7 @@ "explore-domain": "Explorer le domaine", "explore-metric-plural": "Explorer les Métriques", "explore-now": "Explorer Maintenant", + "explore-with-query": "Explorer avec une requête", 
"export": "Exporter", "export-as-type": "Exporter en tant que {{type}}", "export-entity": "Exporter {{entity}}", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/gl-es.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/gl-es.json index a1526666446d..73410191d71a 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/gl-es.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/gl-es.json @@ -63,6 +63,7 @@ "add-row": "Engadir fila", "add-suggestion": "Engadir suxestión", "add-term-boost": "Engadir Impulso de Termo", + "add-to-table": "Engadir á táboa", "add-widget-plural": "Engadir widgets", "add-workflow-agent": "Engadir {{workflow}} Agente", "add-workflow-ingestion": "Engadir a inxestión de {{workflow}}", @@ -773,6 +774,7 @@ "explore-domain": "Explorar dominio", "explore-metric-plural": "Explorar Métricas", "explore-now": "Explorar agora", + "explore-with-query": "Explorar con consulta", "export": "Exportar", "export-as-type": "Exportar como {{type}}", "export-entity": "Exportar {{entity}}", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/he-he.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/he-he.json index 21d8e2fbcc74..19ac4762b96f 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/he-he.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/he-he.json @@ -63,6 +63,7 @@ "add-row": "הוסף שורה", "add-suggestion": "הוסף הצעה", "add-term-boost": "הוסף הגברת מונח", + "add-to-table": "הוסף לטבלה", "add-widget-plural": "הוסף ווידג'טים", "add-workflow-agent": "הוסף סוכן {{workflow}}", "add-workflow-ingestion": "הוסף הצפנת {{workflow}}", @@ -773,6 +774,7 @@ "explore-domain": "חקור דומיין", "explore-metric-plural": "חקור מדדים", "explore-now": "חקור עכשיו", + "explore-with-query": "חקור עם שאילתה", "export": "ייצא", "export-as-type": "ייצא כ {{type}}", "export-entity": "ייצא {{entity}}", diff --git 
a/openmetadata-ui/src/main/resources/ui/src/locale/languages/ja-jp.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/ja-jp.json index 7606534a1077..bdbd8d75b307 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/ja-jp.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/ja-jp.json @@ -63,6 +63,7 @@ "add-row": "行を追加", "add-suggestion": "提案を追加", "add-term-boost": "用語ブーストを追加", + "add-to-table": "テーブルに追加", "add-widget-plural": "ウィジェットを追加", "add-workflow-agent": "{{workflow}} エージェントを追加", "add-workflow-ingestion": "{{workflow}} インジェストを追加", @@ -773,6 +774,7 @@ "explore-domain": "ドメインを探索", "explore-metric-plural": "メトリクスを探索", "explore-now": "今すぐ探索", + "explore-with-query": "クエリで探索", "export": "エクスポート", "export-as-type": "{{type}}としてエクスポート", "export-entity": "{{entity}} をエクスポート", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/ko-kr.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/ko-kr.json index 019da416d7b1..cbde9459be7a 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/ko-kr.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/ko-kr.json @@ -63,6 +63,7 @@ "add-row": "행 추가", "add-suggestion": "제안 추가", "add-term-boost": "용어 부스트 추가", + "add-to-table": "테이블에 추가", "add-widget-plural": "위젯 추가", "add-workflow-agent": "{{workflow}} 에이전트 추가", "add-workflow-ingestion": "{{workflow}} 수집 추가", @@ -773,6 +774,7 @@ "explore-domain": "도메인 탐색", "explore-metric-plural": "메트릭 탐색", "explore-now": "지금 탐색", + "explore-with-query": "쿼리로 탐색", "export": "내보내기", "export-as-type": "{{type}}(으)로 내보내기", "export-entity": "{{entity}} 내보내기", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/mr-in.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/mr-in.json index d34b90dbd684..395e4fcf451f 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/mr-in.json +++ 
b/openmetadata-ui/src/main/resources/ui/src/locale/languages/mr-in.json @@ -63,6 +63,7 @@ "add-row": "पंक्ति जोडा", "add-suggestion": "प्रस्ताव जोडा", "add-term-boost": "टर्म बूस्ट जोडा", + "add-to-table": "सारणीमध्ये जोडा", "add-widget-plural": "विजेट्स जोडा", "add-workflow-agent": "{{workflow}} एजंट जोडा", "add-workflow-ingestion": "{{workflow}} अंतर्ग्रहण जोडा", @@ -773,6 +774,7 @@ "explore-domain": "डोमेन एक्सप्लोर करा", "explore-metric-plural": "मेट्रिक्स एक्सप्लोर करा", "explore-now": "आता अन्वेषण करा", + "explore-with-query": "क्वेरीसह शोधा", "export": "निर्यात करा", "export-as-type": "{{type}} म्हणून निर्यात करा", "export-entity": "{{entity}} निर्यात करा", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/nl-nl.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/nl-nl.json index af44588b7c7a..0be2320e0223 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/nl-nl.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/nl-nl.json @@ -63,6 +63,7 @@ "add-row": "Rij toevoegen", "add-suggestion": "Suggestie toevoegen", "add-term-boost": "Term Boost toevoegen", + "add-to-table": "Toevoegen aan tabel", "add-widget-plural": "Widgets toevoegen", "add-workflow-agent": "{{workflow}} Agent toevoegen", "add-workflow-ingestion": "{{workflow}} ingestie toevoegen", @@ -773,6 +774,7 @@ "explore-domain": "Domein verkennen", "explore-metric-plural": "Metrieken Verkennen", "explore-now": "Verken nu", + "explore-with-query": "Verkennen met query", "export": "Exporteren", "export-as-type": "Exporteren als {{type}}", "export-entity": "{{entity}} exporteren", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/pr-pr.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/pr-pr.json index 50070179c016..227f60eb0906 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/pr-pr.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/pr-pr.json @@ -63,6 
+63,7 @@ "add-row": "Add Row", "add-suggestion": "اضافه کردن پیشنهاد", "add-term-boost": "تقویت مترادف اضافه کنید", + "add-to-table": "Add to Table", "add-widget-plural": "Add Widgets", "add-workflow-agent": "Agregar {{workflow}} Agente", "add-workflow-ingestion": "اضافه کردن گردش کار {{ workflow}}", @@ -773,6 +774,7 @@ "explore-domain": "کاوش دامنه", "explore-metric-plural": "کاوش متریک‌ها", "explore-now": "اکنون کاوش کنید", + "explore-with-query": "Explore with Query", "export": "صادر کردن", "export-as-type": "صادرات به عنوان {{type}}", "export-entity": "صادر کردن {{entity}}", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/pt-br.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/pt-br.json index 871377deaddd..5fb531bc0010 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/pt-br.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/pt-br.json @@ -63,6 +63,7 @@ "add-row": "Adicionar linha", "add-suggestion": "Adicionar sugestão", "add-term-boost": "Adicionar Impulso de Termo", + "add-to-table": "Adicionar à tabela", "add-widget-plural": "Adicionar widgets", "add-workflow-agent": "Agregar {{workflow}} Agente", "add-workflow-ingestion": "Adicionar Ingestão {{workflow}}", @@ -773,6 +774,7 @@ "explore-domain": "Explorar domínio", "explore-metric-plural": "Explorar Métricas", "explore-now": "Explorar Agora", + "explore-with-query": "Explorar com consulta", "export": "Exportar", "export-as-type": "Exportar como {{type}}", "export-entity": "Exportar {{entity}}", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/pt-pt.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/pt-pt.json index 0ce1feebdb26..a0dfef3df1ad 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/pt-pt.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/pt-pt.json @@ -63,6 +63,7 @@ "add-row": "Adicionar Linha", "add-suggestion": "Adicionar sugestão", 
"add-term-boost": "Adicionar Impulso de Termo", + "add-to-table": "Adicionar à tabela", "add-widget-plural": "Adicionar widgets", "add-workflow-agent": "Adicionar Agente {{workflow}}", "add-workflow-ingestion": "Adicionar Ingestão {{workflow}}", @@ -773,6 +774,7 @@ "explore-domain": "Explorar domínio", "explore-metric-plural": "Explorar Métricas", "explore-now": "Explorar Agora", + "explore-with-query": "Explorar com consulta", "export": "Exportar", "export-as-type": "Exportar como {{type}}", "export-entity": "Exportar {{entity}}", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/ru-ru.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/ru-ru.json index 0421da495ac8..8a4afa2f9775 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/ru-ru.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/ru-ru.json @@ -63,6 +63,7 @@ "add-row": "Добавить строку", "add-suggestion": "Добавить предложение", "add-term-boost": "Добавить вес тега", + "add-to-table": "Добавить в таблицу", "add-widget-plural": "Добавить виджеты", "add-workflow-agent": "Добавить агента {{workflow}}", "add-workflow-ingestion": "Добавить {{workflow}}", @@ -773,6 +774,7 @@ "explore-domain": "Исследовать домен", "explore-metric-plural": "Исследовать Метрики", "explore-now": "Исследовать сейчас", + "explore-with-query": "Исследовать с запросом", "export": "Экспортировать", "export-as-type": "Экспортировать как {{type}}", "export-entity": "Экспортировать объект «{{entity}}»", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/th-th.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/th-th.json index 5467c8451512..95617bbd9bf1 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/th-th.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/th-th.json @@ -63,6 +63,7 @@ "add-row": "เพิ่มแถว", "add-suggestion": "เพิ่มข้อเสนอแนะ", "add-term-boost": "เพิ่มการเพิ่มคำศัพท์", + 
"add-to-table": "เพิ่มลงในตาราง", "add-widget-plural": "เพิ่มวิดเจ็ต", "add-workflow-agent": "เพิ่มเอเจนต์ {{workflow}}", "add-workflow-ingestion": "เพิ่มการนำเข้าความคิด {{workflow}}", @@ -773,6 +774,7 @@ "explore-domain": "สำรวจโดเมน", "explore-metric-plural": "สำรวจเมตริก", "explore-now": "สำรวจตอนนี้", + "explore-with-query": "สำรวจด้วยคิวรี", "export": "ส่งออก", "export-as-type": "ส่งออกเป็น {{type}}", "export-entity": "ส่งออก {{entity}}", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/tr-tr.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/tr-tr.json index e08c070a9521..8dc80dd06ca3 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/tr-tr.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/tr-tr.json @@ -63,6 +63,7 @@ "add-row": "Satır Ekle", "add-suggestion": "Öneri ekle", "add-term-boost": "Terim Desteği Ekle", + "add-to-table": "Tabloya ekle", "add-widget-plural": "Widget'lar ekle", "add-workflow-agent": "{{workflow}} Agent'ı Ekle", "add-workflow-ingestion": "{{workflow}} Alımı Ekle", @@ -773,6 +774,7 @@ "explore-domain": "Alan Adını Keşfet", "explore-metric-plural": "Metrikleri Keşfet", "explore-now": "Şimdi Keşfet", + "explore-with-query": "Sorgu ile keşfet", "export": "Dışa Aktar", "export-as-type": "{{type}} olarak dışa aktar", "export-entity": "{{entity}} Dışa Aktar", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/zh-cn.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/zh-cn.json index fa367b50f01c..400553f214de 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/zh-cn.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/zh-cn.json @@ -63,6 +63,7 @@ "add-row": "添加行", "add-suggestion": "添加建议", "add-term-boost": "添加术语提升", + "add-to-table": "添加到表", "add-widget-plural": "添加小部件", "add-workflow-agent": "添加{{workflow}}代理", "add-workflow-ingestion": "添加{{workflow}}提取", @@ -773,6 +774,7 @@ 
"explore-domain": "探索域", "explore-metric-plural": "探索指标", "explore-now": "现在探索", + "explore-with-query": "使用查询探索", "export": "导出", "export-as-type": "导出为 {{type}}", "export-entity": "导出{{entity}}", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/zh-tw.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/zh-tw.json index b216a9f15033..414559b64642 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/zh-tw.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/zh-tw.json @@ -63,6 +63,7 @@ "add-row": "新增資料列", "add-suggestion": "新增建議", "add-term-boost": "新增詞彙權重", + "add-to-table": "新增至表格", "add-widget-plural": "新增小工具", "add-workflow-agent": "新增 {{workflow}} 代理程式", "add-workflow-ingestion": "新增 {{workflow}} 擷取", @@ -773,6 +774,7 @@ "explore-domain": "探索領域", "explore-metric-plural": "探索指標", "explore-now": "立即探索", + "explore-with-query": "使用查詢探索", "export": "匯出", "export-as-type": "匯出為 {{type}}", "export-entity": "匯出 {{entity}}", diff --git a/openmetadata-ui/src/main/resources/ui/src/pages/IncidentManager/IncidentManagerDetailPage/TestCaseClassBase.ts b/openmetadata-ui/src/main/resources/ui/src/pages/IncidentManager/IncidentManagerDetailPage/TestCaseClassBase.ts index 8bbb279461ab..3f0508f7f3f4 100644 --- a/openmetadata-ui/src/main/resources/ui/src/pages/IncidentManager/IncidentManagerDetailPage/TestCaseClassBase.ts +++ b/openmetadata-ui/src/main/resources/ui/src/pages/IncidentManager/IncidentManagerDetailPage/TestCaseClassBase.ts @@ -15,6 +15,7 @@ import TabsLabel from '../../../components/common/TabsLabel/TabsLabel.component' import { TabsLabelProps } from '../../../components/common/TabsLabel/TabsLabel.interface'; import { TestCaseFormType } from '../../../components/DataQuality/AddDataQualityTest/AddDataQualityTest.interface'; import DimensionalityTab from '../../../components/DataQuality/IncidentManager/DimensionalityTab/DimensionalityTab'; +import SqlQueryTab from 
'../../../components/DataQuality/IncidentManager/SqlQueryTab/SqlQueryTab.component'; import TestCaseIncidentTab from '../../../components/DataQuality/IncidentManager/TestCaseIncidentTab/TestCaseIncidentTab.component'; import TestCaseResultTab from '../../../components/DataQuality/IncidentManager/TestCaseResultTab/TestCaseResultTab.component'; import { TabSpecificField } from '../../../enums/entity.enum'; @@ -45,7 +46,7 @@ class TestCaseClassBase { isVersionPage: boolean, showDimensionalityTab = false ): TestCaseTabType[] { - return [ + const [firstTab, ...rest] = [ { LabelComponent: TabsLabel, labelProps: { @@ -84,6 +85,24 @@ class TestCaseClassBase { }, ]), ]; + + return [ + firstTab, + ...(this.showSqlQueryTab + ? [ + { + LabelComponent: TabsLabel, + labelProps: { + id: 'sql-query', + name: i18n.t('label.sql-uppercase-query'), + }, + Tab: SqlQueryTab, + key: TestCasePageTabs.SQL_QUERY, + }, + ] + : []), + ...rest, + ]; } setShowSqlQueryTab(showSqlQueryTab: boolean) { @@ -98,6 +117,7 @@ class TestCaseClassBase { TabSpecificField.OWNERS, TabSpecificField.INCIDENT_ID, TabSpecificField.TAGS, + 'inspectionQuery', ]; } diff --git a/openmetadata-ui/src/main/resources/ui/src/rest/testAPI.ts b/openmetadata-ui/src/main/resources/ui/src/rest/testAPI.ts index 91e0ef90d378..f5919e632a92 100644 --- a/openmetadata-ui/src/main/resources/ui/src/rest/testAPI.ts +++ b/openmetadata-ui/src/main/resources/ui/src/rest/testAPI.ts @@ -25,6 +25,7 @@ import { CreateTestDefinition } from '../generated/api/tests/createTestDefinitio import { CreateTestSuite } from '../generated/api/tests/createTestSuite'; import { DataQualityReport } from '../generated/tests/dataQualityReport'; import { + TableData, TestCase, TestCaseDimensionResult, TestCaseResult, @@ -473,3 +474,21 @@ export const exportTestCasesInCSV = async ( return response.data; }; + +export const getTestCaseFailedSampleData = async ( + id: string +): Promise => { + const response = await APIClient.get( + 
`${testCaseUrl}/${id}/failedRowsSample` + ); + + return response.data; +}; + +export const deleteTestCaseFailedSampleData = async (id: string) => { + const response = await APIClient.delete( + `${testCaseUrl}/${id}/failedRowsSample` + ); + + return response.data; +}; diff --git a/openmetadata-ui/src/main/resources/ui/vite.config.ts b/openmetadata-ui/src/main/resources/ui/vite.config.ts index 89192eb17234..9a02c03e3c6a 100644 --- a/openmetadata-ui/src/main/resources/ui/vite.config.ts +++ b/openmetadata-ui/src/main/resources/ui/vite.config.ts @@ -40,19 +40,19 @@ export default defineConfig(({ mode }) => { return html .replace( /(]*src=["'])(\.\/)?assets\//g, - '$1${basePath}assets/', + '$1${basePath}assets/' ) .replace( /(]*href=["'])(\.\/)?assets\//g, - '$1${basePath}assets/', + '$1${basePath}assets/' ) .replace( /(]*src=["'])(\.\/)?assets\//g, - '$1${basePath}assets/', + '$1${basePath}assets/' ) .replace( /(]*src=["'])(\.\/)?images\//g, - '$1${basePath}images/', + '$1${basePath}images/' ); }, }, @@ -92,7 +92,7 @@ export default defineConfig(({ mode }) => { antd: path.resolve(__dirname, 'node_modules/antd'), '@deuex-solutions/react-tour': path.resolve( __dirname, - 'node_modules/@deuex-solutions/react-tour/dist/reacttour.min.js', + 'node_modules/@deuex-solutions/react-tour/dist/reacttour.min.js' ), }, extensions: ['.ts', '.tsx', '.js', '.jsx', '.css', '.less', '.svg'],