Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from metadata.generated.schema.api.data.createChart import CreateChartRequest
from metadata.generated.schema.api.data.createDashboard import CreateDashboardRequest
from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest
from metadata.generated.schema.entity.data.chart import Chart
from metadata.generated.schema.entity.data.chart import Chart as LineageChart
from metadata.generated.schema.entity.data.dashboard import (
Dashboard as LineageDashboard,
)
Expand Down Expand Up @@ -225,7 +225,7 @@ def yield_dashboard(
FullyQualifiedEntityName(
fqn.build(
self.metadata,
entity_type=Chart,
entity_type=LineageChart,
service_name=self.context.get().dashboard_service,
chart_name=chart,
)
Expand Down Expand Up @@ -456,10 +456,24 @@ def _yield_lineage_from_query(
fqn=to_fqn,
)

chart_fqn = fqn.build(
self.metadata,
entity_type=LineageChart,
service_name=self.config.serviceName,
chart_name=str(chart_details.id),
)
chart_entity = self.metadata.get_by_name(
entity=LineageChart,
fqn=chart_fqn,
)
Comment on lines +459 to +468
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💡 Performance: Chart entity lookup repeated inside source_tables loop

In _yield_lineage_from_query, the chart_fqn/chart_entity lookup (lines 459-468) is inside the for table in lineage_parser.source_tables: loop, but neither depends on table. This means get_by_name is called for the same chart entity on every loop iteration, making a redundant API call per source table.

The pre-existing to_entity (dashboard) lookup has the same issue, so this follows the existing pattern — but the new code doubles the number of redundant API calls per iteration.

Suggested fix:

Move both `to_entity` and `chart_entity` lookups above the `for table in lineage_parser.source_tables:` loop (before line 415), since they only depend on `dashboard_name` and `chart_details.id`, not on `table`.

Was this helpful? React with 👍 / 👎 | Reply gitar fix to apply this suggestion


for from_entity in from_entities or []:
yield self._get_add_lineage_request(
to_entity=to_entity, from_entity=from_entity
)
yield self._get_add_lineage_request(
to_entity=chart_entity, from_entity=from_entity
)

def _yield_lineage_from_api(
self,
Expand Down Expand Up @@ -535,7 +549,21 @@ def _yield_lineage_from_api(
fqn=to_fqn,
)

chart_fqn = fqn.build(
self.metadata,
entity_type=LineageChart,
service_name=self.config.serviceName,
chart_name=str(chart_details.id),
)
chart_entity = self.metadata.get_by_name(
entity=LineageChart,
fqn=chart_fqn,
)

for from_entity in from_entities or []:
yield self._get_add_lineage_request(
to_entity=to_entity, from_entity=from_entity
)
yield self._get_add_lineage_request(
to_entity=chart_entity, from_entity=from_entity
)
107 changes: 79 additions & 28 deletions ingestion/tests/unit/topology/dashboard/test_metabase.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from metadata.generated.schema.api.data.createChart import CreateChartRequest
from metadata.generated.schema.api.data.createDashboard import CreateDashboardRequest
from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest
from metadata.generated.schema.entity.data.chart import Chart as LineageChart
from metadata.generated.schema.entity.data.dashboard import (
Dashboard as LineageDashboard,
)
Expand Down Expand Up @@ -83,6 +84,14 @@
),
)

EXAMPLE_CHART = LineageChart(
id="a1b2c3d4-1234-5678-abcd-ef0123456789",
name="lineage_chart",
service=EntityReference(
id="c3eb265f-5445-4ad3-ba5e-797d3a3071bb", type="dashboardService"
),
)

EXAMPLE_TABLE = [
Table(
id="0bd6bd6f-7fea-4a98-98c7-3b37073629c7",
Expand Down Expand Up @@ -168,6 +177,20 @@
)
)

EXPECTED_CHART_LINEAGE = AddLineageRequest(
edge=EntitiesEdge(
fromEntity=EntityReference(
id="0bd6bd6f-7fea-4a98-98c7-3b37073629c7",
type="table",
),
toEntity=EntityReference(
id="a1b2c3d4-1234-5678-abcd-ef0123456789",
type="chart",
),
lineageDetails=LineageDetails(source=LineageSource.DashboardLineage),
)
)

MOCK_DASHBOARD_DETAILS = MetabaseDashboardDetails(
description="SAMPLE DESCRIPTION", name="test_db", id="1", card_ids=["1", "2", "3"]
)
Expand Down Expand Up @@ -280,7 +303,6 @@ def test_yield_dashboard(self):
self.assertEqual(EXPECTED_DASHBOARD, [res.right for res in results])

@patch.object(fqn, "build", return_value=None)
@patch.object(OpenMetadata, "get_by_name", return_value=EXAMPLE_DASHBOARD)
@patch.object(OpenMetadata, "search_in_any_service", return_value=EXAMPLE_TABLE)
@patch.object(
MetabaseSource, "_get_database_service", return_value=MOCK_DATABASE_SERVICE
Expand All @@ -294,35 +316,64 @@ def test_yield_lineage(self, *_):
schema="test_schema", display_name="test_table"
)

# if no db service name then no lineage generated
result = self.metabase.yield_dashboard_lineage_details(
dashboard_details=MOCK_DASHBOARD_DETAILS, db_service_prefix=None
)
self.assertEqual(next(result).right, EXPECTED_LINEAGE)

# test out _yield_lineage_from_api
mock_dashboard = deepcopy(MOCK_DASHBOARD_DETAILS)
mock_dashboard.card_ids = [MOCK_DASHBOARD_DETAILS.card_ids[0]]
result = self.metabase.yield_dashboard_lineage_details(
dashboard_details=mock_dashboard,
db_service_prefix=f"{MOCK_DATABASE_SERVICE.name}",
)
self.assertEqual(next(result).right, EXPECTED_LINEAGE)

# test out _yield_lineage_from_query
mock_dashboard.card_ids = [MOCK_DASHBOARD_DETAILS.card_ids[1]]
result = self.metabase.yield_dashboard_lineage_details(
dashboard_details=mock_dashboard,
db_service_prefix=f"{MOCK_DATABASE_SERVICE.name}",
)
self.assertEqual(next(result).right, EXPECTED_LINEAGE)
# _yield_lineage_from_api: get_by_name called for dashboard then chart
# _yield_lineage_from_query: get_by_name called for dashboard then chart
# Total for MOCK_DASHBOARD_DETAILS (cards 1, 2): 4 calls → dashboard, chart, dashboard, chart
with patch.object(
OpenMetadata,
"get_by_name",
side_effect=[
EXAMPLE_DASHBOARD,
EXAMPLE_CHART,
EXAMPLE_DASHBOARD,
EXAMPLE_CHART,
],
):
result = self.metabase.yield_dashboard_lineage_details(
dashboard_details=MOCK_DASHBOARD_DETAILS, db_service_prefix=None
)
lineage_results = [r.right for r in result if r.right is not None]
self.assertIn(EXPECTED_LINEAGE, lineage_results)
self.assertIn(EXPECTED_CHART_LINEAGE, lineage_results)

# test out _yield_lineage_from_api (card 1 only): 2 calls → dashboard, chart
with patch.object(
OpenMetadata,
"get_by_name",
side_effect=[EXAMPLE_DASHBOARD, EXAMPLE_CHART],
):
mock_dashboard = deepcopy(MOCK_DASHBOARD_DETAILS)
mock_dashboard.card_ids = [MOCK_DASHBOARD_DETAILS.card_ids[0]]
result = self.metabase.yield_dashboard_lineage_details(
dashboard_details=mock_dashboard,
db_service_prefix=f"{MOCK_DATABASE_SERVICE.name}",
)
lineage_results = [r.right for r in result if r.right is not None]
self.assertIn(EXPECTED_LINEAGE, lineage_results)
self.assertIn(EXPECTED_CHART_LINEAGE, lineage_results)

# test out _yield_lineage_from_query (card 2 only): 2 calls → dashboard, chart
with patch.object(
OpenMetadata,
"get_by_name",
side_effect=[EXAMPLE_DASHBOARD, EXAMPLE_CHART],
):
mock_dashboard.card_ids = [MOCK_DASHBOARD_DETAILS.card_ids[1]]
result = self.metabase.yield_dashboard_lineage_details(
dashboard_details=mock_dashboard,
db_service_prefix=f"{MOCK_DATABASE_SERVICE.name}",
)
lineage_results = [r.right for r in result if r.right is not None]
self.assertIn(EXPECTED_LINEAGE, lineage_results)
self.assertIn(EXPECTED_CHART_LINEAGE, lineage_results)

# test out if no query type
mock_dashboard.card_ids = [MOCK_DASHBOARD_DETAILS.card_ids[2]]
result = self.metabase.yield_dashboard_lineage_details(
dashboard_details=mock_dashboard, db_service_prefix="db.service.name"
)
self.assertEqual(list(result), [])
with patch.object(OpenMetadata, "get_by_name", return_value=EXAMPLE_DASHBOARD):
mock_dashboard.card_ids = [MOCK_DASHBOARD_DETAILS.card_ids[2]]
result = self.metabase.yield_dashboard_lineage_details(
dashboard_details=mock_dashboard, db_service_prefix="db.service.name"
)
self.assertEqual(list(result), [])

def test_include_owners_flag_enabled(self):
"""
Expand Down
Loading