From d82c572c5bc74e8dfbc85ebf1c43bebe5530780f Mon Sep 17 00:00:00 2001
From: Br1an67 <932039080@qq.com>
Date: Mon, 2 Mar 2026 01:12:16 +0800
Subject: [PATCH] fix: preserve nodes with unassigned community level in
 filtering

Modify _filter_under_community_level to retain nodes where level is NaN,
which represent isolated nodes not assigned to any community by the
Leiden algorithm. Previously these nodes were silently dropped because
a `<=` comparison against NaN evaluates to False in pandas.

Fixes #1808
---
Review notes (text between the "---" marker and the diff is not recorded
in the commit message):
 * Rows whose level is NaN now pass the filter for every community_level
   value, because the isna() term does not depend on community_level.
 * Both new tests call the filter with community_level=1 only.

 .../graphrag/query/indexer_adapters.py        |  2 +-
 .../unit/query/test_filter_community_level.py | 42 +++++++++++++++++++
 2 files changed, 43 insertions(+), 1 deletion(-)
 create mode 100644 tests/unit/query/test_filter_community_level.py

diff --git a/packages/graphrag/graphrag/query/indexer_adapters.py b/packages/graphrag/graphrag/query/indexer_adapters.py
index 7119ad842c..a6ded1f9f7 100644
--- a/packages/graphrag/graphrag/query/indexer_adapters.py
+++ b/packages/graphrag/graphrag/query/indexer_adapters.py
@@ -221,5 +221,5 @@ def _filter_under_community_level(
 ) -> pd.DataFrame:
     return cast(
         "pd.DataFrame",
-        df[df.level <= community_level],
+        df[(df.level <= community_level) | df.level.isna()],
     )
diff --git a/tests/unit/query/test_filter_community_level.py b/tests/unit/query/test_filter_community_level.py
new file mode 100644
index 0000000000..5410edc3f2
--- /dev/null
+++ b/tests/unit/query/test_filter_community_level.py
@@ -0,0 +1,42 @@
+# Copyright (c) 2024 Microsoft Corporation.
+# Licensed under the MIT License
+
+"""Tests for _filter_under_community_level preserving NaN-level nodes."""
+
+import numpy as np
+import pandas as pd
+
+from graphrag.query.indexer_adapters import _filter_under_community_level
+
+
+def test_filter_preserves_nan_level_nodes():
+    """Nodes with level=NaN should not be discarded by the filter.
+
+    Regression test for issue #1808 where isolated nodes without a community
+    assignment (level=None) were incorrectly dropped.
+    """
+    df = pd.DataFrame({
+        "id": ["a", "b", "c", "d"],
+        "level": [0, 1, 2, np.nan],
+        "community": [1, 2, 3, np.nan],
+    })
+
+    result = _filter_under_community_level(df, community_level=1)
+
+    # Should keep level 0, 1 (<=1) and NaN (unassigned)
+    assert len(result) == 3
+    assert set(result["id"].tolist()) == {"a", "b", "d"}
+
+
+def test_filter_excludes_higher_level_nodes():
+    """Nodes with level > community_level should be excluded."""
+    df = pd.DataFrame({
+        "id": ["a", "b", "c"],
+        "level": [0, 2, 3],
+        "community": [1, 2, 3],
+    })
+
+    result = _filter_under_community_level(df, community_level=1)
+
+    assert len(result) == 1
+    assert result["id"].tolist() == ["a"]