From 1137f5adb63d7c83da0e4bf56c157679a5fa8863 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 17 Apr 2026 13:30:14 +0000 Subject: [PATCH 1/2] fix: update second-stage model estimand when pre-instantiated in TwoStageRegression (closes #1335) When second_stage_model is passed as a pre-instantiated CausalEstimator, the estimator's _target_estimand was never updated to modified_target_estimand (which has identifier_method='backdoor' and the correct backdoor_variables). Instead the original mediation estimand (with identifier_method='mediation', default_backdoor_id=None) was used, causing KeyError: None when the second-stage model called get_backdoor_variables() during fit(). Fix: explicitly update _target_estimand to modified_target_estimand when a pre-instantiated CausalEstimator is supplied. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: github-actions[bot] --- .../two_stage_regression_estimator.py | 12 ++-- .../test_two_stage_regression_estimator.py | 57 +++++++++++++++++-- 2 files changed, 58 insertions(+), 11 deletions(-) diff --git a/dowhy/causal_estimators/two_stage_regression_estimator.py b/dowhy/causal_estimators/two_stage_regression_estimator.py index 1ca1c8b67..a058571c4 100644 --- a/dowhy/causal_estimators/two_stage_regression_estimator.py +++ b/dowhy/causal_estimators/two_stage_regression_estimator.py @@ -116,17 +116,19 @@ def __init__( modified_target_estimand.identifier_method = "backdoor" modified_target_estimand.backdoor_variables = self._target_estimand.mediation_second_stage_confounders if second_stage_model is not None: - self._second_stage_model = ( - second_stage_model - if isinstance(second_stage_model, CausalEstimator) - else second_stage_model( + if isinstance(second_stage_model, CausalEstimator): + self._second_stage_model = second_stage_model + # Update the estimand so the second-stage model uses the correct + # backdoor configuration rather than the original mediation estimand. + self._second_stage_model._target_estimand = modified_target_estimand + else: + self._second_stage_model = second_stage_model( modified_target_estimand, test_significance=self._significance_test, evaluate_effect_strength=self._effect_strength_eval, confidence_intervals=self._confidence_intervals, **kwargs, ) - ) else: self._second_stage_model = self.__class__.DEFAULT_SECOND_STAGE_MODEL( modified_target_estimand, diff --git a/tests/causal_estimators/test_two_stage_regression_estimator.py b/tests/causal_estimators/test_two_stage_regression_estimator.py index c27b21c49..78b53ef59 100644 --- a/tests/causal_estimators/test_two_stage_regression_estimator.py +++ b/tests/causal_estimators/test_two_stage_regression_estimator.py @@ -124,9 +124,7 @@ def test_frontdoor_estimator(self): target "X" ] ] - """.replace( - "\n", "" - ) + """.replace("\n", "") N_SAMPLES = 10000 # Generate the data @@ -209,9 +207,7 @@ def _make_mediation_data(n=2000, seed=42): edge [ source "X" target "Y" ] edge [ source "M" target "Y" ] ] -""".replace( - "\n", " " -) +""".replace("\n", " ") class TestTwoStageRegressionMediationNIE: @@ -316,3 +312,52 @@ def test_nde_estimand_uses_correct_backdoor_variables(self): nde_estimand = estimator._second_stage_model_nde._target_estimand assert nde_estimand.identifier_method == "backdoor" assert nde_estimand.backdoor_variables == estimand.mediation_second_stage_confounders + + +class TestTwoStageRegressionPreinstantiatedSecondStage: + """Regression tests for #1335: KeyError when second_stage_model is a pre-instantiated CausalEstimator. + + When a user passes an already-constructed estimator instance as second_stage_model, + the TwoStageRegressionEstimator must update its _target_estimand to use the + modified (backdoor) estimand rather than the original mediation estimand. + """ + + def test_nie_with_preinstantiated_second_stage_no_keyerror(self): + """Passing a pre-instantiated second_stage_model must not raise KeyError.""" + import statsmodels.api as sm + + from dowhy.causal_estimators.generalized_linear_model_estimator import GeneralizedLinearModelEstimator + + df = _make_mediation_data() + model = CausalModel(data=df, treatment="X", outcome="Y", graph=_MEDIATION_GML) + estimand = model.identify_effect( + estimand_type=EstimandType.NONPARAMETRIC_NIE, + proceed_when_unidentifiable=True, + ) + second_stage = GeneralizedLinearModelEstimator(identified_estimand=estimand, glm_family=sm.families.Gaussian()) + # This must not raise KeyError: None + estimate = model.estimate_effect( + identified_estimand=estimand, + method_name="mediation.two_stage_regression", + method_params={"second_stage_model": second_stage}, + ) + assert np.isfinite(estimate.value) + + def test_nie_preinstantiated_second_stage_estimand_updated(self): + """The pre-instantiated second_stage_model's _target_estimand is updated to backdoor.""" + import statsmodels.api as sm + + from dowhy.causal_estimators.generalized_linear_model_estimator import GeneralizedLinearModelEstimator + + df = _make_mediation_data() + model = CausalModel(data=df, treatment="X", outcome="Y", graph=_MEDIATION_GML) + estimand = model.identify_effect( + estimand_type=EstimandType.NONPARAMETRIC_NIE, + proceed_when_unidentifiable=True, + ) + second_stage = GeneralizedLinearModelEstimator(identified_estimand=estimand, glm_family=sm.families.Gaussian()) + estimator = TwoStageRegressionEstimator( + identified_estimand=estimand, + second_stage_model=second_stage, + ) + assert estimator._second_stage_model._target_estimand.identifier_method == "backdoor" From 5ee3d4f06bbfe1c5ef71df881ec992f712d8d1af Mon Sep 17 00:00:00 2001 From: Emre Kiciman Date: Sun, 19 Apr 2026 02:01:49 -0700 Subject: [PATCH 2/2] fix formatting Signed-off-by: Emre Kiciman --- .../test_two_stage_regression_estimator.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/causal_estimators/test_two_stage_regression_estimator.py b/tests/causal_estimators/test_two_stage_regression_estimator.py index 78b53ef59..3174e9712 100644 --- a/tests/causal_estimators/test_two_stage_regression_estimator.py +++ b/tests/causal_estimators/test_two_stage_regression_estimator.py @@ -124,7 +124,9 @@ def test_frontdoor_estimator(self): target "X" ] ] - """.replace("\n", "") + """.replace( + "\n", "" + ) N_SAMPLES = 10000 # Generate the data @@ -207,7 +209,9 @@ def _make_mediation_data(n=2000, seed=42): edge [ source "X" target "Y" ] edge [ source "M" target "Y" ] ] -""".replace("\n", " ") +""".replace( + "\n", " " +) class TestTwoStageRegressionMediationNIE: