Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions dowhy/causal_estimators/two_stage_regression_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,17 +116,19 @@ def __init__(
modified_target_estimand.identifier_method = "backdoor"
modified_target_estimand.backdoor_variables = self._target_estimand.mediation_second_stage_confounders
if second_stage_model is not None:
self._second_stage_model = (
second_stage_model
if isinstance(second_stage_model, CausalEstimator)
else second_stage_model(
if isinstance(second_stage_model, CausalEstimator):
self._second_stage_model = second_stage_model
# Update the estimand so the second-stage model uses the correct
# backdoor configuration rather than the original mediation estimand.
self._second_stage_model._target_estimand = modified_target_estimand
else:
self._second_stage_model = second_stage_model(
Comment on lines 118 to +125
Copy link

Copilot AI Apr 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The pre-instantiated-model handling was fixed for second_stage_model, but first_stage_model still accepts a pre-instantiated CausalEstimator without updating its _target_estimand to the backdoor-modified estimand. If a user passes a pre-instantiated RegressionEstimator (e.g., GeneralizedLinearModelEstimator) for the first stage with a mediation estimand, RegressionEstimator.fit() will either raise because identifier_method is still 'mediation', or hit the same default_backdoor_id=None issue. Consider applying the same pattern used below for second_stage_model to the first_stage_model branch as well (i.e., set self._first_stage_model._target_estimand to the first-stage modified_target_estimand).

Copilot uses AI. Check for mistakes.
modified_target_estimand,
test_significance=self._significance_test,
evaluate_effect_strength=self._effect_strength_eval,
confidence_intervals=self._confidence_intervals,
**kwargs,
)
)
else:
self._second_stage_model = self.__class__.DEFAULT_SECOND_STAGE_MODEL(
modified_target_estimand,
Expand Down
57 changes: 51 additions & 6 deletions tests/causal_estimators/test_two_stage_regression_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,7 @@ def test_frontdoor_estimator(self):
target "X"
]
]
""".replace(
"\n", ""
)
""".replace("\n", "")

N_SAMPLES = 10000
# Generate the data
Expand Down Expand Up @@ -209,9 +207,7 @@ def _make_mediation_data(n=2000, seed=42):
edge [ source "X" target "Y" ]
edge [ source "M" target "Y" ]
]
""".replace(
"\n", " "
)
""".replace("\n", " ")


class TestTwoStageRegressionMediationNIE:
Expand Down Expand Up @@ -316,3 +312,52 @@ def test_nde_estimand_uses_correct_backdoor_variables(self):
nde_estimand = estimator._second_stage_model_nde._target_estimand
assert nde_estimand.identifier_method == "backdoor"
assert nde_estimand.backdoor_variables == estimand.mediation_second_stage_confounders


class TestTwoStageRegressionPreinstantiatedSecondStage:
    """Regression tests for #1335: KeyError when second_stage_model is a pre-instantiated CausalEstimator.

    When a user passes an already-constructed estimator instance as second_stage_model,
    the TwoStageRegressionEstimator must update its _target_estimand to use the
    modified (backdoor) estimand rather than the original mediation estimand.
    """

    @staticmethod
    def _build_model_estimand_and_second_stage():
        """Create the fixture shared by the tests in this class.

        Returns a ``(model, estimand, second_stage)`` tuple where ``estimand`` is the
        NIE estimand identified on the mediation graph and ``second_stage`` is a
        GeneralizedLinearModelEstimator constructed directly from that (mediation)
        estimand, i.e. *before* TwoStageRegressionEstimator has had a chance to
        swap in the backdoor-modified estimand.
        """
        # Imported locally, as in the original tests, so that importing this test
        # module does not itself depend on statsmodels.
        import statsmodels.api as sm

        from dowhy.causal_estimators.generalized_linear_model_estimator import GeneralizedLinearModelEstimator

        df = _make_mediation_data()
        model = CausalModel(data=df, treatment="X", outcome="Y", graph=_MEDIATION_GML)
        estimand = model.identify_effect(
            estimand_type=EstimandType.NONPARAMETRIC_NIE,
            proceed_when_unidentifiable=True,
        )
        second_stage = GeneralizedLinearModelEstimator(identified_estimand=estimand, glm_family=sm.families.Gaussian())
        return model, estimand, second_stage

    def test_nie_with_preinstantiated_second_stage_no_keyerror(self):
        """Passing a pre-instantiated second_stage_model must not raise KeyError."""
        model, estimand, second_stage = self._build_model_estimand_and_second_stage()
        # This must not raise KeyError: None
        estimate = model.estimate_effect(
            identified_estimand=estimand,
            method_name="mediation.two_stage_regression",
            method_params={"second_stage_model": second_stage},
        )
        assert np.isfinite(estimate.value)

    def test_nie_preinstantiated_second_stage_estimand_updated(self):
        """The pre-instantiated second_stage_model's _target_estimand is updated to backdoor."""
        _, estimand, second_stage = self._build_model_estimand_and_second_stage()
        estimator = TwoStageRegressionEstimator(
            identified_estimand=estimand,
            second_stage_model=second_stage,
        )
        updated_estimand = estimator._second_stage_model._target_estimand
        assert updated_estimand.identifier_method == "backdoor"
        # Checking only the identifier method would still pass if the wrong
        # confounder set were installed; the second stage must adjust for the
        # mediation second-stage confounders.
        assert updated_estimand.backdoor_variables == estimand.mediation_second_stage_confounders
Comment on lines +329 to +367
Copy link

Copilot AI Apr 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new regression coverage exercises a pre-instantiated second_stage_model, but the same failure mode can occur when first_stage_model is passed as a pre-instantiated estimator instance (its _target_estimand isn’t currently rewritten to the backdoor-modified estimand). Adding a test that passes a pre-instantiated first-stage estimator (and asserts no exception / correct identifier_method) would help prevent a partial regression fix.

Copilot uses AI. Check for mistakes.
Loading