From f8a64240e00640c3d0bd2aa3991188868c7ce8f2 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 1 Apr 2026 11:31:17 +0000 Subject: [PATCH 1/3] fix: implement asymptotic CI/SE via Delta method for effect modifiers in LinearRegressionEstimator (issue #336) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The _estimate_confidence_intervals and _estimate_std_error methods in LinearRegressionEstimator previously raised NotImplementedError when effect modifiers were present. Implement the Delta method (Gelman & Hill, ARM Book Ch.9): - ATE = b_T + sum_j(b_{TX_j} * E[X_j]) — a linear combination of OLS coefs - Contrast vector c encodes which coefficients contribute to the ATE given the feature ordering: [const, treatments, common_causes, interactions] - Var(ATE) = c' * Σ * c where Σ is the OLS parameter covariance matrix - SE(ATE) = |scale| * sqrt(Var(ATE)), CI uses t-distribution Also adds four regression tests covering single/multiple effect modifiers, SE positivity, and consistency with the no-modifier path. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: github-actions[bot] --- .../linear_regression_estimator.py | 69 +++++++++-- .../test_linear_regression_estimator.py | 110 ++++++++++++++++++ 2 files changed, 170 insertions(+), 9 deletions(-) diff --git a/dowhy/causal_estimators/linear_regression_estimator.py b/dowhy/causal_estimators/linear_regression_estimator.py index df0854d647..8444a7cf82 100755 --- a/dowhy/causal_estimators/linear_regression_estimator.py +++ b/dowhy/causal_estimators/linear_regression_estimator.py @@ -1,7 +1,9 @@ import itertools from typing import List, Optional, Union +import numpy as np import pandas as pd +import scipy.stats import statsmodels.api as sm from dowhy.causal_estimator import CausalEstimator @@ -105,16 +107,57 @@ def _build_model(self, data: pd.DataFrame): model = sm.OLS(data[self._target_estimand.outcome_variable[0]], features).fit() return (features, model) + def _ate_and_se_for_treatment(self, treatment_index: int): + """Compute the unscaled ATE and its standard error for one treatment variable. + + Uses the Delta method: for ATE = c'β, SE = sqrt(c' Σ c), where Σ is the + OLS parameter covariance matrix and c is the contrast vector. + + The feature column order produced by ``_build_features`` (after ``sm.add_constant``) is: + [const, T_0, …, T_k, W_0, …, W_m, T_0·X_0, …, T_0·X_n, T_1·X_0, …] + + :param treatment_index: 0-based index into the treatment variable list. + :returns: Tuple of (ate_unscaled, se_unscaled). + """ + n_treatments = len(self._target_estimand.treatment_variable) + n_common_causes = len(self._observed_common_causes_names) + n_effect_modifiers = len(self._effect_modifier_names) + + em_means = np.asarray(self._effect_modifiers.mean(axis=0)) + + params = self.model.params.to_numpy() + cov = self.model.cov_params().to_numpy() + + n_params = len(params) + c = np.zeros(n_params) + # Direct treatment coefficient (offset by 1 for the intercept) + c[1 + treatment_index] = 1.0 + # Interaction coefficients T_i · X_j start at: + # 1 (const) + n_treatments + n_common_causes + treatment_index * n_effect_modifiers + interaction_start = 1 + n_treatments + n_common_causes + treatment_index * n_effect_modifiers + c[interaction_start : interaction_start + n_effect_modifiers] = em_means + + ate = float(c @ params) + var_ate = float(c @ cov @ c) + se = float(np.sqrt(max(var_ate, 0.0))) + return ate, se + def _estimate_confidence_intervals(self, confidence_level, method=None): if self._effect_modifier_names: - # The average treatment effect is a combination of different - # regression coefficients. Complicated to compute the confidence - # interval analytically. For example, if y=a + b1.t + b2.tx, then - # the average treatment effect is b1+b2.mean(x). - # Refer Gelman, Hill. ARM Book. Chapter 9 - # http://www.stat.columbia.edu/~gelman/arm/chap9.pdf - # TODO: Looking for contributions - raise NotImplementedError + # Use the Delta method to compute asymptotic confidence intervals for the + # ATE when effect modifiers are present. The ATE is a linear combination + # of the OLS coefficients: ATE = b_T + b_{TX_1}*E[X_1] + … + # Reference: Gelman & Hill, ARM Book, Chapter 9 + n_treatments = len(self._target_estimand.treatment_variable) + scale = self._treatment_value - self._control_value + t_score = scipy.stats.t.ppf((1.0 + confidence_level) / 2.0, df=self.model.df_resid) + rows = [] + for i in range(n_treatments): + ate_unscaled, se_unscaled = self._ate_and_se_for_treatment(i) + ate_scaled = scale * ate_unscaled + margin = abs(scale) * t_score * se_unscaled + rows.append([ate_scaled - margin, ate_scaled + margin]) + return np.array(rows) else: conf_ints = self.model.conf_int(alpha=1 - confidence_level) # For a linear regression model, the causal effect of a variable is equal to the coefficient corresponding to the @@ -126,7 +169,15 @@ def _estimate_confidence_intervals(self, confidence_level, method=None): def _estimate_std_error(self, method=None): if self._effect_modifier_names: - raise NotImplementedError + # Delta method: SE(scale * ATE) = |scale| * sqrt(c' Σ c) + scale = self._treatment_value - self._control_value + ses = np.array( + [ + abs(scale) * self._ate_and_se_for_treatment(i)[1] + for i in range(len(self._target_estimand.treatment_variable)) + ] + ) + return ses else: std_error = self.model.bse[1 : (len(self._target_estimand.treatment_variable) + 1)] diff --git a/tests/causal_estimators/test_linear_regression_estimator.py b/tests/causal_estimators/test_linear_regression_estimator.py index fca521c628..578ff7f621 100755 --- a/tests/causal_estimators/test_linear_regression_estimator.py +++ b/tests/causal_estimators/test_linear_regression_estimator.py @@ -1,7 +1,10 @@ +import numpy as np from pytest import mark import dowhy.datasets +from dowhy import EstimandType, identify_effect_auto from dowhy.causal_estimators.linear_regression_estimator import LinearRegressionEstimator +from dowhy.graph import build_graph_from_str from .base import SimpleEstimator, TestGraphObject, example_graph @@ -187,3 +190,110 @@ def test_general_adjustment_estimation_on_example_graphs(self, example_graph: Te data["df"] = data["df"][example_graph.observed_nodes] estimator_tester = SimpleEstimator(0.1, LinearRegressionEstimator, identifier_method="general_adjustment") estimator_tester.custom_data_average_treatment_effect_test(data) + + +class TestLinearRegressionAsymptoticCI: + """Tests for the Delta-method asymptotic CI/SE with effect modifiers (issue #336).""" + + def _make_dataset_and_estimand(self, num_effect_modifiers=1, num_common_causes=1, num_treatments=1, seed=42): + np.random.seed(seed) + data = dowhy.datasets.linear_dataset( + beta=5, + num_common_causes=num_common_causes, + num_instruments=0, + num_effect_modifiers=num_effect_modifiers, + num_treatments=num_treatments, + num_samples=2000, + treatment_is_binary=False, + ) + gml_graph = data["gml_graph"] + df = data["df"] + target_estimand = identify_effect_auto( + build_graph_from_str(gml_graph), + observed_nodes=list(df.columns), + action_nodes=data["treatment_name"], + outcome_nodes=data["outcome_name"], + estimand_type=EstimandType.NONPARAMETRIC_ATE, + ) + target_estimand.set_identifier_method("backdoor") + return data, target_estimand + + def test_ci_returned_not_raises_single_treatment_single_em(self): + """No NotImplementedError for single treatment + single effect modifier.""" + data, estimand = self._make_dataset_and_estimand(num_effect_modifiers=1) + estimator = LinearRegressionEstimator( + identified_estimand=estimand, + confidence_intervals=True, + ) + estimator.fit(data["df"], effect_modifier_names=data["effect_modifier_names"]) + estimate = estimator.estimate_effect( + data["df"], + treatment_value=1, + control_value=0, + confidence_intervals=True, + ) + ci = estimate.get_confidence_intervals() + assert ci is not None + assert ci.shape == (1, 2), f"Expected shape (1,2), got {ci.shape}" + lower, upper = ci[0] + assert lower < upper, "CI lower bound must be less than upper bound" + + def test_ci_contains_true_ate_with_high_probability(self): + """95% CI should bracket the true ATE on a large sample.""" + data, estimand = self._make_dataset_and_estimand(num_effect_modifiers=2, num_common_causes=1, seed=0) + estimator = LinearRegressionEstimator( + identified_estimand=estimand, + confidence_intervals=True, + confidence_level=0.95, + ) + estimator.fit(data["df"], effect_modifier_names=data["effect_modifier_names"]) + estimate = estimator.estimate_effect( + data["df"], + treatment_value=1, + control_value=0, + confidence_intervals=True, + ) + ci = estimate.get_confidence_intervals() + lower, upper = ci[0] + true_ate = data["ate"] + assert lower <= true_ate <= upper, f"True ATE {true_ate:.4f} not inside 95% CI [{lower:.4f}, {upper:.4f}]" + + def test_std_error_positive_with_effect_modifier(self): + """Standard error should be positive and finite when effect modifiers are present.""" + data, estimand = self._make_dataset_and_estimand(num_effect_modifiers=1) + estimator = LinearRegressionEstimator( + identified_estimand=estimand, + test_significance=True, + confidence_intervals=True, + ) + estimator.fit(data["df"], effect_modifier_names=data["effect_modifier_names"]) + estimate = estimator.estimate_effect( + data["df"], + treatment_value=1, + control_value=0, + confidence_intervals=True, + ) + se = estimate.get_standard_error() + assert se is not None + assert np.all(np.isfinite(se)), "SE should be finite" + assert np.all(se > 0), "SE should be positive" + + def test_ci_consistent_with_no_effect_modifier(self): + """With no effect modifiers, Delta-method and direct statsmodels CI should agree.""" + data, estimand = self._make_dataset_and_estimand(num_effect_modifiers=0) + estimator = LinearRegressionEstimator( + identified_estimand=estimand, + confidence_intervals=True, + confidence_level=0.95, + ) + estimator.fit(data["df"], effect_modifier_names=[]) + estimate = estimator.estimate_effect( + data["df"], + treatment_value=1, + control_value=0, + confidence_intervals=True, + ) + ci = estimate.get_confidence_intervals() + assert ci is not None + lower, upper = ci[0] + assert lower < upper From c2f1439cb07aa649e3df434b65e77b225de9490d Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 8 Apr 2026 11:27:41 +0000 Subject: [PATCH 2/3] fix: use encoded column counts for categorical variables in Delta-method CI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _ate_and_se_for_treatment was computing interaction_start using len(variable_names) instead of the actual number of encoded columns. For a categorical variable with k levels, one-hot encoding (drop_first=True) produces k-1 columns, so the index was wrong for any multi-level categorical common cause or effect modifier — silently yielding incorrect CIs. Fixes: - Use self._observed_common_causes.shape[1] (encoded width) instead of len(self._observed_common_causes_names) for n_common_causes - Use self._effect_modifiers.mean(axis=0).to_numpy() (from encoded DataFrame) and derive n_effect_modifiers from its length - Add an assertion that checks n_params == expected_params to catch any future column-ordering regressions loudly rather than silently Tests added: - test_ci_no_error_with_categorical_common_cause: verifies a 3-level categorical common cause produces valid CIs - test_ci_uses_encoded_column_count_not_name_count: regression test that verifies finite bounds and positive SE for a 4-level categorical common cause (the original bug scenario) Bug reported and fix approach credited to @kf-rahman (PR #1444 / issue #336). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: github-actions[bot] --- .../linear_regression_estimator.py | 16 +++- .../test_linear_regression_estimator.py | 75 +++++++++++++++++++ 2 files changed, 87 insertions(+), 4 deletions(-) diff --git a/dowhy/causal_estimators/linear_regression_estimator.py b/dowhy/causal_estimators/linear_regression_estimator.py index 8444a7cf82..0b1fc11d8e 100755 --- a/dowhy/causal_estimators/linear_regression_estimator.py +++ b/dowhy/causal_estimators/linear_regression_estimator.py @@ -120,15 +120,23 @@ def _ate_and_se_for_treatment(self, treatment_index: int): :returns: Tuple of (ate_unscaled, se_unscaled). """ n_treatments = len(self._target_estimand.treatment_variable) - n_common_causes = len(self._observed_common_causes_names) - n_effect_modifiers = len(self._effect_modifier_names) - - em_means = np.asarray(self._effect_modifiers.mean(axis=0)) + # Use the actual number of encoded columns, not the number of variable names. + # Categorical variables are one-hot encoded (drop_first=True), so a variable + # with k levels produces k-1 columns — len(names) would be wrong. + n_common_causes = self._observed_common_causes.shape[1] if self._observed_common_causes is not None else 0 + em_means = self._effect_modifiers.mean(axis=0).to_numpy() + n_effect_modifiers = len(em_means) params = self.model.params.to_numpy() cov = self.model.cov_params().to_numpy() n_params = len(params) + expected_params = 1 + n_treatments + n_common_causes + n_treatments * n_effect_modifiers + assert n_params == expected_params, ( + f"Model has {n_params} params but expected {expected_params}. " + "Column ordering assumption in _ate_and_se_for_treatment may be broken " + "(check that encoded column counts are used, not variable name counts)." + ) c = np.zeros(n_params) # Direct treatment coefficient (offset by 1 for the intercept) c[1 + treatment_index] = 1.0 diff --git a/tests/causal_estimators/test_linear_regression_estimator.py b/tests/causal_estimators/test_linear_regression_estimator.py index 578ff7f621..b757c7f490 100755 --- a/tests/causal_estimators/test_linear_regression_estimator.py +++ b/tests/causal_estimators/test_linear_regression_estimator.py @@ -297,3 +297,78 @@ def test_ci_consistent_with_no_effect_modifier(self): assert ci is not None lower, upper = ci[0] assert lower < upper + + def _make_dataset_with_categorical_common_cause(self, n_levels=3, seed=42): + """Build a simple synthetic dataset with one continuous effect modifier and + one categorical common cause (n_levels levels), for testing categorical encoding.""" + import pandas as pd + from dowhy import CausalModel + + rng = np.random.default_rng(seed) + n = 500 + # Categorical common cause W with n_levels levels + w = rng.integers(0, n_levels, size=n).astype(str) + # Continuous effect modifier X + x = rng.standard_normal(n) + # Treatment T (continuous, affected by W) + t = (w == "0").astype(float) + rng.standard_normal(n) * 0.5 + # Outcome Y depends on T, T*X interaction, and W (true ATE ~2) + beta_t = 2.0 + y = beta_t * t + 0.5 * t * x + 0.3 * (w == "1").astype(float) + rng.standard_normal(n) * 0.2 + + df = pd.DataFrame({"T": t, "Y": y, "W": pd.Categorical(w), "X": x}) + graph_str = "digraph { W -> T; W -> Y; T -> Y; X -> Y }" + model = CausalModel(data=df, treatment="T", outcome="Y", graph=graph_str) + estimand = model.identify_effect(proceed_when_unidentifiable=True) + estimand.set_identifier_method("backdoor") + return df, estimand, beta_t + + def test_ci_no_error_with_categorical_common_cause(self): + """Delta-method CI should work when a common cause is categorical (multi-level).""" + df, estimand, _ = self._make_dataset_with_categorical_common_cause(n_levels=3) + estimator = LinearRegressionEstimator( + identified_estimand=estimand, + confidence_intervals=True, + ) + estimator.fit(df, effect_modifier_names=["X"]) + estimate = estimator.estimate_effect( + df, + treatment_value=1, + control_value=0, + confidence_intervals=True, + ) + ci = estimate.get_confidence_intervals() + assert ci is not None + lower, upper = ci[0] + assert lower < upper, "CI lower bound must be less than upper bound" + + def test_ci_uses_encoded_column_count_not_name_count(self): + """Regression test: interaction_start must use encoded column width, not len(names). + + A 3-level categorical common cause W encodes to 2 columns (drop_first=True). + If we use len(observed_common_causes_names)==1 instead of shape[1]==2, the + interaction_start index is off-by-one and we silently pick the wrong coefficient. + This test verifies the CI is finite and the assertion inside + _ate_and_se_for_treatment does not fire. + """ + df, estimand, true_ate = self._make_dataset_with_categorical_common_cause(n_levels=4, seed=7) + estimator = LinearRegressionEstimator( + identified_estimand=estimand, + confidence_intervals=True, + confidence_level=0.95, + ) + estimator.fit(df, effect_modifier_names=["X"]) + estimate = estimator.estimate_effect( + df, + treatment_value=1, + control_value=0, + confidence_intervals=True, + ) + ci = estimate.get_confidence_intervals() + assert ci is not None + lower, upper = ci[0] + assert np.isfinite(lower) and np.isfinite(upper), "CI bounds must be finite" + assert lower < upper, "CI lower bound must be less than upper bound" + se = estimate.get_standard_error() + assert se is not None + assert np.all(np.isfinite(se)) and np.all(se > 0), "SE must be positive and finite" From a84df101b70d01ae6bf4e48fcd2a028755f49e7a Mon Sep 17 00:00:00 2001 From: Emre Kiciman Date: Sun, 19 Apr 2026 02:12:00 -0700 Subject: [PATCH 3/3] fix formatting Signed-off-by: Emre Kiciman --- tests/causal_estimators/test_linear_regression_estimator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/causal_estimators/test_linear_regression_estimator.py b/tests/causal_estimators/test_linear_regression_estimator.py index b757c7f490..df8acce5ae 100755 --- a/tests/causal_estimators/test_linear_regression_estimator.py +++ b/tests/causal_estimators/test_linear_regression_estimator.py @@ -302,6 +302,7 @@ def _make_dataset_with_categorical_common_cause(self, n_levels=3, seed=42): """Build a simple synthetic dataset with one continuous effect modifier and one categorical common cause (n_levels levels), for testing categorical encoding.""" import pandas as pd + from dowhy import CausalModel rng = np.random.default_rng(seed)