Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
169 changes: 113 additions & 56 deletions dowhy/causal_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,9 @@ def estimate_effect(
):
"""Estimate the identified causal effect.

Currently requires an explicit method name to be specified. Method names follow the convention of identification method followed by the specific estimation method: "[backdoor/iv/frontdoor].estimation_method_name". For a list of supported methods, check out the :doc:`User Guide </user_guide/causal_tasks/estimating_causal_effects/index>`. Here are some examples.
Method names follow the convention of identification method followed by the specific estimation method:
"[backdoor/iv/frontdoor].estimation_method_name". For a list of supported methods, check out the
:doc:`User Guide </user_guide/causal_tasks/estimating_causal_effects/index>`. Here are some examples.

* Propensity Score Matching: "backdoor.propensity_score_matching"
* Propensity Score Stratification: "backdoor.propensity_score_stratification"
Expand All @@ -269,13 +271,25 @@ def estimate_effect(
* Regression Discontinuity: "iv.regression_discontinuity"
* Two Stage Regression: "frontdoor.two_stage_regression"

In addition, you can directly call any of the EconML estimation methods. The convention is "[backdoor/iv].econml.path-to-estimator-class". For example, for the double machine learning estimator ("DML" class) that is located inside "dml" module of EconML, you can use the method name, "backdoor.econml.dml.DML". See :doc:`this demo notebook </example_notebooks/dowhy-conditional-treatment-effects>`.
In addition, you can directly call any of the EconML estimation methods. The convention is
"[backdoor/iv].econml.path-to-estimator-class". For example, for the double machine learning estimator
("DML" class) that is located inside "dml" module of EconML, you can use the method name,
"backdoor.econml.dml.DML". See :doc:`this demo notebook </example_notebooks/dowhy-conditional-treatment-effects>`.

When ``method_name`` is omitted (``None``), a default is selected automatically:

* **Backdoor identified** – propensity score stratification (binary treatment) or linear regression
(continuous treatment).
* **IV identified** – instrumental variable estimator.
* **Frontdoor identified** – two-stage regression.

An ``INFO``-level log message records which method was chosen.

:param identified_estimand: a probability expression
that represents the effect to be estimated. Output of
CausalModel.identify_effect method
:param method_name: name of the estimation method to be used.
:param method_name: name of the estimation method to be used. When ``None``, a default
is chosen automatically based on the identified estimand (see above).
:param control_value: Value of the treatment in the control group, for effect estimation. If treatment is multi-variate, this can be a list.
:param treatment_value: Value of the treatment in the treated group, for effect estimation. If treatment is multi-variate, this can be a list.
:param test_significance: Binary flag on whether to additionally do a statistical significance test for the estimate.
Expand All @@ -295,62 +309,78 @@ def estimate_effect(
effect_modifiers = self._graph.get_effect_modifiers(self._treatment, self._outcome)

if method_name is None:
# TODO add propensity score as default backdoor method, iv as default iv method, add an informational message to show which method has been selected.
pass
else:
# TODO add dowhy as a prefix to all dowhy estimators
num_components = len(method_name.split("."))
str_arr = method_name.split(".", maxsplit=1)
identifier_name = str_arr[0]
estimator_name = str_arr[1]
# This is done as all dowhy estimators have two parts and external ones have two or more parts
if num_components > 2:
estimator_package = estimator_name.split(".")[0]
if estimator_package == "dowhy": # For updated dowhy methods
estimator_method = estimator_name.split(".", maxsplit=1)[
1
] # discard dowhy from the full package name
causal_estimator_class = causal_estimators.get_class_object(estimator_method + "_estimator")
else:
third_party_estimator_package = estimator_package
causal_estimator_class = causal_estimators.get_class_object(
third_party_estimator_package, estimator_name
)
if method_params is None:
method_params = {}
# Define the third-party estimation method to be used
method_params[third_party_estimator_package + "_estimator"] = estimator_name
else: # For older dowhy methods
self.logger.info(estimator_name)
# Process the dowhy estimators
causal_estimator_class = causal_estimators.get_class_object(estimator_name + "_estimator")

if method_params is not None and (num_components <= 2 or estimator_package == "dowhy"):
extra_args = method_params.get("init_params", {})
else:
extra_args = {}
if method_params is None:
method_params = {}

identified_estimand.set_identifier_method(identifier_name)

# If not fit_estimator, attempt to retrieve existing estimator.
# Keep original behaviour to create new estimator if none found.
causal_estimator = None
if not fit_estimator:
causal_estimator = self.get_estimator(method_name)

if causal_estimator is None:
causal_estimator = causal_estimator_class(
if identified_estimand.no_directed_path:
self.logger.warning(
"No directed path from %s to %s. Causal effect is zero.", self._treatment, self._outcome
)
return CausalEstimate(
None,
None,
None,
0,
identified_estimand,
test_significance=test_significance,
evaluate_effect_strength=evaluate_effect_strength,
confidence_intervals=confidence_intervals,
**method_params,
**extra_args,
None,
control_value=control_value,
treatment_value=treatment_value,
)
Comment on lines 311 to 325
Copy link

Copilot AI Apr 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new early-return path for method_name is None + identified_estimand.no_directed_path is behaviorally important (it avoids the previous UnboundLocalError), but it isn’t covered by the added tests. Adding a focused test that constructs a graph with no directed path and asserts estimate.value == 0 (and no exception) would prevent regressions in this branch.

Copilot uses AI. Check for mistakes.
method_name = self._select_default_method_name(identified_estimand)
self.logger.info(
"No method_name provided. Automatically using '%s'. "
"Pass method_name explicitly to suppress this message.",
method_name,
)

self._estimator_cache[method_name] = causal_estimator
# TODO add dowhy as a prefix to all dowhy estimators
num_components = len(method_name.split("."))
str_arr = method_name.split(".", maxsplit=1)
identifier_name = str_arr[0]
estimator_name = str_arr[1]
# This is done as all dowhy estimators have two parts and external ones have two or more parts
if num_components > 2:
estimator_package = estimator_name.split(".")[0]
if estimator_package == "dowhy": # For updated dowhy methods
estimator_method = estimator_name.split(".", maxsplit=1)[1] # discard dowhy from the full package name
causal_estimator_class = causal_estimators.get_class_object(estimator_method + "_estimator")
else:
third_party_estimator_package = estimator_package
causal_estimator_class = causal_estimators.get_class_object(
third_party_estimator_package, estimator_name
)
if method_params is None:
method_params = {}
# Define the third-party estimation method to be used
method_params[third_party_estimator_package + "_estimator"] = estimator_name
else: # For older dowhy methods
self.logger.info(estimator_name)
# Process the dowhy estimators
causal_estimator_class = causal_estimators.get_class_object(estimator_name + "_estimator")

if method_params is not None and (num_components <= 2 or estimator_package == "dowhy"):
extra_args = method_params.get("init_params", {})
else:
extra_args = {}
if method_params is None:
method_params = {}

identified_estimand.set_identifier_method(identifier_name)

# If not fit_estimator, attempt to retrieve existing estimator.
# Keep original behaviour to create new estimator if none found.
causal_estimator = None
if not fit_estimator:
causal_estimator = self.get_estimator(method_name)

if causal_estimator is None:
causal_estimator = causal_estimator_class(
identified_estimand,
test_significance=test_significance,
evaluate_effect_strength=evaluate_effect_strength,
confidence_intervals=confidence_intervals,
**method_params,
**extra_args,
)

self._estimator_cache[method_name] = causal_estimator

return estimate_effect(
self._data,
Expand All @@ -366,6 +396,33 @@ def estimate_effect(
method_params,
)

def _select_default_method_name(self, identified_estimand):
    """Choose a default estimation method based on the identified estimand.

    Priority: backdoor > iv > frontdoor. Within backdoor, propensity-score
    stratification is chosen only when there is exactly one treatment column
    and it has at most 2 unique values; linear regression is chosen for a
    continuous treatment or whenever there is more than one treatment column.

    :param identified_estimand: Output of :meth:`identify_effect`.
    :raises ValueError: if no valid estimand is available for auto-selection.
    :returns: A method-name string suitable for :meth:`estimate_effect`.
    """
    estimands = identified_estimand.estimands or {}

    if estimands.get("backdoor") is not None:
        treatment_cols = self._treatment
        # Looking only at the first treatment column would misclassify a
        # multivariate treatment (e.g. first column binary, second continuous),
        # so the propensity-score default is restricted to a single column.
        # NOTE(review): assumes the propensity-score estimators handle only one
        # treatment variable -- confirm against the estimator implementation.
        is_single_binary = len(treatment_cols) == 1 and self._data[treatment_cols[0]].nunique() <= 2
        if is_single_binary:
            return "backdoor.propensity_score_stratification"
        return "backdoor.linear_regression"

    if estimands.get("iv") is not None:
        return "iv.instrumental_variable"

    if estimands.get("frontdoor") is not None:
        return "frontdoor.two_stage_regression"

    raise ValueError(
        "Could not automatically determine an estimation method: no valid backdoor, "
        "instrumental variable, or frontdoor estimand was identified. "
        "Please specify method_name explicitly."
    )

def do(self, x, identified_estimand, method_name=None, fit_estimator=True, method_params=None):
"""Do operator for estimating values of the outcome after intervening on treatment.

Expand Down
62 changes: 62 additions & 0 deletions tests/test_causal_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -698,6 +698,68 @@ def test_causal_estimator_cache(self):
assert (estimates[1].estimator) == model.get_estimator(methods[1])
assert (estimates[0].estimator) != model.get_estimator(methods[1]) # check not same object

def test_estimate_effect_default_method_binary_treatment(self):
    """Omitting method_name with a binary treatment should pick propensity score stratification."""
    dataset = dowhy.datasets.linear_dataset(
        beta=10,
        num_common_causes=2,
        num_samples=500,
        num_treatments=1,
        treatment_is_binary=True,
    )
    causal_model = CausalModel(
        data=dataset["df"],
        treatment=dataset["treatment_name"],
        outcome=dataset["outcome_name"],
        graph=dataset["gml_graph"],
        proceed_when_unidentifiable=True,
    )
    estimand = causal_model.identify_effect(proceed_when_unidentifiable=True)

    # Passing method_name=None must trigger auto-selection rather than an error.
    result = causal_model.estimate_effect(
        estimand,
        method_name=None,
        control_value=0,
        treatment_value=1,
    )

    assert result is not None
    chosen_estimator = type(result.estimator).__name__
    assert chosen_estimator == "PropensityScoreStratificationEstimator"

def test_estimate_effect_default_method_continuous_treatment(self):
    """Omitting method_name with a continuous treatment should pick linear regression."""
    dataset = dowhy.datasets.linear_dataset(
        beta=10,
        num_common_causes=2,
        num_samples=500,
        num_treatments=1,
        treatment_is_binary=False,
    )
    causal_model = CausalModel(
        data=dataset["df"],
        treatment=dataset["treatment_name"],
        outcome=dataset["outcome_name"],
        graph=dataset["gml_graph"],
        proceed_when_unidentifiable=True,
    )
    estimand = causal_model.identify_effect(proceed_when_unidentifiable=True)

    # No explicit method: the model is expected to choose one on its own.
    result = causal_model.estimate_effect(
        estimand,
        method_name=None,
        control_value=0,
        treatment_value=1,
    )

    assert result is not None
    chosen_estimator = type(result.estimator).__name__
    assert chosen_estimator == "LinearRegressionEstimator"

def test_estimate_effect_default_method_no_valid_estimand_raises(self):
    """With method_name=None and every estimand cleared, estimate_effect raises ValueError."""
    import networkx as nx

    # Minimal T -> Y graph with a tiny dataset.
    causal_graph = nx.DiGraph([("T", "Y")])
    frame = pd.DataFrame({"T": [0, 1, 0, 1, 0], "Y": [1, 2, 1, 3, 1]})
    causal_model = CausalModel(data=frame, treatment="T", outcome="Y", graph=causal_graph)
    estimand = causal_model.identify_effect(proceed_when_unidentifiable=True)

    # On this graph the backdoor estimand is valid (empty adjustment set), so
    # the error path is reached by blanking out every identification strategy
    # by hand before estimating.
    for strategy in ("backdoor", "iv", "frontdoor"):
        estimand.estimands[strategy] = None

    with pytest.raises(ValueError, match="method_name explicitly"):
        causal_model.estimate_effect(estimand, method_name=None)


# Allow this test module to be executed directly as a script; pytest is
# invoked on this file only.
if __name__ == "__main__":
    pytest.main([__file__])
Loading