Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
a0ec6a8
skip hh in simple simulate
i-am-sijia Nov 13, 2025
8623516
retain `household_id` by default in the drop_unused_columns()
i-am-sijia Nov 20, 2025
397f250
skip choice for interaction sample simulate
i-am-sijia Nov 20, 2025
fa08f11
don't log hh id when bad choice df is empy
i-am-sijia Nov 20, 2025
f26cc80
add hh id to user specified `chooser_columns` when `chooser_columns` …
i-am-sijia Nov 20, 2025
76fd833
blacken
i-am-sijia Nov 20, 2025
96e73ec
update test utils to probs raises
i-am-sijia Nov 20, 2025
cf98cd2
[skip ci] shadow price calc without failed choices
i-am-sijia Dec 10, 2025
baa47fa
[skip ci] special case in trip mode choice
i-am-sijia Dec 10, 2025
6cffd9b
[skip ci] adjust sample_rate
i-am-sijia Dec 10, 2025
0e34b7d
[skip ci] add final logging of num hh skipped
i-am-sijia Dec 10, 2025
5da9715
unit test
i-am-sijia Dec 10, 2025
50034fd
update CI
i-am-sijia Dec 10, 2025
a6ed5cb
track and summarize failed households by component
i-am-sijia Jan 19, 2026
91227b6
`skipped_household_ids` is now a dictionary
i-am-sijia Jan 19, 2026
c986e5b
print more loggings about trips missing modes
i-am-sijia Jan 19, 2026
e6a8c1b
adjust trip expansion factor using hh weight instead of count
i-am-sijia Jan 19, 2026
ee52916
update state tables only when new households are skipped
i-am-sijia Jan 19, 2026
5316890
put a limit on num of households allowed to skip
i-am-sijia Jan 20, 2026
f0a2581
auto debug tracing newly failed choice
i-am-sijia Jan 23, 2026
f04bf14
update test
i-am-sijia Jan 25, 2026
9d3d018
Add documentation to user guide
i-am-sijia Jan 25, 2026
06c6d75
Merge branch 'main' into skip-failed-hh
i-am-sijia Jan 25, 2026
c257e3c
update doc
i-am-sijia Mar 9, 2026
a734c16
increase sample rate decimals
i-am-sijia Mar 9, 2026
d28faa0
handle zero `sample_rate` when calculating weights
i-am-sijia Mar 17, 2026
71cc5fd
black
i-am-sijia Mar 17, 2026
dcb2864
[skip ci] confirm trace files are generated for skipped hh
i-am-sijia Mar 17, 2026
0615cfc
Revert "increase sample rate decimals"
i-am-sijia Mar 17, 2026
93c3ba9
add note for not rounding sample rate
i-am-sijia Mar 17, 2026
96e01f9
write trip matrices even when `sample_rate` is all zero
i-am-sijia Mar 17, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/core_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ jobs:

- run: uv run pytest test/test_skim_name_conflicts.py
- run: uv run pytest test/random_seed/test_random_seed.py
- run: uv run pytest test/skip_failed_choices/test_skip_failed_choices.py

builtin_regional_models:
needs: foundation
Expand Down
32 changes: 32 additions & 0 deletions activitysim/abm/models/location_choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,11 @@ def location_sample(
):
# FIXME - MEMORY HACK - only include columns actually used in spec
chooser_columns = model_settings.SIMULATE_CHOOSER_COLUMNS
# Drop this when PR #1017 is merged
if ("household_id" not in chooser_columns) and (
"household_id" in persons_merged.columns
):
chooser_columns = chooser_columns + ["household_id"]
choosers = persons_merged[chooser_columns]

# create wrapper with keys for this lookup - in this case there is a home_zone_id in the choosers
Expand Down Expand Up @@ -390,6 +395,11 @@ def location_presample(
# FIXME maybe we should add it for multi-zone (from maz_taz) if missing?
chooser_columns = model_settings.SIMULATE_CHOOSER_COLUMNS
chooser_columns = [HOME_TAZ if c == HOME_MAZ else c for c in chooser_columns]
# Drop this when PR #1017 is merged
if ("household_id" not in chooser_columns) and (
"household_id" in persons_merged.columns
):
chooser_columns = chooser_columns + ["household_id"]
choosers = persons_merged[chooser_columns]

# create wrapper with keys for this lookup - in this case there is a HOME_TAZ in the choosers
Expand Down Expand Up @@ -620,6 +630,11 @@ def run_location_simulate(

# FIXME - MEMORY HACK - only include columns actually used in spec
chooser_columns = model_settings.SIMULATE_CHOOSER_COLUMNS
# Drop this when PR #1017 is merged
if ("household_id" not in chooser_columns) and (
"household_id" in persons_merged.columns
):
chooser_columns = chooser_columns + ["household_id"]
choosers = persons_merged[chooser_columns]

alt_dest_col_name = model_settings.ALT_DEST_COL_NAME
Expand Down Expand Up @@ -1072,6 +1087,23 @@ def iterate_location_choice(
else:
choices_df = choices_df_

# drop choices that belong to the failed households: state.skipped_household_ids
# so that their choices are not considered in shadow price calculations
# first append household_id to choices_df
choices_df = choices_df.merge(
persons_merged_df[["household_id"]],
left_index=True,
right_index=True,
how="left",
)
if len(choices_df) > 0:
choices_df = choices_df[
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should check here how many choices are getting dropped, and

  1. emit a warning that N choices have failed, and
  2. check if N exceeds configured failure thresholds (e.g. not more than 1% of all households), in which case an error should be raised.

Copy link
Copy Markdown
Member Author

@i-am-sijia i-am-sijia Jan 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both the warning and the check have been implemented in core.workflow.state.update_table(). I implemented them in the state.py instead of in model components to make the code base clean, i.e., less code duplication.

~choices_df["household_id"].isin(
state.get("skipped_household_ids", set())
)
]
choices_df = choices_df.drop(columns=["household_id"])

spc.set_choices(
choices=choices_df["choice"],
segment_ids=persons_merged_df[chooser_segment_column].reindex(
Expand Down
5 changes: 5 additions & 0 deletions activitysim/abm/models/school_escorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,11 @@ def school_escorting(
# reduce memory by limiting columns if selected columns are supplied
chooser_columns = model_settings.SIMULATE_CHOOSER_COLUMNS
if chooser_columns is not None:
# Drop this when PR #1017 is merged
if ("household_id" not in chooser_columns) and (
"household_id" in choosers.columns
):
chooser_columns = chooser_columns + ["household_id"]
chooser_columns = chooser_columns + participant_columns
choosers = choosers[chooser_columns]

Expand Down
21 changes: 21 additions & 0 deletions activitysim/abm/models/trip_matrices.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,12 @@ def write_trip_matrices(
.TAZ.tolist()
)

# print out number of households skipped due to failed choices
if state.settings.skip_failed_choices:
logger.info(
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

logging level for this should be higher than "info"; at least warning if not "error" level.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've moved the summary of number of skipped households, total and total by model component, to cli.run.run(), which is a better home than trip_matrices.py.

f"\n!!!\nATTENTION: Skipped households with failed choices during simulation. Number of households skipped: {state.get('num_skipped_households', 0)}.\n!!!"
)


def annotate_trips(
state: workflow.State,
Expand Down Expand Up @@ -393,6 +399,21 @@ def write_matrices(
if not matrix_settings:
logger.error("Missing MATRICES setting in write_trip_matrices.yaml")

hh_weight_col = model_settings.HH_EXPANSION_WEIGHT_COL
if hh_weight_col:
if state.get("num_skipped_households", 0) > 0:
logger.info(
f"Adjusting household expansion weights in {hh_weight_col} to account for {state.get('num_skipped_households', 0)} skipped households."
)
# adjust the hh expansion weights to account for skipped households
adjustment_factor = state.get_dataframe("households").shape[0] / (
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should the adjustment to expansion weights be based not on the number of skipped households, but instead on the original expansion weight total and the total expansion weight of the households we dropped? E.g. if we drop a household with a giant expansion weight that's a bigger thing than dropping a household with a smaller one.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've updated the adjustment to expansion weights to be based on the weight of households instead of counts. I agree with you in theory that it should be based on weights. However, I don't think it will make a difference in application. Here's why. In simulation, 99% of the time ActivitySim has the same sample rate for all households and the same expansion weight (calculated as 1/sample rate), this is especially true when ActivitySim uses an input synthetic population with weight of 1 for each record implicitly built in. In contrast to simulation, in estimation, the input households are very likely to have different weights, however, we don't use weights when estimating a model, and trip_matrices step is usually skipped in the estimation mode.

state.get_dataframe("households").shape[0]
+ state.get("num_skipped_households", 0)
)
aggregate_trips[hh_weight_col] = (
aggregate_trips[hh_weight_col] * adjustment_factor
)

for matrix in matrix_settings:
matrix_is_tap = matrix.is_tap

Expand Down
14 changes: 13 additions & 1 deletion activitysim/abm/models/trip_mode_choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,14 @@ def trip_mode_choice(
"trip_mode_choice choices", trips_df[mode_column_name], value_counts=True
)

assert not trips_df[mode_column_name].isnull().any()
# if we're skipping failed choices, the trip modes for failed simulations will be null
if state.settings.skip_failed_choices:
mask_skipped = trips_df["household_id"].isin(
state.get("skipped_household_ids", set())
)
assert not trips_df.loc[~mask_skipped, mode_column_name].isnull().any()
else:
assert not trips_df[mode_column_name].isnull().any()

state.add_table("trips", trips_df)

Expand All @@ -382,6 +389,11 @@ def trip_mode_choice(
# need to update locals_dict to access skims that are the same .shape as trips table
locals_dict = {}
locals_dict.update(constants)
if state.settings.skip_failed_choices:
mask_skipped = trips_merged["household_id"].isin(
state.get("skipped_household_ids", set())
)
trips_merged = trips_merged.loc[~mask_skipped]
simulate.set_skim_wrapper_targets(trips_merged, skims)
locals_dict.update(skims)
locals_dict["timeframe"] = "trip"
Expand Down
4 changes: 4 additions & 0 deletions activitysim/abm/models/util/school_escort_tours_trips.py
Original file line number Diff line number Diff line change
Expand Up @@ -1043,6 +1043,10 @@ def force_escortee_trip_modes_to_match_chauffeur(state: workflow.State, trips):
f"Changed {diff.sum()} trip modes of school escortees to match their chauffeur"
)

# trip_mode can be na if the run allows skipping failed choices and the trip mode choice has failed
if state.settings.skip_failed_choices:
return trips
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather than just being OK here, code should:

  1. emit a warning that N choices have failed, and
  2. check if N exceeds configured failure thresholds (e.g. not more than 1% of all choices), in which case an error should be raised.

Copy link
Copy Markdown
Member Author

@i-am-sijia i-am-sijia Jan 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added the warning for item 1. For item 2, I implemented the check in core.workflow.state.update_table(), immediately after the households are being dropped to make sure it raises an error when N (or the weighted N) exceeds the threshold. Implementing the check in the state instead of in model components makes the code base clean, i.e., less code duplication. The global parameter of the threshold is added in core.configuration.top.fraction_of_failed_choices_allowed.


assert (
~trips.trip_mode.isna()
).all(), f"Missing trip mode for {trips[trips.trip_mode.isna()]}"
Expand Down
10 changes: 10 additions & 0 deletions activitysim/abm/models/util/tour_destination.py
Original file line number Diff line number Diff line change
Expand Up @@ -625,6 +625,11 @@ def run_destination_sample(
# if special person id is passed
chooser_id_column = model_settings.CHOOSER_ID_COLUMN

# Drop this when PR #1017 is merged
if ("household_id" not in chooser_columns) and (
"household_id" in persons_merged.columns
):
chooser_columns = chooser_columns + ["household_id"]
persons_merged = persons_merged[
[c for c in persons_merged.columns if c in chooser_columns]
]
Expand Down Expand Up @@ -799,6 +804,11 @@ def run_destination_simulate(
# if special person id is passed
chooser_id_column = model_settings.CHOOSER_ID_COLUMN

# Drop this when PR #1017 is merged
if ("household_id" not in chooser_columns) and (
"household_id" in persons_merged.columns
):
chooser_columns = chooser_columns + ["household_id"]
persons_merged = persons_merged[
[c for c in persons_merged.columns if c in chooser_columns]
]
Expand Down
6 changes: 6 additions & 0 deletions activitysim/abm/models/util/tour_od.py
Original file line number Diff line number Diff line change
Expand Up @@ -692,6 +692,9 @@ def run_od_sample(
choosers = tours
# FIXME - MEMORY HACK - only include columns actually used in spec
chooser_columns = model_settings.SIMULATE_CHOOSER_COLUMNS
# Drop this when PR #1017 is merged
if ("household_id" not in chooser_columns) and ("household_id" in choosers.columns):
chooser_columns = chooser_columns + ["household_id"]
choosers = choosers[chooser_columns]

# interaction_sample requires that choosers.index.is_monotonic_increasing
Expand Down Expand Up @@ -951,6 +954,9 @@ def run_od_simulate(

# FIXME - MEMORY HACK - only include columns actually used in spec
chooser_columns = model_settings.SIMULATE_CHOOSER_COLUMNS
# Drop this when PR #1017 is merged
if ("household_id" not in chooser_columns) and ("household_id" in choosers.columns):
chooser_columns = chooser_columns + ["household_id"]
choosers = choosers[chooser_columns]

# interaction_sample requires that choosers.index.is_monotonic_increasing
Expand Down
6 changes: 6 additions & 0 deletions activitysim/abm/models/util/tour_scheduling.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,12 @@ def run_tour_scheduling(
c for c in model_columns if c not in logsum_columns
]

# Drop this when PR #1017 is merged
if ("household_id" not in chooser_columns) and (
"household_id" in persons_merged.columns
):
chooser_columns = chooser_columns + ["household_id"]

persons_merged = expressions.filter_chooser_columns(persons_merged, chooser_columns)

timetable = state.get_injectable("timetable")
Expand Down
7 changes: 7 additions & 0 deletions activitysim/core/configuration/top.py
Original file line number Diff line number Diff line change
Expand Up @@ -781,6 +781,13 @@ def _check_store_skims_in_shm(self):
should catch many common errors early, including missing required configurations or specified coefficient labels without defined values.
"""

skip_failed_choices: bool = True
"""
Skip households that cause errors during processing instead of failing the model run.

.. versionadded:: 1.6
"""

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need additional setting[s] to set thresholds for how many skips are OK and when it's too many and should be an error.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added fraction_of_failed_choices_allowed as a global parameter and implemented the checking in core.workflow.state.update_table()

other_settings: dict[str, Any] = None

def _get_attr(self, attr):
Expand Down
5 changes: 5 additions & 0 deletions activitysim/core/interaction_sample_simulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,11 @@ def _interaction_sample_simulate(
# that is, we want the index value of the row that is offset by <position> rows into the
# tranche of this choosers alternatives created by cross join of alternatives and choosers

# when skip failed choices is enabled, the position may be -99 for failed choices, which gets droppped eventually
# here we just need to clip to zero to avoid getting the wrong index in the take() below
if state.settings.skip_failed_choices:
positions = positions.clip(lower=0)

# resulting pandas Int64Index has one element per chooser row and is in same order as choosers
choices = alternatives[choice_column].take(positions + first_row_offsets)

Expand Down
49 changes: 49 additions & 0 deletions activitysim/core/logit.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def report_bad_choices(
state: workflow.State,
bad_row_map,
df,
skip_failed_choices,
trace_label,
msg,
trace_choosers=None,
Expand Down Expand Up @@ -87,6 +88,27 @@ def report_bad_choices(

logger.warning(row_msg)

if skip_failed_choices:
# update counter in state
num_skipped_households = state.get("num_skipped_households", 0)
skipped_household_ids = state.get("skipped_household_ids", set())
for hh_id in df[trace_col].unique():
if hh_id is None:
continue
if hh_id not in skipped_household_ids:
skipped_household_ids.add(hh_id)
num_skipped_households += 1
else:
continue
state.set("num_skipped_households", num_skipped_households)
state.set("skipped_household_ids", skipped_household_ids)

logger.debug(
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

logger level here should be warn not debug

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated to warning for all loggings of skipping households.

f"Skipping {bad_row_map.sum()} bad choices. Total skipped households so far: {num_skipped_households}. Skipped household IDs: {skipped_household_ids}"
)

return

if raise_error:
raise InvalidTravelError(msg_with_count)

Expand Down Expand Up @@ -136,6 +158,7 @@ def utils_to_probs(
allow_zero_probs=False,
trace_choosers=None,
overflow_protection: bool = True,
skip_failed_choices: bool = True,
return_logsums: bool = False,
):
"""
Expand Down Expand Up @@ -176,6 +199,16 @@ def utils_to_probs(
overflow_protection will have no benefit but impose a modest computational
overhead cost.

skip_failed_choices : bool, default True
If True, when bad choices are detected (all zero probabilities or infinite
probabilities), the entire household that's causing bad choices will be skipped instead of
being masked by overflow protection or causing an error.
A counter will be incremented for each skipped household. This is useful when running large
simulations where occasional bad choices are encountered and should not halt the process.
The counter can be accessed via `state.get("num_skipped_households", 0)`.
The number of skipped households and their IDs will be logged at the end of the simulation.
When `skip_failed_choices` is True, `overflow_protection` will be reverted to False to avoid conflicts.

Returns
-------
probs : pandas.DataFrame
Expand Down Expand Up @@ -203,6 +236,13 @@ def utils_to_probs(
utils_arr.dtype == np.float32 and utils_arr.max() > 85
)

if state.settings.skip_failed_choices is not None:
skip_failed_choices = state.settings.skip_failed_choices
# when skipping failed choices, we cannot use overflow protection
# because it would mask the underlying issue causing bad choices
if skip_failed_choices:
overflow_protection = False

if overflow_protection:
# exponentiated utils will overflow, downshift them
shifts = utils_arr.max(1, keepdims=True)
Expand Down Expand Up @@ -240,6 +280,7 @@ def utils_to_probs(
state,
zero_probs,
utils,
skip_failed_choices,
trace_label=tracing.extend_trace_label(trace_label, "zero_prob_utils"),
msg="all probabilities are zero",
trace_choosers=trace_choosers,
Expand All @@ -251,6 +292,7 @@ def utils_to_probs(
state,
inf_utils,
utils,
skip_failed_choices,
trace_label=tracing.extend_trace_label(trace_label, "inf_exp_utils"),
msg="infinite exponentiated utilities",
trace_choosers=trace_choosers,
Expand Down Expand Up @@ -281,6 +323,7 @@ def make_choices(
trace_label: str = None,
trace_choosers=None,
allow_bad_probs=False,
skip_failed_choices=True,
) -> tuple[pd.Series, pd.Series]:
"""
Make choices for each chooser from among a set of alternatives.
Expand Down Expand Up @@ -316,11 +359,15 @@ def make_choices(
np.ones(len(probs.index))
).abs() > BAD_PROB_THRESHOLD * np.ones(len(probs.index))

if state.settings.skip_failed_choices is not None:
skip_failed_choices = state.settings.skip_failed_choices

if bad_probs.any() and not allow_bad_probs:
report_bad_choices(
state,
bad_probs,
probs,
skip_failed_choices,
trace_label=tracing.extend_trace_label(trace_label, "bad_probs"),
msg="probabilities do not add up to 1",
trace_choosers=trace_choosers,
Expand All @@ -329,6 +376,8 @@ def make_choices(
rands = state.get_rn_generator().random_for_df(probs)

choices = pd.Series(choice_maker(probs.values, rands), index=probs.index)
# mark bad choices with -99
choices[bad_probs] = -99

rands = pd.Series(np.asanyarray(rands).flatten(), index=probs.index)

Expand Down
8 changes: 7 additions & 1 deletion activitysim/core/simulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -1325,7 +1325,9 @@ def eval_mnl(
if custom_chooser:
choices, rands = custom_chooser(state, probs, choosers, spec, trace_label)
else:
choices, rands = logit.make_choices(state, probs, trace_label=trace_label)
choices, rands = logit.make_choices(
state, probs, trace_label=trace_label, trace_choosers=choosers
)

del probs
chunk_sizer.log_df(trace_label, "probs", None)
Expand Down Expand Up @@ -1485,11 +1487,15 @@ def eval_nl(
BAD_PROB_THRESHOLD = 0.001
no_choices = (base_probabilities.sum(axis=1) - 1).abs() > BAD_PROB_THRESHOLD

if state.settings.skip_failed_choices is not None:
skip_failed_choices = state.settings.skip_failed_choices

if no_choices.any():
logit.report_bad_choices(
state,
no_choices,
base_probabilities,
skip_failed_choices,
trace_label=tracing.extend_trace_label(trace_label, "bad_probs"),
trace_choosers=choosers,
msg="base_probabilities do not sum to one",
Expand Down
1 change: 1 addition & 0 deletions activitysim/core/test/test_logit.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def test_utils_to_probs(utilities, test_data):

def test_utils_to_probs_raises():
state = workflow.State().default_settings()
state.settings.skip_failed_choices = False
idx = pd.Index(name="household_id", data=[1])
with pytest.raises(RuntimeError) as excinfo:
logit.utils_to_probs(
Expand Down
Loading