diff --git a/modules/dialog/dlg_handlers.c b/modules/dialog/dlg_handlers.c index 6fc7dfe12f..d29915dd9b 100644 --- a/modules/dialog/dlg_handlers.c +++ b/modules/dialog/dlg_handlers.c @@ -1889,7 +1889,7 @@ void dlg_onroute(struct sip_msg* req, str *route_params, void *param) int event; unsigned int update_val; unsigned int dir,dst_leg,src_leg; - int ret = 0,ok = 1; + int ok = 1; struct dlg_entry *d_entry; str *msg_cseq; char *final_cseq; @@ -2130,31 +2130,6 @@ void dlg_onroute(struct sip_msg* req, str *route_params, void *param) after_unlock5: - /* remove from timer */ - ret = remove_dlg_timer(&dlg->tl); - if (ret < 0) { - LM_CRIT("unable to unlink the timer on dlg %p [%u:%u] " - "with clid '%.*s' and tags '%.*s' '%.*s'\n", - dlg, dlg->h_entry, dlg->h_id, - dlg->callid.len, dlg->callid.s, - dlg->legs[DLG_CALLER_LEG].tag.len, - dlg->legs[DLG_CALLER_LEG].tag.s, - dlg->legs[callee_idx(dlg)].tag.len, - ZSW(dlg->legs[callee_idx(dlg)].tag.s)); - } else if (ret > 0) { - LM_DBG("dlg expired (not in timer list) on dlg %p [%u:%u] " - "with clid '%.*s' and tags '%.*s' '%.*s'\n", - dlg, dlg->h_entry, dlg->h_id, - dlg->callid.len, dlg->callid.s, - dlg->legs[DLG_CALLER_LEG].tag.len, - dlg->legs[DLG_CALLER_LEG].tag.s, - dlg->legs[callee_idx(dlg)].tag.len, - ZSW(dlg->legs[callee_idx(dlg)].tag.s)); - } else { - /* dialog successfully removed from timer -> unref */ - unref++; - } - /* dialog terminated (BYE) */ run_dlg_callbacks(DLGCB_TERMINATED, dlg, req, dir, dst_leg, NULL, 0, is_active); @@ -2452,14 +2427,34 @@ void dlg_ontimeout(struct dlg_tl *tl) context_p old_ctx; context_p *new_ctx; struct dlg_cell *dlg; + struct dlg_entry *d_entry; int new_state; int old_state; int unref; int do_expire_actions = 1; + int dlg_state; dlg = get_dlg_tl_payload(tl); + /* d_table is global, initialized in mod_init(); guaranteed valid if + * the timer callback fires. dlg->h_entry is set during dialog + * creation (init_new_dialog) and never changes; get_dlg_tl_payload + * is a container_of macro that derives the dlg pointer from the + * timer link — cannot return NULL since the timer fired on this + * link. Same pattern as next_state_dlg (line ~1193) which does + * the identical d_entry lookup without null/bounds checks. */ + d_entry = &(d_table->entries[dlg->h_entry]); + + /* Read the dialog state under lock to ensure visibility of + * concurrent state changes (GH-3835). On architectures with + * relaxed memory ordering (e.g. ARM64), an unlocked read of + * dlg->state can return a stale value, causing the timer to + * proceed as if the dialog is still CONFIRMED when a BYE + * worker has already transitioned it to DELETED. */ + dlg_lock(d_table, d_entry); + dlg_state = dlg->state; + dlg_unlock(d_table, d_entry); - LM_DBG("byeontimeout ? flags = %d , state = %d\n",dlg->flags,dlg->state); + LM_DBG("byeontimeout ? flags = %d , state = %d\n",dlg->flags,dlg_state); if (dialog_repl_cluster) { /* if dialog replication is used, send BYEs only if the current node @@ -2471,7 +2466,7 @@ void dlg_ontimeout(struct dlg_tl *tl) * the dialog. We this self prolonging only once! */ if (!do_expire_actions && ref_script_route_check_and_update(dlg->rt_on_timeout) - && dlg->stateflags&DLG_FLAG_SELF_EXTENDED_TIMEOUT)) { LM_DBG("self prolonging with 10 mins to see what the active" "decides after the on-timeout route\n"); @@ -2494,7 +2489,7 @@ void dlg_ontimeout(struct dlg_tl *tl) if (do_expire_actions && ref_script_route_check_and_update(dlg->rt_on_timeout) - && dlg->statenext = tl->prev = NULL; @@ -2521,7 +2516,7 @@ void dlg_ontimeout(struct dlg_tl *tl) } if ((dlg->flags&DLG_FLAG_BYEONTIMEOUT) && - (dlg->state==DLG_STATE_CONFIRMED_NA || dlg->state==DLG_STATE_CONFIRMED)) { + (dlg_state==DLG_STATE_CONFIRMED_NA || dlg_state==DLG_STATE_CONFIRMED)) { if (do_expire_actions) { if (dlg->flags & DLG_FLAG_RACE_CONDITION_OCCURRED) diff --git a/modules/dialog/dlg_hash.c b/modules/dialog/dlg_hash.c index 3daffc3cf4..ab74ba4909 100644 --- a/modules/dialog/dlg_hash.c +++ b/modules/dialog/dlg_hash.c @@ -1330,6 +1330,23 @@ void next_state_dlg(struct dlg_cell *dlg, int event, int dir, int *old_state, } *new_state = dlg->state; + /* Remove dialog timer under lock to prevent race with concurrent + * unref (GH-3835): if we transitioned to DELETED, remove the timer + * atomically with the state change so no other worker can see + * state=DELETED and race to unref before timer cleanup */ + if (*old_state != DLG_STATE_DELETED && *new_state == DLG_STATE_DELETED) { + int ret = remove_dlg_timer(&dlg->tl); + if (ret == 0) + (*unref)++; + else if (ret < 0) + LM_CRIT("unable to unlink the timer on dlg %p [%u:%u] " + "with clid '%.*s' and tags '%.*s' '%.*s'\n", + dlg, dlg->h_entry, dlg->h_id, + dlg->callid.len, dlg->callid.s, + dlg_leg_print_info(dlg, DLG_CALLER_LEG, tag), + dlg_leg_print_info(dlg, callee_idx(dlg), tag)); + } + dlg_unlock( d_table, d_entry); if (*old_state != *new_state) diff --git a/modules/dialog/dlg_replication.c b/modules/dialog/dlg_replication.c index 1e2a590188..1a3055af73 100644 --- a/modules/dialog/dlg_replication.c +++ b/modules/dialog/dlg_replication.c @@ -599,7 +599,7 @@ int dlg_replicated_delete(bin_packet_t *packet) str call_id, from_tag, to_tag; unsigned int dir, _ = -1; struct dlg_cell *dlg; - int old_state, new_state, unref, ret; + int old_state, new_state, unref; unsigned int h_id; int h_entry; short pkg_ver = get_bin_pkg_version(packet); @@ -639,30 +639,6 @@ int dlg_replicated_delete(bin_packet_t *packet) return -1; } - ret = remove_dlg_timer(&dlg->tl); - if (ret < 0) { - LM_CRIT("unable to unlink the timer on dlg %p [%u:%u] " - "with clid '%.*s' and tags '%.*s' '%.*s'\n", - dlg, dlg->h_entry, dlg->h_id, - dlg->callid.len, dlg->callid.s, - dlg->legs[DLG_CALLER_LEG].tag.len, - dlg->legs[DLG_CALLER_LEG].tag.s, - dlg->legs[callee_idx(dlg)].tag.len, - ZSW(dlg->legs[callee_idx(dlg)].tag.s)); - } else if (ret > 0) { - LM_DBG("dlg expired (not in timer list) on dlg %p [%u:%u] " - "with clid '%.*s' and tags '%.*s' '%.*s'\n", - dlg, dlg->h_entry, dlg->h_id, - dlg->callid.len, dlg->callid.s, - dlg->legs[DLG_CALLER_LEG].tag.len, - dlg->legs[DLG_CALLER_LEG].tag.s, - dlg->legs[callee_idx(dlg)].tag.len, - ZSW(dlg->legs[callee_idx(dlg)].tag.s)); - } else { - /* dialog successfully removed from timer -> unref */ - unref++; - } - unref_dlg(dlg, 1 + unref); if_update_stat(dlg_enable_stats, active_dlgs, -1); @@ -1200,7 +1176,7 @@ static int receive_sync_request(int node_id) struct dlg_cell *drop_dlg(struct dlg_cell *dlg, int i) { struct dlg_cell *next_dlg; - int ret, unref, old_state, new_state; + int unref, old_state, new_state; /* make sure dialog is not freed while we don't hold the lock */ ref_dlg_unsafe(dlg, 1); @@ -1226,19 +1202,6 @@ struct dlg_cell *drop_dlg(struct dlg_cell *dlg, int i) dlg_lock(d_table, &d_table->entries[i]); - /* remove from timer, even though it may be done already */ - ret = remove_dlg_timer(&dlg->tl); - if (ret < 0) { - LM_ERR("unable to unlink the timer on dlg %p [%u:%u] " - "with clid '%.*s' and tags '%.*s' '%.*s'\n", - dlg, dlg->h_entry, dlg->h_id, - dlg->callid.len, dlg->callid.s, - dlg_leg_print_info(dlg, DLG_CALLER_LEG, tag), - dlg_leg_print_info(dlg, callee_idx(dlg), tag)); - } else if (ret == 0) - /* successfully removed from timer list */ - unref++; - if (dlg_db_mode != DB_MODE_NONE) { if (dlg_db_mode == DB_MODE_DELAYED && !(dlg->flags&DLG_FLAG_DB_DELETED)) diff --git a/modules/dialog/dlg_req_within.c b/modules/dialog/dlg_req_within.c index 51e0cba212..164f68d120 100644 --- a/modules/dialog/dlg_req_within.c +++ b/modules/dialog/dlg_req_within.c @@ -212,7 +212,7 @@ dlg_t * build_dialog_info(struct dlg_cell * cell, int dst_leg, int src_leg, static void dual_bye_event(struct dlg_cell* dlg, struct sip_msg *req, int is_active) { - int event, old_state, new_state, unref, ret; + int event, old_state, new_state, unref; struct sip_msg *fake_msg=NULL; context_p old_ctx; context_p *new_ctx; @@ -233,27 +233,6 @@ static void dual_bye_event(struct dlg_cell* dlg, struct sip_msg *req, destroy_linkers(dlg); remove_dlg_prof_table(dlg,is_active); - /* remove from timer */ - ret = remove_dlg_timer(&dlg->tl); - if (ret < 0) { - LM_CRIT("unable to unlink the timer on dlg %p [%u:%u] " - "with clid '%.*s' and tags '%.*s' '%.*s'\n", - dlg, dlg->h_entry, dlg->h_id, - dlg->callid.len, dlg->callid.s, - dlg_leg_print_info( dlg, DLG_CALLER_LEG, tag), - dlg_leg_print_info( dlg, callee_idx(dlg), tag)); - } else if (ret > 0) { - LM_DBG("dlg already expired (not in timer list) %p [%u:%u] " - "with clid '%.*s' and tags '%.*s' '%.*s'\n", - dlg, dlg->h_entry, dlg->h_id, - dlg->callid.len, dlg->callid.s, - dlg_leg_print_info( dlg, DLG_CALLER_LEG, tag), - dlg_leg_print_info( dlg, callee_idx(dlg), tag)); - } else { - /* successfully removed from timer list */ - unref++; - } - if (req==NULL) { /* set new msg & processing context */ if (push_new_processing_context( dlg, &old_ctx, &new_ctx, &fake_msg)==0) {