Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 25 additions & 30 deletions modules/dialog/dlg_handlers.c
Original file line number Diff line number Diff line change
Expand Up @@ -1889,7 +1889,7 @@ void dlg_onroute(struct sip_msg* req, str *route_params, void *param)
int event;
unsigned int update_val;
unsigned int dir,dst_leg,src_leg;
int ret = 0,ok = 1;
int ok = 1;
struct dlg_entry *d_entry;
str *msg_cseq;
char *final_cseq;
Expand Down Expand Up @@ -2130,31 +2130,6 @@ void dlg_onroute(struct sip_msg* req, str *route_params, void *param)

after_unlock5:

/* remove from timer */
ret = remove_dlg_timer(&dlg->tl);
if (ret < 0) {
LM_CRIT("unable to unlink the timer on dlg %p [%u:%u] "
"with clid '%.*s' and tags '%.*s' '%.*s'\n",
dlg, dlg->h_entry, dlg->h_id,
dlg->callid.len, dlg->callid.s,
dlg->legs[DLG_CALLER_LEG].tag.len,
dlg->legs[DLG_CALLER_LEG].tag.s,
dlg->legs[callee_idx(dlg)].tag.len,
ZSW(dlg->legs[callee_idx(dlg)].tag.s));
} else if (ret > 0) {
LM_DBG("dlg expired (not in timer list) on dlg %p [%u:%u] "
"with clid '%.*s' and tags '%.*s' '%.*s'\n",
dlg, dlg->h_entry, dlg->h_id,
dlg->callid.len, dlg->callid.s,
dlg->legs[DLG_CALLER_LEG].tag.len,
dlg->legs[DLG_CALLER_LEG].tag.s,
dlg->legs[callee_idx(dlg)].tag.len,
ZSW(dlg->legs[callee_idx(dlg)].tag.s));
} else {
/* dialog successfully removed from timer -> unref */
unref++;
}

/* dialog terminated (BYE) */
run_dlg_callbacks(DLGCB_TERMINATED, dlg, req, dir, dst_leg, NULL, 0, is_active);

Expand Down Expand Up @@ -2452,14 +2427,34 @@ void dlg_ontimeout(struct dlg_tl *tl)
context_p old_ctx;
context_p *new_ctx;
struct dlg_cell *dlg;
struct dlg_entry *d_entry;
int new_state;
int old_state;
int unref;
int do_expire_actions = 1;
int dlg_state;

dlg = get_dlg_tl_payload(tl);
/* d_table is global, initialized in mod_init(); guaranteed valid if
* the timer callback fires. dlg->h_entry is set during dialog
* creation (init_new_dialog) and never changes; get_dlg_tl_payload
* is a container_of macro that derives the dlg pointer from the
* timer link — cannot return NULL since the timer fired on this
* link. Same pattern as next_state_dlg (line ~1193) which does
* the identical d_entry lookup without null/bounds checks. */
d_entry = &(d_table->entries[dlg->h_entry]);

/* Read the dialog state under lock to ensure visibility of
* concurrent state changes (GH-3835). On architectures with
* relaxed memory ordering (e.g. ARM64), an unlocked read of
* dlg->state can return a stale value, causing the timer to
* proceed as if the dialog is still CONFIRMED when a BYE
* worker has already transitioned it to DELETED. */
dlg_lock(d_table, d_entry);
dlg_state = dlg->state;
dlg_unlock(d_table, d_entry);

LM_DBG("byeontimeout ? flags = %d , state = %d\n",dlg->flags,dlg->state);
LM_DBG("byeontimeout ? flags = %d , state = %d\n",dlg->flags,dlg_state);

if (dialog_repl_cluster) {
/* if dialog replication is used, send BYEs only if the current node
Expand All @@ -2471,7 +2466,7 @@ void dlg_ontimeout(struct dlg_tl *tl)
* the dialog. We this self prolonging only once! */
if (!do_expire_actions
&& ref_script_route_check_and_update(dlg->rt_on_timeout)
&& dlg->state<DLG_STATE_DELETED
&& dlg_state<DLG_STATE_DELETED
&& !(dlg->flags&DLG_FLAG_SELF_EXTENDED_TIMEOUT)) {
LM_DBG("self prolonging with 10 mins to see what the active"
"decides after the on-timeout route\n");
Expand All @@ -2494,7 +2489,7 @@ void dlg_ontimeout(struct dlg_tl *tl)

if (do_expire_actions
&& ref_script_route_check_and_update(dlg->rt_on_timeout)
&& dlg->state<DLG_STATE_DELETED) {
&& dlg_state<DLG_STATE_DELETED) {
struct dlg_tl bk_tl = *tl;
/* allow the dialog to be re-inserted in the timer list */
tl->next = tl->prev = NULL;
Expand All @@ -2521,7 +2516,7 @@ void dlg_ontimeout(struct dlg_tl *tl)
}

if ((dlg->flags&DLG_FLAG_BYEONTIMEOUT) &&
(dlg->state==DLG_STATE_CONFIRMED_NA || dlg->state==DLG_STATE_CONFIRMED)) {
(dlg_state==DLG_STATE_CONFIRMED_NA || dlg_state==DLG_STATE_CONFIRMED)) {

if (do_expire_actions) {
if (dlg->flags & DLG_FLAG_RACE_CONDITION_OCCURRED)
Expand Down
17 changes: 17 additions & 0 deletions modules/dialog/dlg_hash.c
Original file line number Diff line number Diff line change
Expand Up @@ -1330,6 +1330,23 @@ void next_state_dlg(struct dlg_cell *dlg, int event, int dir, int *old_state,
}
*new_state = dlg->state;

/* Remove dialog timer under lock to prevent race with concurrent
* unref (GH-3835): if we transitioned to DELETED, remove the timer
* atomically with the state change so no other worker can see
* state=DELETED and race to unref before timer cleanup */
if (*old_state != DLG_STATE_DELETED && *new_state == DLG_STATE_DELETED) {
int ret = remove_dlg_timer(&dlg->tl);
if (ret == 0)
(*unref)++;
else if (ret < 0)
LM_CRIT("unable to unlink the timer on dlg %p [%u:%u] "
"with clid '%.*s' and tags '%.*s' '%.*s'\n",
dlg, dlg->h_entry, dlg->h_id,
dlg->callid.len, dlg->callid.s,
dlg_leg_print_info(dlg, DLG_CALLER_LEG, tag),
dlg_leg_print_info(dlg, callee_idx(dlg), tag));
}

dlg_unlock( d_table, d_entry);

if (*old_state != *new_state)
Expand Down
41 changes: 2 additions & 39 deletions modules/dialog/dlg_replication.c
Original file line number Diff line number Diff line change
Expand Up @@ -599,7 +599,7 @@ int dlg_replicated_delete(bin_packet_t *packet)
str call_id, from_tag, to_tag;
unsigned int dir, _ = -1;
struct dlg_cell *dlg;
int old_state, new_state, unref, ret;
int old_state, new_state, unref;
unsigned int h_id;
int h_entry;
short pkg_ver = get_bin_pkg_version(packet);
Expand Down Expand Up @@ -639,30 +639,6 @@ int dlg_replicated_delete(bin_packet_t *packet)
return -1;
}

ret = remove_dlg_timer(&dlg->tl);
if (ret < 0) {
LM_CRIT("unable to unlink the timer on dlg %p [%u:%u] "
"with clid '%.*s' and tags '%.*s' '%.*s'\n",
dlg, dlg->h_entry, dlg->h_id,
dlg->callid.len, dlg->callid.s,
dlg->legs[DLG_CALLER_LEG].tag.len,
dlg->legs[DLG_CALLER_LEG].tag.s,
dlg->legs[callee_idx(dlg)].tag.len,
ZSW(dlg->legs[callee_idx(dlg)].tag.s));
} else if (ret > 0) {
LM_DBG("dlg expired (not in timer list) on dlg %p [%u:%u] "
"with clid '%.*s' and tags '%.*s' '%.*s'\n",
dlg, dlg->h_entry, dlg->h_id,
dlg->callid.len, dlg->callid.s,
dlg->legs[DLG_CALLER_LEG].tag.len,
dlg->legs[DLG_CALLER_LEG].tag.s,
dlg->legs[callee_idx(dlg)].tag.len,
ZSW(dlg->legs[callee_idx(dlg)].tag.s));
} else {
/* dialog successfully removed from timer -> unref */
unref++;
}

unref_dlg(dlg, 1 + unref);
if_update_stat(dlg_enable_stats, active_dlgs, -1);

Expand Down Expand Up @@ -1200,7 +1176,7 @@ static int receive_sync_request(int node_id)
struct dlg_cell *drop_dlg(struct dlg_cell *dlg, int i)
{
struct dlg_cell *next_dlg;
int ret, unref, old_state, new_state;
int unref, old_state, new_state;

/* make sure dialog is not freed while we don't hold the lock */
ref_dlg_unsafe(dlg, 1);
Expand All @@ -1226,19 +1202,6 @@ struct dlg_cell *drop_dlg(struct dlg_cell *dlg, int i)

dlg_lock(d_table, &d_table->entries[i]);

/* remove from timer, even though it may be done already */
ret = remove_dlg_timer(&dlg->tl);
if (ret < 0) {
LM_ERR("unable to unlink the timer on dlg %p [%u:%u] "
"with clid '%.*s' and tags '%.*s' '%.*s'\n",
dlg, dlg->h_entry, dlg->h_id,
dlg->callid.len, dlg->callid.s,
dlg_leg_print_info(dlg, DLG_CALLER_LEG, tag),
dlg_leg_print_info(dlg, callee_idx(dlg), tag));
} else if (ret == 0)
/* successfully removed from timer list */
unref++;

if (dlg_db_mode != DB_MODE_NONE) {
if (dlg_db_mode == DB_MODE_DELAYED &&
!(dlg->flags&DLG_FLAG_DB_DELETED))
Expand Down
23 changes: 1 addition & 22 deletions modules/dialog/dlg_req_within.c
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ dlg_t * build_dialog_info(struct dlg_cell * cell, int dst_leg, int src_leg,
static void dual_bye_event(struct dlg_cell* dlg, struct sip_msg *req,
int is_active)
{
int event, old_state, new_state, unref, ret;
int event, old_state, new_state, unref;
struct sip_msg *fake_msg=NULL;
context_p old_ctx;
context_p *new_ctx;
Expand All @@ -233,27 +233,6 @@ static void dual_bye_event(struct dlg_cell* dlg, struct sip_msg *req,
destroy_linkers(dlg);
remove_dlg_prof_table(dlg,is_active);

/* remove from timer */
ret = remove_dlg_timer(&dlg->tl);
if (ret < 0) {
LM_CRIT("unable to unlink the timer on dlg %p [%u:%u] "
"with clid '%.*s' and tags '%.*s' '%.*s'\n",
dlg, dlg->h_entry, dlg->h_id,
dlg->callid.len, dlg->callid.s,
dlg_leg_print_info( dlg, DLG_CALLER_LEG, tag),
dlg_leg_print_info( dlg, callee_idx(dlg), tag));
} else if (ret > 0) {
LM_DBG("dlg already expired (not in timer list) %p [%u:%u] "
"with clid '%.*s' and tags '%.*s' '%.*s'\n",
dlg, dlg->h_entry, dlg->h_id,
dlg->callid.len, dlg->callid.s,
dlg_leg_print_info( dlg, DLG_CALLER_LEG, tag),
dlg_leg_print_info( dlg, callee_idx(dlg), tag));
} else {
/* successfully removed from timer list */
unref++;
}

if (req==NULL) {
/* set new msg & processing context */
if (push_new_processing_context( dlg, &old_ctx, &new_ctx, &fake_msg)==0) {
Expand Down
Loading