From 0589457324671c3aa53993c320f0df24ac5ceb03 Mon Sep 17 00:00:00 2001 From: Bikash Singha Date: Tue, 17 Mar 2026 22:53:06 +0530 Subject: [PATCH] Updated the runlist timeout path to get correct command idx Signed-off-by: Bikash Singha --- src/driver/amdxdna/ve2_host_queue.h | 2 +- src/driver/amdxdna/ve2_hwctx.c | 29 ++++++++++++++++++++++++++--- src/driver/amdxdna/ve2_mgmt.c | 2 +- 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/src/driver/amdxdna/ve2_host_queue.h b/src/driver/amdxdna/ve2_host_queue.h index 157dda4a1..7f53e8709 100644 --- a/src/driver/amdxdna/ve2_host_queue.h +++ b/src/driver/amdxdna/ve2_host_queue.h @@ -296,7 +296,7 @@ struct handshake { trace_save; // 68 This needs to be saved/restored during ctx switch to support preemption u32 doorbell_pending; // 6c this is to solve the race condition. //MPNPU will set it to 1 when it receives doorbell from host. - u32 completion_status; + u32 runlist_read_idx; // 70 relative read index in the runlist u32 reserved1[7]; //make sure vm (below) starts at offset 0xa0 u32 last_ddr_dm2mm_addr_high; // 90 u32 last_ddr_dm2mm_addr_low; // 94 diff --git a/src/driver/amdxdna/ve2_hwctx.c b/src/driver/amdxdna/ve2_hwctx.c index dce90b8e5..724eedecd 100644 --- a/src/driver/amdxdna/ve2_hwctx.c +++ b/src/driver/amdxdna/ve2_hwctx.c @@ -1148,10 +1148,33 @@ int ve2_cmd_wait(struct amdxdna_ctx *hwctx, u64 seq, u32 timeout) if (!cc) { XDNA_WARN(xdna, "cmd_chain timeout: failed to get payload"); } else { - /* In the async callback/timeout case, - * driver sets error index to 0 + u32 fail_cmd_idx = 0; + u32 rl_read_idx = 0; + int rd_ret; + + /* + * CERT tracks progress via runlist_read_idx in + * the handshake (offset 0x70). Read it from the + * lead CERT (col 0) to find which sub-command + * was being processed when the timeout fired. */ - cc->error_index = 0; + rd_ret = ve2_partition_read_privileged_mem( + priv_ctx->aie_dev, 0, + offsetof(struct handshake, runlist_read_idx), + sizeof(rl_read_idx), + &rl_read_idx); + if (rd_ret >= 0) + fail_cmd_idx = rl_read_idx; + + if (fail_cmd_idx >= cmd_count) + fail_cmd_idx = 0; + + cc->error_index = fail_cmd_idx; + XDNA_ERR(xdna, + "Timeout at cmd chain index %u (slot %u), runlist_read_idx %u", + fail_cmd_idx, + (start_slot + fail_cmd_idx) % capacity, + rl_read_idx); } } } else { diff --git a/src/driver/amdxdna/ve2_mgmt.c b/src/driver/amdxdna/ve2_mgmt.c index 59a15da10..b0aa00e1b 100644 --- a/src/driver/amdxdna/ve2_mgmt.c +++ b/src/driver/amdxdna/ve2_mgmt.c @@ -911,7 +911,7 @@ static void ve2_dump_debug_state(struct amdxdna_dev *xdna, XDNA_WARN(xdna, " ctx_switch_req: 0x%x\n", hs->ctx_switch_req); XDNA_WARN(xdna, " cert_idle_status: 0x%x\n", hs->cert_idle_status); XDNA_WARN(xdna, " misc_status: 0x%x\n", hs->misc_status); - XDNA_WARN(xdna, " completion_status: 0x%x\n", hs->completion_status); + XDNA_WARN(xdna, " runlist_read_idx: 0x%x\n", hs->runlist_read_idx); XDNA_WARN(xdna, " doorbell_pending: %u\n", hs->doorbell_pending); /* Dump VM state (firmware execution context) */