Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 95 additions & 27 deletions src/driver/amdxdna/ve2_debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#include "amdxdna_error.h"
#include "amdxdna_drm.h"

extern int enable_debug_queue;

static int ve2_query_ctx_status_array(struct amdxdna_client *client,
struct amdxdna_drm_hwctx_entry *tmp,
pid_t pid, u32 ctx_id)
Expand Down Expand Up @@ -151,6 +153,53 @@ static int ve2_get_array_hwctx(struct amdxdna_client *client,
return ret;
}

static int ve2_dbg_queue_data_rw(struct amdxdna_dev *xdev, struct amdxdna_ctx *hwctx,
u32 col, u32 row, u32 addr, void *data, size_t size,
int cmd_type)
{
struct platform_device *pdev = to_platform_device(xdev->ddev.dev);
dma_addr_t dma_handle;
void *virt_ptr = NULL;
int ret = 0;

if (size % 4 != 0) {
XDNA_ERR(xdev, "Size (%zu) must be a multiple of 4 bytes", size);
return -EINVAL;
}
/*Allocate phy memory and pass it to submit function*/
Copy link

Copilot AI Feb 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing space after the comment start marker. The comment should be formatted as "/* Allocate phy memory..." with a space after the opening /* for consistency with kernel coding style.

Suggested change
/*Allocate phy memory and pass it to submit function*/
/* Allocate phy memory and pass it to submit function */

Copilot uses AI. Check for mistakes.
virt_ptr = dma_alloc_coherent(&pdev->dev, size, &dma_handle, GFP_KERNEL);
if (!virt_ptr) {
XDNA_ERR(xdev, "Failed to allocate DMA buffer");
return -ENOMEM;
}

addr = addr + ((col << VE2_COL_SHIFT) + (row << VE2_ROW_SHIFT));

switch (cmd_type) {
case DBG_CMD_WRITE:
memcpy(virt_ptr, data, size);
ret = submit_command_to_dbg_queue(hwctx, DBG_CMD_WRITE, addr, (u64)dma_handle,
size / 4);
break;
case DBG_CMD_READ:
ret = submit_command_to_dbg_queue(hwctx, DBG_CMD_READ, addr, (u64)dma_handle,
size / 4);
if (ret == 0)
memcpy(data, virt_ptr, size);
break;
case DBG_CMD_EXIT:
ret = submit_command_to_dbg_queue(hwctx, DBG_CMD_EXIT, addr, (u64)dma_handle,
size / 4);
break;
default:
XDNA_ERR(xdev, "CMD_TYPE is not supported");
ret = -EINVAL;
break;
}
dma_free_coherent(&pdev->dev, size, virt_ptr, dma_handle);
return ret;
}

static int ve2_aie_write(struct amdxdna_client *client,
struct amdxdna_drm_set_state *args)
{
Expand All @@ -171,7 +220,7 @@ static int ve2_aie_write(struct amdxdna_client *client,
return -EFAULT;
}

XDNA_DBG(xdna, "Write request for ctx_id: %u, col: %u, row: %u, addr: 0x%x, size: %u\n",
XDNA_DBG(xdna, "Write request for ctx_id: %u, col: %u, row: %u, addr: 0x%x, size: %u",
footer.context_id, footer.col, footer.row, footer.addr, footer.size);

/* Find the hardware context */
Expand All @@ -187,32 +236,32 @@ static int ve2_aie_write(struct amdxdna_client *client,
}

if (!hwctx) {
XDNA_ERR(xdna, "hw context :%u pid:%llu not found\n", footer.context_id,
XDNA_ERR(xdna, "hw context :%u pid:%llu not found", footer.context_id,
footer.pid);
return -EINVAL;
}

XDNA_DBG(xdna, "Found hwctx: cl_pid: %u, hwctx_id: %u, start_col %u, ncol %u\n",
XDNA_DBG(xdna, "Found hwctx: cl_pid: %u, hwctx_id: %u, start_col %u, ncol %u",
hwctx->client->pid, hwctx->id, hwctx->start_col, hwctx->num_col);

/* Validate column is within partition */
if (footer.col >= hwctx->num_col) {
XDNA_ERR(xdna, "Column %u is outside partition range [0, %u)\n",
XDNA_ERR(xdna, "Column %u is outside partition range [0, %u)",
footer.col, hwctx->num_col);
return -EINVAL;
}

/* Validate row */
if (footer.row >= xdna->dev_handle->aie_dev_info.rows) {
XDNA_ERR(xdna, "Row %u is outside range [0, %u)\n",
XDNA_ERR(xdna, "Row %u is outside range [0, %u)",
footer.row, xdna->dev_handle->aie_dev_info.rows);
return -EINVAL;
}

/* Get AIE device handle */
aie_dev = hwctx->priv->aie_dev;
if (!aie_dev) {
XDNA_ERR(xdna, "AIE device handle not found\n");
XDNA_ERR(xdna, "AIE device handle not found");
return -EINVAL;
}

Expand All @@ -223,16 +272,30 @@ static int ve2_aie_write(struct amdxdna_client *client,

/* Copy data from user space (data is at the beginning of buffer) */
if (copy_from_user(local_buf, u64_to_user_ptr(args->buffer), footer.size)) {
XDNA_ERR(xdna, "Error: unable to copy data from userptr\n");
XDNA_ERR(xdna, "Error: unable to copy data from userptr");
kfree(local_buf);
return -EFAULT;
}

/* Write to AIE memory */
ret = ve2_partition_write(aie_dev, footer.col, footer.row, footer.addr,
footer.size, local_buf);
//TODO This is temporary fix to exit the debug queue.
Copy link

Copilot AI Feb 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The TODO comment style is inconsistent with Linux kernel coding style. The comment should be formatted as "/* TODO: This is temporary fix to exit the debug queue. */" with a space after the comment delimiter, colon after TODO, and proper ending.

Suggested change
//TODO This is temporary fix to exit the debug queue.
/* TODO: This is temporary fix to exit the debug queue. */

Copilot uses AI. Check for mistakes.
if (enable_debug_queue) {
if (footer.col == 3) {
ret = ve2_dbg_queue_data_rw(xdna, hwctx, footer.col, footer.row,
footer.addr, local_buf, footer.size,
DBG_CMD_EXIT);
} else {
ret = ve2_dbg_queue_data_rw(xdna, hwctx, footer.col, footer.row,
footer.addr, local_buf, footer.size,
DBG_CMD_WRITE);
}
} else {
ret = ve2_partition_write(aie_dev, footer.col, footer.row, footer.addr,
footer.size, local_buf);
}

if (ret < 0) {
XDNA_ERR(xdna, "Error in AIE memory write operation, err: %d\n", ret);
XDNA_ERR(xdna, "Error in AIE memory write operation, err: %d", ret);
kfree(local_buf);
return ret;
}
Expand Down Expand Up @@ -261,7 +324,7 @@ static int ve2_aie_read(struct amdxdna_client *client, struct amdxdna_drm_get_ar
return -EFAULT;
}

XDNA_DBG(xdna, "Read request for ctx_id: %u, col: %u, row: %u, addr: 0x%x, size: %u\n",
XDNA_DBG(xdna, "Read request for ctx_id: %u, col: %u, row: %u, addr: 0x%x, size: %u",
footer.context_id, footer.col, footer.row, footer.addr, footer.size);

/* Find the hardware context */
Expand All @@ -277,32 +340,32 @@ static int ve2_aie_read(struct amdxdna_client *client, struct amdxdna_drm_get_ar
}

if (!hwctx) {
XDNA_ERR(xdna, "hw context :%u pid:%llu not found\n", footer.context_id,
XDNA_ERR(xdna, "hw context :%u pid:%llu not found", footer.context_id,
footer.pid);
return -EINVAL;
}

XDNA_DBG(xdna, "Found hwctx: cl_pid: %u, hwctx_id: %u, start_col %u, ncol %u\n",
XDNA_DBG(xdna, "Found hwctx: cl_pid: %u, hwctx_id: %u, start_col %u, ncol %u",
hwctx->client->pid, hwctx->id, hwctx->start_col, hwctx->num_col);

/* Validate column is within partition */
if (footer.col >= hwctx->num_col) {
XDNA_ERR(xdna, "Column %u is outside partition range [0, %u)\n",
XDNA_ERR(xdna, "Column %u is outside partition range [0, %u)",
footer.col, hwctx->num_col);
return -EINVAL;
}

/* Validate row */
if (footer.row >= xdna->dev_handle->aie_dev_info.rows) {
XDNA_ERR(xdna, "Row %u is outside range [0, %u)\n",
XDNA_ERR(xdna, "Row %u is outside range [0, %u)",
footer.row, xdna->dev_handle->aie_dev_info.rows);
return -EINVAL;
}

/* Get AIE device handle and relative column */
aie_dev = hwctx->priv->aie_dev;
if (!aie_dev) {
XDNA_ERR(xdna, "AIE device handle not found\n");
XDNA_ERR(xdna, "AIE device handle not found");
return -EINVAL;
}

Expand All @@ -312,17 +375,22 @@ static int ve2_aie_read(struct amdxdna_client *client, struct amdxdna_drm_get_ar
return -ENOMEM;

/* Read from AIE memory */
ret = ve2_partition_read(aie_dev, footer.col, footer.row, footer.addr,
footer.size, local_buf);
if (enable_debug_queue) {
ret = ve2_dbg_queue_data_rw(xdna, hwctx, footer.col, footer.row,
footer.addr, local_buf, footer.size, DBG_CMD_READ);
} else {
ret = ve2_partition_read(aie_dev, footer.col, footer.row, footer.addr,
footer.size, local_buf);
}
if (ret < 0) {
XDNA_ERR(xdna, "Error in AIE memory read operation, err: %d\n", ret);
XDNA_ERR(xdna, "Error in AIE memory read operation, err: %d", ret);
kfree(local_buf);
return ret;
}

/* Copy data to user space */
if (copy_to_user(u64_to_user_ptr(args->buffer), local_buf, footer.size)) {
XDNA_ERR(xdna, "Error: unable to copy memory to userptr\n");
XDNA_ERR(xdna, "Error: unable to copy memory to userptr");
kfree(local_buf);
return -EFAULT;
}
Expand Down Expand Up @@ -366,18 +434,18 @@ static int ve2_coredump_read(struct amdxdna_client *client, struct amdxdna_drm_g
}

if (!hwctx) {
XDNA_ERR(xdna, "hw context :%u pid:%llu not found\n", footer.context_id,
XDNA_ERR(xdna, "hw context :%u pid:%llu not found", footer.context_id,
footer.pid);
return -EINVAL;
}

XDNA_DBG(xdna, "cl_pid: %u, hwctx_id: %u, start_col %u, ncol %u\n",
XDNA_DBG(xdna, "cl_pid: %u, hwctx_id: %u, start_col %u, ncol %u",
hwctx->client->pid, hwctx->id, hwctx->start_col,
hwctx->num_col);

rel_size = hwctx->priv->num_col * xdna->dev_handle->aie_dev_info.rows * TILE_ADDRESS_SPACE;
if (rel_size > buf_size) {
XDNA_DBG(xdna, "Invalid buffer size:%d (rel_size:%d)\n", buf_size, rel_size);
XDNA_DBG(xdna, "Invalid buffer size:%d (rel_size:%d)", buf_size, rel_size);
args->element_size = rel_size;
return -ENOBUFS;
}
Expand All @@ -387,16 +455,16 @@ static int ve2_coredump_read(struct amdxdna_client *client, struct amdxdna_drm_g
return -ENOMEM;

ret = ve2_create_coredump(xdna, hwctx, local_buf, rel_size);
XDNA_DBG(xdna, "created dump of size:%d\n", ret);
XDNA_DBG(xdna, "created dump of size:%d", ret);

if (ret < 0) {
XDNA_ERR(xdna, "Error in AIE Data mem read operation, err: %d\n", ret);
XDNA_ERR(xdna, "Error in AIE Data mem read operation, err: %d", ret);
vfree(local_buf);
return ret;
}

if (copy_to_user(u64_to_user_ptr(args->buffer), local_buf, ret)) {
XDNA_ERR(xdna, "Error: unable to copy memory to userptr\n");
XDNA_ERR(xdna, "Error: unable to copy memory to userptr");
vfree(local_buf);
return -EFAULT;
}
Expand Down Expand Up @@ -684,7 +752,7 @@ static int ve2_get_array_async_error(struct amdxdna_dev *xdna, struct amdxdna_dr
XDNA_DBG(xdna, "Waiting for error callback to complete on mgmtctx[%u]", i);
if (wait_for_completion_timeout(&mgmtctx->error_cb_completion,
wait_timeout) == 0) {
XDNA_WARN(xdna, "Timeout waiting for err callback completion\n");
XDNA_WARN(xdna, "Timeout waiting for err callback completion");
}
}
}
Expand Down
29 changes: 29 additions & 0 deletions src/driver/amdxdna/ve2_host_queue.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,35 @@ struct ve2_hsa_queue {
struct device *alloc_dev;
};

enum dbg_cmd_type {
DBG_CMD_EXIT = 11,
DBG_CMD_READ = 12,
DBG_CMD_WRITE = 13,
};

struct rw_mem {
u32 aie_addr;
u32 length;
u32 host_addr_high;
u32 host_addr_low;
};

struct dbg_queue {
struct host_queue_header hq_header;
struct host_queue_packet hq_entry[HOST_QUEUE_ENTRY];
};

struct ve2_dbg_queue {
struct dbg_queue *dbg_queue_p;
struct ve2_mem dbg_queue_mem;
struct ve2_hq_complete hq_complete;
// hq_lock protects [read | write]_index and reserved_write_index
struct mutex hq_lock;
u64 reserved_write_index;
/* Device used for host queue allocation */
Copy link

Copilot AI Feb 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment for the alloc_dev field has incorrect indentation - it has an extra leading tab compared to the field declaration below it. The comment should align with the field it describes.

Suggested change
/* Device used for host queue allocation */
/* Device used for host queue allocation */

Copilot uses AI. Check for mistakes.
struct device *alloc_dev;
};

/* handshake */
#define ALIVE_MAGIC 0x404C5645
struct handshake {
Expand Down
Loading