From 43b24e4ae79aa9d33cf05987e2462ef3b3d2bf21 Mon Sep 17 00:00:00 2001 From: kahmed10 <15948690+kahmed10@users.noreply.github.com> Date: Thu, 9 Apr 2026 17:06:01 -0400 Subject: [PATCH 01/14] refactor device name functions --- src/targets/gpu/device_name.cpp | 56 ++++++++++--------- .../gpu/include/migraphx/gpu/device_name.hpp | 14 ++--- 2 files changed, 37 insertions(+), 33 deletions(-) diff --git a/src/targets/gpu/device_name.cpp b/src/targets/gpu/device_name.cpp index 5242bda8caa..6ea53af33ed 100644 --- a/src/targets/gpu/device_name.cpp +++ b/src/targets/gpu/device_name.cpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -37,6 +37,12 @@ namespace gpu { MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_SET_GEMM_PROVIDER) +static std::string get_gfx_name(const std::string& device_name) +{ + const auto& name = device_name.empty() ? get_device_name() : device_name; + return trim(split_string(name, ':').front()); +} + int get_device_id() { int device; @@ -55,55 +61,53 @@ std::string get_device_name() return props.gcnArchName; } -bool gfx_has_fp8fnuz_intrinsics() +bool gfx_has_fp8fnuz_intrinsics(const std::string& device_name) { - const auto device_name = trim(split_string(get_device_name(), ':').front()); - return (starts_with(device_name, "gfx94")); + const auto gfx_name = get_gfx_name(device_name); + return (starts_with(gfx_name, "gfx94")); } -bool gfx_has_fp8ocp_intrinsics() +bool gfx_has_fp8ocp_intrinsics(const std::string& device_name) { - const auto device_name = trim(split_string(get_device_name(), ':').front()); - bool is_navi_with_fp8ocp = starts_with(device_name, "gfx12") and device_name >= "gfx1200"; - bool is_mi_with_fp8ocp = starts_with(device_name, "gfx9") and device_name >= "gfx950"; + const auto gfx_name = get_gfx_name(device_name); + bool is_navi_with_fp8ocp = starts_with(gfx_name, "gfx12") and gfx_name >= "gfx1200"; + bool is_mi_with_fp8ocp = starts_with(gfx_name, "gfx9") and gfx_name >= "gfx950"; return (is_navi_with_fp8ocp or is_mi_with_fp8ocp); } -bool gfx_has_bf16_intrinsics() +bool gfx_has_bf16_intrinsics(const std::string& device_name) { - const auto device_name = trim(split_string(get_device_name(), ':').front()); - return not(starts_with(device_name, "gfx1030")); + const auto gfx_name = get_gfx_name(device_name); + return not(starts_with(gfx_name, "gfx1030")); } -bool gfx_has_mx_intrinsics() +bool gfx_has_mx_intrinsics(const std::string& device_name) { - const auto device_name = trim(split_string(get_device_name(), ':').front()); - return starts_with(device_name, "gfx9") and device_name >= "gfx950"; + const auto gfx_name = get_gfx_name(device_name); + return starts_with(gfx_name, "gfx9") and gfx_name >= "gfx950"; } #if MIGRAPHX_USE_HIPBLASLT -// Archs that support hipBLASLt but are defaulted to use rocBLAS. -bool gfx_default_rocblas() +bool gfx_default_rocblas(const std::string& device_name) { - const auto device_name = trim(split_string(get_device_name(), ':').front()); - // Default to rocBLAS for gfx90a. + const auto gfx_name = get_gfx_name(device_name); return ((string_value_of(MIGRAPHX_SET_GEMM_PROVIDER{}) == "hipblaslt") ? false - : (device_name == "gfx90a")); + : (gfx_name == "gfx90a")); } #endif -bool hipblaslt_supported() +bool hipblaslt_supported(const std::string& device_name) { #if !MIGRAPHX_USE_HIPBLASLT + (void)device_name; return false; #else - const auto device_name = trim(split_string(get_device_name(), ':').front()); - // hipblaslt is supported for MI200 and above, and Navi3x and above. - return (device_name == "gfx90a" or - (starts_with(device_name, "gfx94") and device_name >= "gfx942") or - (starts_with(device_name, "gfx95") and device_name >= "gfx950") or - starts_with(device_name, "gfx110") or starts_with(device_name, "gfx120")); + const auto gfx_name = get_gfx_name(device_name); + return (gfx_name == "gfx90a" or + (starts_with(gfx_name, "gfx94") and gfx_name >= "gfx942") or + (starts_with(gfx_name, "gfx95") and gfx_name >= "gfx950") or + starts_with(gfx_name, "gfx110") or starts_with(gfx_name, "gfx120")); #endif } diff --git a/src/targets/gpu/include/migraphx/gpu/device_name.hpp b/src/targets/gpu/include/migraphx/gpu/device_name.hpp index b346aa046b3..75a4a7e9b69 100644 --- a/src/targets/gpu/include/migraphx/gpu/device_name.hpp +++ b/src/targets/gpu/include/migraphx/gpu/device_name.hpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -37,21 +37,21 @@ MIGRAPHX_GPU_EXPORT std::string get_device_name(); MIGRAPHX_GPU_EXPORT int get_device_id(); -MIGRAPHX_GPU_EXPORT bool gfx_has_fp8fnuz_intrinsics(); +MIGRAPHX_GPU_EXPORT bool gfx_has_fp8fnuz_intrinsics(const std::string& device_name = ""); -MIGRAPHX_GPU_EXPORT bool gfx_has_fp8ocp_intrinsics(); +MIGRAPHX_GPU_EXPORT bool gfx_has_fp8ocp_intrinsics(const std::string& device_name = ""); -MIGRAPHX_GPU_EXPORT bool gfx_has_bf16_intrinsics(); +MIGRAPHX_GPU_EXPORT bool gfx_has_bf16_intrinsics(const std::string& device_name = ""); -MIGRAPHX_GPU_EXPORT bool gfx_has_mx_intrinsics(); +MIGRAPHX_GPU_EXPORT bool gfx_has_mx_intrinsics(const std::string& device_name = ""); MIGRAPHX_GPU_EXPORT bool gfx_has_fp8fnuz_support(); #if MIGRAPHX_USE_HIPBLASLT -MIGRAPHX_GPU_EXPORT bool gfx_default_rocblas(); +MIGRAPHX_GPU_EXPORT bool gfx_default_rocblas(const std::string& device_name = ""); #endif -MIGRAPHX_GPU_EXPORT bool hipblaslt_supported(); +MIGRAPHX_GPU_EXPORT bool hipblaslt_supported(const std::string& device_name = ""); } // namespace gpu } // namespace MIGRAPHX_INLINE_NS From aedf850d0e653df70476ffcdf64806d32628e87d Mon Sep 17 00:00:00 2001 From: kahmed10 <15948690+kahmed10@users.noreply.github.com> Date: Thu, 9 Apr 2026 18:36:25 -0400 Subject: [PATCH 02/14] change to overload --- src/targets/gpu/device_name.cpp | 15 +++++++++++++-- .../gpu/include/migraphx/gpu/device_name.hpp | 18 ++++++++++++------ 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/src/targets/gpu/device_name.cpp b/src/targets/gpu/device_name.cpp index 6ea53af33ed..8b426fc7402 100644 --- a/src/targets/gpu/device_name.cpp +++ b/src/targets/gpu/device_name.cpp @@ -39,8 +39,7 @@ MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_SET_GEMM_PROVIDER) static std::string get_gfx_name(const std::string& device_name) { - const auto& name = device_name.empty() ? get_device_name() : device_name; - return trim(split_string(name, ':').front()); + return trim(split_string(device_name, ':').front()); } int get_device_id() @@ -67,6 +66,8 @@ bool gfx_has_fp8fnuz_intrinsics(const std::string& device_name) return (starts_with(gfx_name, "gfx94")); } +bool gfx_has_fp8fnuz_intrinsics() { return gfx_has_fp8fnuz_intrinsics(get_device_name()); } + bool gfx_has_fp8ocp_intrinsics(const std::string& device_name) { const auto gfx_name = get_gfx_name(device_name); @@ -75,18 +76,24 @@ bool gfx_has_fp8ocp_intrinsics(const std::string& device_name) return (is_navi_with_fp8ocp or is_mi_with_fp8ocp); } +bool gfx_has_fp8ocp_intrinsics() { return gfx_has_fp8ocp_intrinsics(get_device_name()); } + bool gfx_has_bf16_intrinsics(const std::string& device_name) { const auto gfx_name = get_gfx_name(device_name); return not(starts_with(gfx_name, "gfx1030")); } +bool gfx_has_bf16_intrinsics() { return gfx_has_bf16_intrinsics(get_device_name()); } + bool gfx_has_mx_intrinsics(const std::string& device_name) { const auto gfx_name = get_gfx_name(device_name); return starts_with(gfx_name, "gfx9") and gfx_name >= "gfx950"; } +bool gfx_has_mx_intrinsics() { return gfx_has_mx_intrinsics(get_device_name()); } + #if MIGRAPHX_USE_HIPBLASLT bool gfx_default_rocblas(const std::string& device_name) { @@ -95,6 +102,8 @@ bool gfx_default_rocblas(const std::string& device_name) ? false : (gfx_name == "gfx90a")); } + +bool gfx_default_rocblas() { return gfx_default_rocblas(get_device_name()); } #endif bool hipblaslt_supported(const std::string& device_name) @@ -111,6 +120,8 @@ bool hipblaslt_supported(const std::string& device_name) #endif } +bool hipblaslt_supported() { return hipblaslt_supported(get_device_name()); } + } // namespace gpu } // namespace MIGRAPHX_INLINE_NS } // namespace migraphx diff --git a/src/targets/gpu/include/migraphx/gpu/device_name.hpp b/src/targets/gpu/include/migraphx/gpu/device_name.hpp index 75a4a7e9b69..36e07216251 100644 --- a/src/targets/gpu/include/migraphx/gpu/device_name.hpp +++ b/src/targets/gpu/include/migraphx/gpu/device_name.hpp @@ -37,21 +37,27 @@ MIGRAPHX_GPU_EXPORT std::string get_device_name(); MIGRAPHX_GPU_EXPORT int get_device_id(); -MIGRAPHX_GPU_EXPORT bool gfx_has_fp8fnuz_intrinsics(const std::string& device_name = ""); +MIGRAPHX_GPU_EXPORT bool gfx_has_fp8fnuz_intrinsics(); +MIGRAPHX_GPU_EXPORT bool gfx_has_fp8fnuz_intrinsics(const std::string& device_name); -MIGRAPHX_GPU_EXPORT bool gfx_has_fp8ocp_intrinsics(const std::string& device_name = ""); +MIGRAPHX_GPU_EXPORT bool gfx_has_fp8ocp_intrinsics(); +MIGRAPHX_GPU_EXPORT bool gfx_has_fp8ocp_intrinsics(const std::string& device_name); -MIGRAPHX_GPU_EXPORT bool gfx_has_bf16_intrinsics(const std::string& device_name = ""); +MIGRAPHX_GPU_EXPORT bool gfx_has_bf16_intrinsics(); +MIGRAPHX_GPU_EXPORT bool gfx_has_bf16_intrinsics(const std::string& device_name); -MIGRAPHX_GPU_EXPORT bool gfx_has_mx_intrinsics(const std::string& device_name = ""); +MIGRAPHX_GPU_EXPORT bool gfx_has_mx_intrinsics(); +MIGRAPHX_GPU_EXPORT bool gfx_has_mx_intrinsics(const std::string& device_name); MIGRAPHX_GPU_EXPORT bool gfx_has_fp8fnuz_support(); #if MIGRAPHX_USE_HIPBLASLT -MIGRAPHX_GPU_EXPORT bool gfx_default_rocblas(const std::string& device_name = ""); +MIGRAPHX_GPU_EXPORT bool gfx_default_rocblas(); +MIGRAPHX_GPU_EXPORT bool gfx_default_rocblas(const std::string& device_name); #endif -MIGRAPHX_GPU_EXPORT bool hipblaslt_supported(const std::string& device_name = ""); +MIGRAPHX_GPU_EXPORT bool hipblaslt_supported(); +MIGRAPHX_GPU_EXPORT bool hipblaslt_supported(const std::string& device_name); } // namespace gpu } // namespace MIGRAPHX_INLINE_NS From acf9c46c5b61ac26d13d45e20904a218b363dd84 Mon Sep 17 00:00:00 2001 From: kahmed10 <15948690+kahmed10@users.noreply.github.com> Date: Thu, 9 Apr 2026 18:36:40 -0400 Subject: [PATCH 03/14] formatting --- src/targets/gpu/device_name.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/targets/gpu/device_name.cpp b/src/targets/gpu/device_name.cpp index 8b426fc7402..f9daa1f941f 100644 --- a/src/targets/gpu/device_name.cpp +++ b/src/targets/gpu/device_name.cpp @@ -113,8 +113,7 @@ bool hipblaslt_supported(const std::string& device_name) return false; #else const auto gfx_name = get_gfx_name(device_name); - return (gfx_name == "gfx90a" or - (starts_with(gfx_name, "gfx94") and gfx_name >= "gfx942") or + return (gfx_name == "gfx90a" or (starts_with(gfx_name, "gfx94") and gfx_name >= "gfx942") or (starts_with(gfx_name, "gfx95") and gfx_name >= "gfx950") or starts_with(gfx_name, "gfx110") or starts_with(gfx_name, "gfx120")); #endif From 06a966c0b4fad5a73f9bf379cbd5254c98516f98 Mon Sep 17 00:00:00 2001 From: kahmed10 <15948690+kahmed10@users.noreply.github.com> Date: Wed, 15 Apr 2026 09:08:47 -0400 Subject: [PATCH 04/14] use context --- src/targets/gpu/device_name.cpp | 78 +++++++++++++------ .../gpu/include/migraphx/gpu/context.hpp | 2 +- .../gpu/include/migraphx/gpu/device_name.hpp | 16 ++-- 3 files changed, 66 insertions(+), 30 deletions(-) diff --git a/src/targets/gpu/device_name.cpp b/src/targets/gpu/device_name.cpp index f9daa1f941f..5968a4856b3 100644 --- a/src/targets/gpu/device_name.cpp +++ b/src/targets/gpu/device_name.cpp @@ -23,6 +23,7 @@ */ #include #include +#include #include #include #include @@ -37,7 +38,7 @@ namespace gpu { MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_SET_GEMM_PROVIDER) -static std::string get_gfx_name(const std::string& device_name) +std::string get_gfx_name(const std::string& device_name) { return trim(split_string(device_name, ':').front()); } @@ -60,66 +61,97 @@ std::string get_device_name() return props.gcnArchName; } -bool gfx_has_fp8fnuz_intrinsics(const std::string& device_name) +static bool gfx_has_fp8fnuz_intrinsics_impl(const std::string& gfx_name) { - const auto gfx_name = get_gfx_name(device_name); return (starts_with(gfx_name, "gfx94")); } -bool gfx_has_fp8fnuz_intrinsics() { return gfx_has_fp8fnuz_intrinsics(get_device_name()); } +bool gfx_has_fp8fnuz_intrinsics() { return gfx_has_fp8fnuz_intrinsics_impl(get_gfx_name(get_device_name())); } -bool gfx_has_fp8ocp_intrinsics(const std::string& device_name) +bool gfx_has_fp8fnuz_intrinsics(const context& ctx) +{ + return gfx_has_fp8fnuz_intrinsics_impl(get_gfx_name(ctx.get_current_device().get_device_name())); +} + +static bool gfx_has_fp8ocp_intrinsics_impl(const std::string& gfx_name) { - const auto gfx_name = get_gfx_name(device_name); bool is_navi_with_fp8ocp = starts_with(gfx_name, "gfx12") and gfx_name >= "gfx1200"; bool is_mi_with_fp8ocp = starts_with(gfx_name, "gfx9") and gfx_name >= "gfx950"; return (is_navi_with_fp8ocp or is_mi_with_fp8ocp); } -bool gfx_has_fp8ocp_intrinsics() { return gfx_has_fp8ocp_intrinsics(get_device_name()); } +bool gfx_has_fp8ocp_intrinsics() { return gfx_has_fp8ocp_intrinsics_impl(get_gfx_name(get_device_name())); } + +bool gfx_has_fp8ocp_intrinsics(const context& ctx) +{ + return gfx_has_fp8ocp_intrinsics_impl(get_gfx_name(ctx.get_current_device().get_device_name())); +} -bool gfx_has_bf16_intrinsics(const std::string& device_name) +static bool gfx_has_bf16_intrinsics_impl(const std::string& gfx_name) { - const auto gfx_name = get_gfx_name(device_name); return not(starts_with(gfx_name, "gfx1030")); } -bool gfx_has_bf16_intrinsics() { return gfx_has_bf16_intrinsics(get_device_name()); } +bool gfx_has_bf16_intrinsics() { return gfx_has_bf16_intrinsics_impl(get_gfx_name(get_device_name())); } -bool gfx_has_mx_intrinsics(const std::string& device_name) +bool gfx_has_bf16_intrinsics(const context& ctx) +{ + return gfx_has_bf16_intrinsics_impl(get_gfx_name(ctx.get_current_device().get_device_name())); +} + +static bool gfx_has_mx_intrinsics_impl(const std::string& gfx_name) { - const auto gfx_name = get_gfx_name(device_name); return starts_with(gfx_name, "gfx9") and gfx_name >= "gfx950"; } -bool gfx_has_mx_intrinsics() { return gfx_has_mx_intrinsics(get_device_name()); } +bool gfx_has_mx_intrinsics() { return gfx_has_mx_intrinsics_impl(get_gfx_name(get_device_name())); } + +bool gfx_has_mx_intrinsics(const context& ctx) +{ + return gfx_has_mx_intrinsics_impl(get_gfx_name(ctx.get_current_device().get_device_name())); +} #if MIGRAPHX_USE_HIPBLASLT -bool gfx_default_rocblas(const std::string& device_name) +static bool gfx_default_rocblas_impl(const std::string& gfx_name) { - const auto gfx_name = get_gfx_name(device_name); return ((string_value_of(MIGRAPHX_SET_GEMM_PROVIDER{}) == "hipblaslt") ? false : (gfx_name == "gfx90a")); } -bool gfx_default_rocblas() { return gfx_default_rocblas(get_device_name()); } +bool gfx_default_rocblas() { return gfx_default_rocblas_impl(get_gfx_name(get_device_name())); } + +bool gfx_default_rocblas(const context& ctx) +{ + return gfx_default_rocblas_impl(get_gfx_name(ctx.get_current_device().get_device_name())); +} #endif -bool hipblaslt_supported(const std::string& device_name) +static bool hipblaslt_supported_impl(const std::string& gfx_name) { -#if !MIGRAPHX_USE_HIPBLASLT - (void)device_name; - return false; -#else - const auto gfx_name = get_gfx_name(device_name); return (gfx_name == "gfx90a" or (starts_with(gfx_name, "gfx94") and gfx_name >= "gfx942") or (starts_with(gfx_name, "gfx95") and gfx_name >= "gfx950") or starts_with(gfx_name, "gfx110") or starts_with(gfx_name, "gfx120")); +} + +bool hipblaslt_supported() +{ +#if !MIGRAPHX_USE_HIPBLASLT + return false; +#else + return hipblaslt_supported_impl(get_gfx_name(get_device_name())); #endif } -bool hipblaslt_supported() { return hipblaslt_supported(get_device_name()); } +bool hipblaslt_supported(const context& ctx) +{ +#if !MIGRAPHX_USE_HIPBLASLT + (void)ctx; + return false; +#else + return hipblaslt_supported_impl(get_gfx_name(ctx.get_current_device().get_device_name())); +#endif +} } // namespace gpu } // namespace MIGRAPHX_INLINE_NS diff --git a/src/targets/gpu/include/migraphx/gpu/context.hpp b/src/targets/gpu/include/migraphx/gpu/context.hpp index e29414d41f3..dd7f0c4a7b4 100644 --- a/src/targets/gpu/include/migraphx/gpu/context.hpp +++ b/src/targets/gpu/include/migraphx/gpu/context.hpp @@ -203,7 +203,7 @@ struct hip_device std::string get_device_name() const { return device_props.gcnArchName; } - std::string get_gfx_name() const { return trim(split_string(get_device_name(), ':').front()); } + std::string get_gfx_name() const { return gpu::get_gfx_name(get_device_name()); } std::size_t get_device_major() const { return device_props.major; } diff --git a/src/targets/gpu/include/migraphx/gpu/device_name.hpp b/src/targets/gpu/include/migraphx/gpu/device_name.hpp index 36e07216251..7d8f9652e05 100644 --- a/src/targets/gpu/include/migraphx/gpu/device_name.hpp +++ b/src/targets/gpu/include/migraphx/gpu/device_name.hpp @@ -33,31 +33,35 @@ namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { namespace gpu { +struct context; + MIGRAPHX_GPU_EXPORT std::string get_device_name(); +MIGRAPHX_GPU_EXPORT std::string get_gfx_name(const std::string& device_name); + MIGRAPHX_GPU_EXPORT int get_device_id(); MIGRAPHX_GPU_EXPORT bool gfx_has_fp8fnuz_intrinsics(); -MIGRAPHX_GPU_EXPORT bool gfx_has_fp8fnuz_intrinsics(const std::string& device_name); +MIGRAPHX_GPU_EXPORT bool gfx_has_fp8fnuz_intrinsics(const context& ctx); MIGRAPHX_GPU_EXPORT bool gfx_has_fp8ocp_intrinsics(); -MIGRAPHX_GPU_EXPORT bool gfx_has_fp8ocp_intrinsics(const std::string& device_name); +MIGRAPHX_GPU_EXPORT bool gfx_has_fp8ocp_intrinsics(const context& ctx); MIGRAPHX_GPU_EXPORT bool gfx_has_bf16_intrinsics(); -MIGRAPHX_GPU_EXPORT bool gfx_has_bf16_intrinsics(const std::string& device_name); +MIGRAPHX_GPU_EXPORT bool gfx_has_bf16_intrinsics(const context& ctx); MIGRAPHX_GPU_EXPORT bool gfx_has_mx_intrinsics(); -MIGRAPHX_GPU_EXPORT bool gfx_has_mx_intrinsics(const std::string& device_name); +MIGRAPHX_GPU_EXPORT bool gfx_has_mx_intrinsics(const context& ctx); MIGRAPHX_GPU_EXPORT bool gfx_has_fp8fnuz_support(); #if MIGRAPHX_USE_HIPBLASLT MIGRAPHX_GPU_EXPORT bool gfx_default_rocblas(); -MIGRAPHX_GPU_EXPORT bool gfx_default_rocblas(const std::string& device_name); +MIGRAPHX_GPU_EXPORT bool gfx_default_rocblas(const context& ctx); #endif MIGRAPHX_GPU_EXPORT bool hipblaslt_supported(); -MIGRAPHX_GPU_EXPORT bool hipblaslt_supported(const std::string& device_name); +MIGRAPHX_GPU_EXPORT bool hipblaslt_supported(const context& ctx); } // namespace gpu } // namespace MIGRAPHX_INLINE_NS From eed1afc563bc752d786f303870648d505b62d02e Mon Sep 17 00:00:00 2001 From: kahmed10 <15948690+kahmed10@users.noreply.github.com> Date: Wed, 15 Apr 2026 09:09:18 -0400 Subject: [PATCH 05/14] formatting --- src/targets/gpu/device_name.cpp | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/targets/gpu/device_name.cpp b/src/targets/gpu/device_name.cpp index 5968a4856b3..b56d72f45d4 100644 --- a/src/targets/gpu/device_name.cpp +++ b/src/targets/gpu/device_name.cpp @@ -66,11 +66,15 @@ static bool gfx_has_fp8fnuz_intrinsics_impl(const std::string& gfx_name) return (starts_with(gfx_name, "gfx94")); } -bool gfx_has_fp8fnuz_intrinsics() { return gfx_has_fp8fnuz_intrinsics_impl(get_gfx_name(get_device_name())); } +bool gfx_has_fp8fnuz_intrinsics() +{ + return gfx_has_fp8fnuz_intrinsics_impl(get_gfx_name(get_device_name())); +} bool gfx_has_fp8fnuz_intrinsics(const context& ctx) { - return gfx_has_fp8fnuz_intrinsics_impl(get_gfx_name(ctx.get_current_device().get_device_name())); + return gfx_has_fp8fnuz_intrinsics_impl( + get_gfx_name(ctx.get_current_device().get_device_name())); } static bool gfx_has_fp8ocp_intrinsics_impl(const std::string& gfx_name) @@ -80,7 +84,10 @@ static bool gfx_has_fp8ocp_intrinsics_impl(const std::string& gfx_name) return (is_navi_with_fp8ocp or is_mi_with_fp8ocp); } -bool gfx_has_fp8ocp_intrinsics() { return gfx_has_fp8ocp_intrinsics_impl(get_gfx_name(get_device_name())); } +bool gfx_has_fp8ocp_intrinsics() +{ + return gfx_has_fp8ocp_intrinsics_impl(get_gfx_name(get_device_name())); +} bool gfx_has_fp8ocp_intrinsics(const context& ctx) { @@ -92,7 +99,10 @@ static bool gfx_has_bf16_intrinsics_impl(const std::string& gfx_name) return not(starts_with(gfx_name, "gfx1030")); } -bool gfx_has_bf16_intrinsics() { return gfx_has_bf16_intrinsics_impl(get_gfx_name(get_device_name())); } +bool gfx_has_bf16_intrinsics() +{ + return gfx_has_bf16_intrinsics_impl(get_gfx_name(get_device_name())); +} bool gfx_has_bf16_intrinsics(const context& ctx) { From b95d1bef781e1be603f470b9fc989e0fe2e0a312 Mon Sep 17 00:00:00 2001 From: kahmed10 <15948690+kahmed10@users.noreply.github.com> Date: Wed, 15 Apr 2026 13:20:11 -0400 Subject: [PATCH 06/14] add context and compile changes --- src/include/migraphx/program.hpp | 1 + src/program.cpp | 7 +- .../gpu/eliminate_data_type_for_gpu.cpp | 25 ++++--- .../gpu/include/migraphx/gpu/context.hpp | 65 +++++++++++++++++-- .../gpu/eliminate_data_type_for_gpu.hpp | 1 + src/targets/gpu/target.cpp | 47 +++++++++++--- 6 files changed, 117 insertions(+), 29 deletions(-) diff --git a/src/include/migraphx/program.hpp b/src/include/migraphx/program.hpp index 8bc7310c2d2..fb502c34da5 100644 --- a/src/include/migraphx/program.hpp +++ b/src/include/migraphx/program.hpp @@ -46,6 +46,7 @@ inline namespace MIGRAPHX_INLINE_NS { MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_COMPILE) MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_EVAL) +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_ARCH) struct program_impl; diff --git a/src/program.cpp b/src/program.cpp index c24be628f36..34911f0bd90 100644 --- a/src/program.cpp +++ b/src/program.cpp @@ -308,7 +308,8 @@ void program::compile(const std::vector& targets, std::vectorfinalize(); + if(string_value_of(MIGRAPHX_GPU_ARCH{}).empty()) + this->finalize(); } void program::compile(const target& t, compile_options options) @@ -326,6 +327,7 @@ void program::compile(const target& t, compile_options options) auto&& passes = t.get_passes(this->impl->contexts.front(), options); run_passes(*this, passes, options.trace); auto mods = this->get_modules(); + bool cross_compiling = not string_value_of(MIGRAPHX_GPU_ARCH{}).empty(); // Validate and finalize for(const auto& mod : reverse(mods)) { @@ -342,7 +344,8 @@ void program::compile(const target& t, compile_options options) MIGRAPHX_THROW("Dangling reference in module " + mod->name() + " from instruction " + std::to_string(index)); } - mod->finalize(this->impl->contexts); + if(not cross_compiling) + mod->finalize(this->impl->contexts); } } diff --git a/src/targets/gpu/eliminate_data_type_for_gpu.cpp b/src/targets/gpu/eliminate_data_type_for_gpu.cpp index 60c0bcfe24b..030b5239d52 100644 --- a/src/targets/gpu/eliminate_data_type_for_gpu.cpp +++ b/src/targets/gpu/eliminate_data_type_for_gpu.cpp @@ -68,21 +68,20 @@ static eliminate_data_type for_device_functions() return eliminate_data_type{unsupported_types, shape::float_type, device_functions}; } -static eliminate_data_type for_fp8fnuz() +static eliminate_data_type for_fp8fnuz(const context* ctx) { std::set unsupported_ops = {}; - // disable dot & quant_dot if no hipblaslt - if(not hipblaslt_supported()) + if(ctx != nullptr ? not hipblaslt_supported(*ctx) : not hipblaslt_supported()) { unsupported_ops.insert("dot"); unsupported_ops.insert("quant_dot"); } - // MIOpen doesn't have support for fp8 pooling yet. insert_miopen_pooling(unsupported_ops); - if(not gpu::gfx_has_fp8fnuz_intrinsics()) + if(ctx != nullptr ? not gpu::gfx_has_fp8fnuz_intrinsics(*ctx) + : not gpu::gfx_has_fp8fnuz_intrinsics()) { insert_gemm_conv(unsupported_ops); } @@ -90,21 +89,20 @@ static eliminate_data_type for_fp8fnuz() {shape::fp8e4m3fnuz_type, shape::fp8e5m2fnuz_type}, shape::float_type, unsupported_ops}; } -static eliminate_data_type for_fp8ocp() +static eliminate_data_type for_fp8ocp(const context* ctx) { std::set unsupported_ops = {}; - // disable dot & quant_dot if no hipblaslt - if(not hipblaslt_supported()) + if(ctx != nullptr ? not hipblaslt_supported(*ctx) : not hipblaslt_supported()) { unsupported_ops.insert("dot"); unsupported_ops.insert("quant_dot"); } - // MIOpen doesn't have support for fp8 pooling yet. insert_miopen_pooling(unsupported_ops); - if(not gpu::gfx_has_fp8ocp_intrinsics()) + if(ctx != nullptr ? not gpu::gfx_has_fp8ocp_intrinsics(*ctx) + : not gpu::gfx_has_fp8ocp_intrinsics()) { insert_gemm_conv(unsupported_ops); } @@ -133,7 +131,8 @@ void eliminate_data_type_for_gpu::apply(module_pass_manager& mpm) const { std::set unsupported_floats; // No BF-16 Support on Navi21 - if(not gpu::gfx_has_bf16_intrinsics()) + if(ctx != nullptr ? not gpu::gfx_has_bf16_intrinsics(*ctx) + : not gpu::gfx_has_bf16_intrinsics()) { unsupported_floats.insert(shape::bf16_type); } @@ -158,8 +157,8 @@ void eliminate_data_type_for_gpu::apply(module_pass_manager& mpm) const mpm.run_pass(for_device_functions()); - mpm.run_pass(for_fp8fnuz()); - mpm.run_pass(for_fp8ocp()); + mpm.run_pass(for_fp8fnuz(ctx)); + mpm.run_pass(for_fp8ocp(ctx)); mpm.run_pass(for_gemm_conv()); } diff --git a/src/targets/gpu/include/migraphx/gpu/context.hpp b/src/targets/gpu/include/migraphx/gpu/context.hpp index dd7f0c4a7b4..e38403ed595 100644 --- a/src/targets/gpu/include/migraphx/gpu/context.hpp +++ b/src/targets/gpu/include/migraphx/gpu/context.hpp @@ -42,6 +42,7 @@ #include #include #include +#include namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { @@ -69,6 +70,25 @@ struct hip_device add_stream(); } + hip_device(std::string arch_name, + std::size_t cu_count, + std::size_t chiplets, + std::size_t n = 1) + : cross_compile_mode(true), chiplet_count_override(chiplets) + { + std::memset(&device_props, 0, sizeof(device_props)); + std::strncpy( + device_props.gcnArchName, arch_name.c_str(), sizeof(device_props.gcnArchName) - 1); + device_props.gcnArchName[sizeof(device_props.gcnArchName) - 1] = '\0'; + device_props.warpSize = 64; + device_props.maxThreadsPerMultiProcessor = 2048; + device_props.maxThreadsPerBlock = 1024; + device_props.multiProcessorCount = cu_count; + + for(std::size_t i = 0; i < n; i++) + add_stream(); + } + struct stream { using hip_stream_ptr = MIGRAPHX_MANAGE_PTR(hipStream_t, hipStreamDestroy); @@ -211,7 +231,14 @@ struct hip_device std::size_t get_cu_count() const { return device_props.multiProcessorCount; } - std::size_t get_chiplet_count() const { return get_hsa_chiplet_count(device_id); } + std::size_t get_chiplet_count() const + { + if(cross_compile_mode) + return chiplet_count_override; + return get_hsa_chiplet_count(device_id); + } + + bool is_cross_compile() const { return cross_compile_mode; } std::size_t get_max_workitems_per_cu() const { @@ -223,8 +250,10 @@ struct hip_device std::size_t get_wavefront_size() const { return device_props.warpSize; } private: - std::size_t device_id = 0; - std::size_t current_stream = 0; + std::size_t device_id = 0; + std::size_t current_stream = 0; + bool cross_compile_mode = false; + std::size_t chiplet_count_override = 1; std::vector streams; hipDeviceProp_t device_props; @@ -256,6 +285,13 @@ struct context { } + context(std::string arch_name, std::size_t cu_count, std::size_t chiplets) + : current_device(std::make_shared( + std::move(arch_name), cu_count, chiplets, value_of(MIGRAPHX_NSTREAMS{}, 1))), + pc(std::make_shared()) + { + } + hip_device& get_current_device() { assert(current_device != nullptr); @@ -268,6 +304,11 @@ struct context return *current_device; } + bool is_cross_compile() const + { + return current_device != nullptr and current_device->is_cross_compile(); + } + bool get_exhaustive_tune_flag() const { return exhaustive_tune; } void set_exhaustive_tune_flag(bool t) { exhaustive_tune = t; } @@ -292,7 +333,12 @@ struct context hipEvent_t get_event(std::size_t i) const { return events.at(i).get(); } std::vector literals{}; - void finish() const { get_stream().wait(); } + void finish() const + { + if(is_cross_compile()) + MIGRAPHX_THROW("Cannot execute in cross-compilation mode"); + get_stream().wait(); + } static hip_event_ptr create_event() { @@ -336,6 +382,8 @@ struct context void wait_for(any_ptr queue) { + if(is_cross_compile()) + MIGRAPHX_THROW("Cannot execute in cross-compilation mode"); auto status = hipEventRecord(begin_event.get(), queue.get()); if(status != hipSuccess) MIGRAPHX_THROW("Failed to record: " + hip_error(status)); @@ -345,6 +393,8 @@ struct context void finish_on(any_ptr queue) { + if(is_cross_compile()) + MIGRAPHX_THROW("Cannot execute in cross-compilation mode"); get_stream().record(finish_event.get()); auto status = hipStreamWaitEvent(queue.get(), finish_event.get(), 0); @@ -352,7 +402,12 @@ struct context MIGRAPHX_THROW("Failed to wait on event: " + hip_error(status)); } - any_ptr get_queue() { return get_stream().get(); } + any_ptr get_queue() + { + if(is_cross_compile()) + MIGRAPHX_THROW("Cannot execute in cross-compilation mode"); + return get_stream().get(); + } std::pair get_perf_events() const { diff --git a/src/targets/gpu/include/migraphx/gpu/eliminate_data_type_for_gpu.hpp b/src/targets/gpu/include/migraphx/gpu/eliminate_data_type_for_gpu.hpp index 3004c2b97cf..997165565e3 100644 --- a/src/targets/gpu/include/migraphx/gpu/eliminate_data_type_for_gpu.hpp +++ b/src/targets/gpu/include/migraphx/gpu/eliminate_data_type_for_gpu.hpp @@ -36,6 +36,7 @@ namespace gpu { struct MIGRAPHX_GPU_EXPORT eliminate_data_type_for_gpu { bool disable_64bit = false; + const context* ctx = nullptr; std::string name() const { return "gpu::eliminate_data_type_for_gpu"; } void apply(module_pass_manager& mpm) const; }; diff --git a/src/targets/gpu/target.cpp b/src/targets/gpu/target.cpp index 3ed3e72033d..7f46b3f1671 100644 --- a/src/targets/gpu/target.cpp +++ b/src/targets/gpu/target.cpp @@ -92,12 +92,16 @@ MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_CK) #endif MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_SET_GEMM_PROVIDER) MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_FULL_DYNAMIC) +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_ARCH) +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_NUM_CU) +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_NUM_CHIPLETS) std::vector target::get_passes(migraphx::context& gctx, const compile_options& options) const { auto& ctx = any_cast(gctx); ctx.set_exhaustive_tune_flag(options.exhaustive_tune); - ctx.load_problem_cache(); + if(not ctx.is_cross_compile()) + ctx.load_problem_cache(); // clang-format off return @@ -110,14 +114,14 @@ std::vector target::get_passes(migraphx::context& gctx, const compile_opti dead_code_elimination{}, eliminate_identity{}, dead_code_elimination{}, - enable_pass(not gpu::gfx_has_fp8ocp_intrinsics() and gpu::gfx_has_fp8fnuz_intrinsics(), fp8_ocp_to_fnuz{}), - enable_pass(not gpu::gfx_has_fp8ocp_intrinsics() and gpu::gfx_has_fp8fnuz_intrinsics(), dead_code_elimination{}), - simplify_qdq{.use_mx_quant=gpu::gfx_has_mx_intrinsics()}, + enable_pass(not gpu::gfx_has_fp8ocp_intrinsics(ctx) and gpu::gfx_has_fp8fnuz_intrinsics(ctx), fp8_ocp_to_fnuz{}), + enable_pass(not gpu::gfx_has_fp8ocp_intrinsics(ctx) and gpu::gfx_has_fp8fnuz_intrinsics(ctx), dead_code_elimination{}), + simplify_qdq{.use_mx_quant=gpu::gfx_has_mx_intrinsics(ctx)}, enable_pass(not mlir_enabled(), rewrite_quantization{}), dead_code_elimination{}, rewrite_rnn{}, dead_code_elimination{}, - eliminate_data_type_for_gpu{.disable_64bit = options.fast_math}, + eliminate_data_type_for_gpu{.disable_64bit = options.fast_math, .ctx = &ctx}, rewrite_resize{.affine_only = true}, dead_code_elimination{}, simplify_reshapes{.enable_gather_rewrite = true}, @@ -205,13 +209,38 @@ std::vector target::get_passes(migraphx::context& gctx, const compile_opti std::string target::name() const { return "gpu"; } -migraphx::context target::get_context() const { return context(gpu::get_device_id()); } +migraphx::context target::get_context() const +{ + auto arch = string_value_of(MIGRAPHX_GPU_ARCH{}); + if(not arch.empty()) + { + auto num_cu = value_of(MIGRAPHX_GPU_NUM_CU{}, 120); + auto num_chiplets = value_of(MIGRAPHX_GPU_NUM_CHIPLETS{}, 1); + return context(std::move(arch), num_cu, num_chiplets); + } + return context(gpu::get_device_id()); +} -argument target::copy_to(const argument& arg) const { return gpu::to_gpu(arg); } +argument target::copy_to(const argument& arg) const +{ + if(not string_value_of(MIGRAPHX_GPU_ARCH{}).empty()) + MIGRAPHX_THROW("Cannot copy data in cross-compilation mode"); + return gpu::to_gpu(arg); +} -argument target::copy_from(const argument& arg) const { return gpu::from_gpu(arg); } +argument target::copy_from(const argument& arg) const +{ + if(not string_value_of(MIGRAPHX_GPU_ARCH{}).empty()) + MIGRAPHX_THROW("Cannot copy data in cross-compilation mode"); + return gpu::from_gpu(arg); +} -argument target::allocate(const shape& s) const { return gpu::allocate_gpu(s); } +argument target::allocate(const shape& s) const +{ + if(not string_value_of(MIGRAPHX_GPU_ARCH{}).empty()) + MIGRAPHX_THROW("Cannot allocate GPU memory in cross-compilation mode"); + return gpu::allocate_gpu(s); +} MIGRAPHX_REGISTER_TARGET(target); From 424f5458b8cf7afbb46ce47431222ec3e825bf93 Mon Sep 17 00:00:00 2001 From: kahmed10 <15948690+kahmed10@users.noreply.github.com> Date: Thu, 16 Apr 2026 23:51:30 -0400 Subject: [PATCH 07/14] refactor --- src/targets/gpu/CMakeLists.txt | 1 + src/targets/gpu/cross_compile_device.cpp | 47 +++++++++++++++++++ .../gpu/include/migraphx/gpu/context.hpp | 25 +++------- .../migraphx/gpu/cross_compile_device.hpp | 46 ++++++++++++++++++ 4 files changed, 101 insertions(+), 18 deletions(-) create mode 100644 src/targets/gpu/cross_compile_device.cpp create mode 100644 src/targets/gpu/include/migraphx/gpu/cross_compile_device.hpp diff --git a/src/targets/gpu/CMakeLists.txt b/src/targets/gpu/CMakeLists.txt index e83f8fea298..999be51fbf0 100644 --- a/src/targets/gpu/CMakeLists.txt +++ b/src/targets/gpu/CMakeLists.txt @@ -163,6 +163,7 @@ add_library(migraphx_gpu compile_miopen.cpp compile_pointwise.cpp compiler.cpp + cross_compile_device.cpp device_name.cpp eliminate_data_type_for_gpu.cpp fixed_pad.cpp diff --git a/src/targets/gpu/cross_compile_device.cpp b/src/targets/gpu/cross_compile_device.cpp new file mode 100644 index 00000000000..8f851e86c78 --- /dev/null +++ b/src/targets/gpu/cross_compile_device.cpp @@ -0,0 +1,47 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +hipDeviceProp_t +make_cross_compile_device_props(const std::string& arch_name, std::size_t cu_count) +{ + hipDeviceProp_t props{}; + std::strncpy(props.gcnArchName, arch_name.c_str(), sizeof(props.gcnArchName) - 1); + props.gcnArchName[sizeof(props.gcnArchName) - 1] = '\0'; + // these are placeholders + props.warpSize = 64; + props.maxThreadsPerMultiProcessor = 2048; + props.maxThreadsPerBlock = 1024; + props.multiProcessorCount = cu_count; + return props; +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/src/targets/gpu/include/migraphx/gpu/context.hpp b/src/targets/gpu/include/migraphx/gpu/context.hpp index e38403ed595..6a2141996fd 100644 --- a/src/targets/gpu/include/migraphx/gpu/context.hpp +++ b/src/targets/gpu/include/migraphx/gpu/context.hpp @@ -40,9 +40,9 @@ #include #include #include +#include #include #include -#include namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { @@ -70,23 +70,12 @@ struct hip_device add_stream(); } - hip_device(std::string arch_name, - std::size_t cu_count, - std::size_t chiplets, - std::size_t n = 1) - : cross_compile_mode(true), chiplet_count_override(chiplets) + hip_device(std::string arch_name, std::size_t cu_count, std::size_t chiplets) + : cross_compile_mode(true), + chiplet_count_override(chiplets), + device_props(make_cross_compile_device_props(arch_name, cu_count)) { - std::memset(&device_props, 0, sizeof(device_props)); - std::strncpy( - device_props.gcnArchName, arch_name.c_str(), sizeof(device_props.gcnArchName) - 1); - device_props.gcnArchName[sizeof(device_props.gcnArchName) - 1] = '\0'; - device_props.warpSize = 64; - device_props.maxThreadsPerMultiProcessor = 2048; - device_props.maxThreadsPerBlock = 1024; - device_props.multiProcessorCount = cu_count; - - for(std::size_t i = 0; i < n; i++) - add_stream(); + add_stream(); } struct stream @@ -287,7 +276,7 @@ struct context context(std::string arch_name, std::size_t cu_count, std::size_t chiplets) : current_device(std::make_shared( - std::move(arch_name), cu_count, chiplets, value_of(MIGRAPHX_NSTREAMS{}, 1))), + std::move(arch_name), cu_count, chiplets)), pc(std::make_shared()) { } diff --git a/src/targets/gpu/include/migraphx/gpu/cross_compile_device.hpp b/src/targets/gpu/include/migraphx/gpu/cross_compile_device.hpp new file mode 100644 index 00000000000..a95e5276da1 --- /dev/null +++ b/src/targets/gpu/include/migraphx/gpu/cross_compile_device.hpp @@ -0,0 +1,46 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_GPU_CROSS_COMPILE_DEVICE_HPP +#define MIGRAPHX_GUARD_GPU_CROSS_COMPILE_DEVICE_HPP + +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +/// Populate a hipDeviceProp_t with synthetic values for cross-compilation. +/// Used when no physical GPU is present and the target architecture +/// is specified via environment variables. +MIGRAPHX_GPU_EXPORT hipDeviceProp_t +make_cross_compile_device_props(const std::string& arch_name, std::size_t cu_count); + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif From 0bbd889a19d5f31b010f482e8cc9774d7414a640 Mon Sep 17 00:00:00 2001 From: kahmed10 <15948690+kahmed10@users.noreply.github.com> Date: Thu, 16 Apr 2026 23:59:29 -0400 Subject: [PATCH 08/14] skip benchmarking if cross compile --- src/targets/gpu/compile_ops.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/targets/gpu/compile_ops.cpp b/src/targets/gpu/compile_ops.cpp index 202d317a199..fdca7b47416 100644 --- a/src/targets/gpu/compile_ops.cpp +++ b/src/targets/gpu/compile_ops.cpp @@ -306,7 +306,7 @@ struct compile_plan if(solutions.empty()) MIGRAPHX_THROW("No solutions provided for " + preop.name() + " with " + problem_string() + "\n\n" + print_modules()); - if(enabled(MIGRAPHX_SKIP_BENCHMARKING{}) or solutions.size() == 1) + if(enabled(MIGRAPHX_SKIP_BENCHMARKING{}) or ctx->is_cross_compile() or solutions.size() == 1) { ctx->get_problem_cache().insert(preop.name(), problem, solutions.front()); results.resize(1); From 9f538326e58f8472a0d55713e68bff197ad00d0c Mon Sep 17 00:00:00 2001 From: kahmed10 <15948690+kahmed10@users.noreply.github.com> Date: Fri, 17 Apr 2026 00:07:23 -0400 Subject: [PATCH 09/14] formatting --- src/targets/gpu/compile_ops.cpp | 3 ++- src/targets/gpu/cross_compile_device.cpp | 3 +-- src/targets/gpu/eliminate_data_type_for_gpu.cpp | 3 +-- src/targets/gpu/include/migraphx/gpu/context.hpp | 3 +-- src/targets/gpu/include/migraphx/gpu/cross_compile_device.hpp | 4 ++-- 5 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/targets/gpu/compile_ops.cpp b/src/targets/gpu/compile_ops.cpp index fdca7b47416..7fcd348b771 100644 --- a/src/targets/gpu/compile_ops.cpp +++ b/src/targets/gpu/compile_ops.cpp @@ -306,7 +306,8 @@ struct compile_plan if(solutions.empty()) MIGRAPHX_THROW("No solutions provided for " + preop.name() + " with " + problem_string() + "\n\n" + print_modules()); - if(enabled(MIGRAPHX_SKIP_BENCHMARKING{}) or ctx->is_cross_compile() or solutions.size() == 1) + if(enabled(MIGRAPHX_SKIP_BENCHMARKING{}) or ctx->is_cross_compile() or + solutions.size() == 1) { ctx->get_problem_cache().insert(preop.name(), problem, solutions.front()); results.resize(1); diff --git a/src/targets/gpu/cross_compile_device.cpp b/src/targets/gpu/cross_compile_device.cpp index 8f851e86c78..9725aafd023 100644 --- a/src/targets/gpu/cross_compile_device.cpp +++ b/src/targets/gpu/cross_compile_device.cpp @@ -28,8 +28,7 @@ namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { namespace gpu { -hipDeviceProp_t -make_cross_compile_device_props(const std::string& arch_name, std::size_t cu_count) +hipDeviceProp_t make_cross_compile_device_props(const std::string& arch_name, std::size_t cu_count) { hipDeviceProp_t props{}; std::strncpy(props.gcnArchName, arch_name.c_str(), sizeof(props.gcnArchName) - 1); diff --git a/src/targets/gpu/eliminate_data_type_for_gpu.cpp b/src/targets/gpu/eliminate_data_type_for_gpu.cpp index 030b5239d52..0294724c96b 100644 --- a/src/targets/gpu/eliminate_data_type_for_gpu.cpp +++ b/src/targets/gpu/eliminate_data_type_for_gpu.cpp @@ -131,8 +131,7 @@ void eliminate_data_type_for_gpu::apply(module_pass_manager& mpm) const { std::set unsupported_floats; // No BF-16 Support on Navi21 - if(ctx != nullptr ? not gpu::gfx_has_bf16_intrinsics(*ctx) - : not gpu::gfx_has_bf16_intrinsics()) + if(ctx != nullptr ? not gpu::gfx_has_bf16_intrinsics(*ctx) : not gpu::gfx_has_bf16_intrinsics()) { unsupported_floats.insert(shape::bf16_type); } diff --git a/src/targets/gpu/include/migraphx/gpu/context.hpp b/src/targets/gpu/include/migraphx/gpu/context.hpp index 6a2141996fd..5cf86810de7 100644 --- a/src/targets/gpu/include/migraphx/gpu/context.hpp +++ b/src/targets/gpu/include/migraphx/gpu/context.hpp @@ -275,8 +275,7 @@ struct context } context(std::string arch_name, std::size_t cu_count, std::size_t chiplets) - : current_device(std::make_shared( - std::move(arch_name), cu_count, chiplets)), + : current_device(std::make_shared(std::move(arch_name), cu_count, chiplets)), pc(std::make_shared()) { } diff --git a/src/targets/gpu/include/migraphx/gpu/cross_compile_device.hpp b/src/targets/gpu/include/migraphx/gpu/cross_compile_device.hpp index a95e5276da1..b4938081331 100644 --- a/src/targets/gpu/include/migraphx/gpu/cross_compile_device.hpp +++ b/src/targets/gpu/include/migraphx/gpu/cross_compile_device.hpp @@ -36,8 +36,8 @@ namespace gpu { /// Populate a hipDeviceProp_t with synthetic values for cross-compilation. /// Used when no physical GPU is present and the target architecture /// is specified via environment variables. -MIGRAPHX_GPU_EXPORT hipDeviceProp_t -make_cross_compile_device_props(const std::string& arch_name, std::size_t cu_count); +MIGRAPHX_GPU_EXPORT hipDeviceProp_t make_cross_compile_device_props(const std::string& arch_name, + std::size_t cu_count); } // namespace gpu } // namespace MIGRAPHX_INLINE_NS From d10cd5ad363e1b65dcd06d1a0d673afad58fccd5 Mon Sep 17 00:00:00 2001 From: kahmed10 <15948690+kahmed10@users.noreply.github.com> Date: Fri, 17 Apr 2026 00:23:46 -0400 Subject: [PATCH 10/14] fix licensing --- src/include/migraphx/program.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/migraphx/program.hpp b/src/include/migraphx/program.hpp index fb502c34da5..807ab6f9b73 100644 --- a/src/include/migraphx/program.hpp +++ b/src/include/migraphx/program.hpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal From cbde4eb9c767eab84818f365f4e96cfc4e24be2b Mon Sep 17 00:00:00 2001 From: kahmed10 <15948690+kahmed10@users.noreply.github.com> Date: Tue, 21 Apr 2026 00:23:22 -0400 Subject: [PATCH 11/14] fix cppcheck and tidy --- src/targets/gpu/cross_compile_device.cpp | 7 ++++--- src/targets/gpu/device_name.cpp | 14 +++++++------- .../gpu/eliminate_data_type_for_gpu.cpp | 18 +++++++++++------- .../gpu/include/migraphx/gpu/context.hpp | 6 +++--- 4 files changed, 25 insertions(+), 20 deletions(-) diff --git a/src/targets/gpu/cross_compile_device.cpp b/src/targets/gpu/cross_compile_device.cpp index 9725aafd023..05fb33b9a1e 100644 --- a/src/targets/gpu/cross_compile_device.cpp +++ b/src/targets/gpu/cross_compile_device.cpp @@ -22,7 +22,7 @@ * THE SOFTWARE. */ #include -#include +#include namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { @@ -31,8 +31,9 @@ namespace gpu { hipDeviceProp_t make_cross_compile_device_props(const std::string& arch_name, std::size_t cu_count) { hipDeviceProp_t props{}; - std::strncpy(props.gcnArchName, arch_name.c_str(), sizeof(props.gcnArchName) - 1); - props.gcnArchName[sizeof(props.gcnArchName) - 1] = '\0'; + auto n = std::min(arch_name.size(), sizeof(props.gcnArchName) - 1); + std::copy_n(arch_name.begin(), n, props.gcnArchName); + props.gcnArchName[n] = '\0'; // these are placeholders props.warpSize = 64; props.maxThreadsPerMultiProcessor = 2048; diff --git a/src/targets/gpu/device_name.cpp b/src/targets/gpu/device_name.cpp index b56d72f45d4..e01a786885f 100644 --- a/src/targets/gpu/device_name.cpp +++ b/src/targets/gpu/device_name.cpp @@ -122,6 +122,13 @@ bool gfx_has_mx_intrinsics(const context& ctx) } #if MIGRAPHX_USE_HIPBLASLT +static bool hipblaslt_supported_impl(const std::string& gfx_name) +{ + return (gfx_name == "gfx90a" or (starts_with(gfx_name, "gfx94") and gfx_name >= "gfx942") or + (starts_with(gfx_name, "gfx95") and gfx_name >= "gfx950") or + starts_with(gfx_name, "gfx110") or starts_with(gfx_name, "gfx120")); +} + static bool gfx_default_rocblas_impl(const std::string& gfx_name) { return ((string_value_of(MIGRAPHX_SET_GEMM_PROVIDER{}) == "hipblaslt") @@ -137,13 +144,6 @@ bool gfx_default_rocblas(const context& ctx) } #endif -static bool hipblaslt_supported_impl(const std::string& gfx_name) -{ - return (gfx_name == "gfx90a" or (starts_with(gfx_name, "gfx94") and gfx_name >= "gfx942") or - (starts_with(gfx_name, "gfx95") and gfx_name >= "gfx950") or - starts_with(gfx_name, "gfx110") or starts_with(gfx_name, "gfx120")); -} - bool hipblaslt_supported() { #if !MIGRAPHX_USE_HIPBLASLT diff --git a/src/targets/gpu/eliminate_data_type_for_gpu.cpp b/src/targets/gpu/eliminate_data_type_for_gpu.cpp index 0294724c96b..a42305e0724 100644 --- a/src/targets/gpu/eliminate_data_type_for_gpu.cpp +++ b/src/targets/gpu/eliminate_data_type_for_gpu.cpp @@ -68,11 +68,17 @@ static eliminate_data_type for_device_functions() return eliminate_data_type{unsupported_types, shape::float_type, device_functions}; } +template +static auto query_device(const context* ctx, F f) +{ + return ctx != nullptr ? f(*ctx) : f(); +} + static eliminate_data_type for_fp8fnuz(const context* ctx) { std::set unsupported_ops = {}; - if(ctx != nullptr ? not hipblaslt_supported(*ctx) : not hipblaslt_supported()) + if(not query_device(ctx, [](auto&&... args) { return hipblaslt_supported(args...); })) { unsupported_ops.insert("dot"); unsupported_ops.insert("quant_dot"); @@ -80,8 +86,7 @@ static eliminate_data_type for_fp8fnuz(const context* ctx) insert_miopen_pooling(unsupported_ops); - if(ctx != nullptr ? not gpu::gfx_has_fp8fnuz_intrinsics(*ctx) - : not gpu::gfx_has_fp8fnuz_intrinsics()) + if(not query_device(ctx, [](auto&&... args) { return gfx_has_fp8fnuz_intrinsics(args...); })) { insert_gemm_conv(unsupported_ops); } @@ -93,7 +98,7 @@ static eliminate_data_type for_fp8ocp(const context* ctx) { std::set unsupported_ops = {}; - if(ctx != nullptr ? not hipblaslt_supported(*ctx) : not hipblaslt_supported()) + if(not query_device(ctx, [](auto&&... args) { return hipblaslt_supported(args...); })) { unsupported_ops.insert("dot"); unsupported_ops.insert("quant_dot"); @@ -101,8 +106,7 @@ static eliminate_data_type for_fp8ocp(const context* ctx) insert_miopen_pooling(unsupported_ops); - if(ctx != nullptr ? not gpu::gfx_has_fp8ocp_intrinsics(*ctx) - : not gpu::gfx_has_fp8ocp_intrinsics()) + if(not query_device(ctx, [](auto&&... args) { return gfx_has_fp8ocp_intrinsics(args...); })) { insert_gemm_conv(unsupported_ops); } @@ -131,7 +135,7 @@ void eliminate_data_type_for_gpu::apply(module_pass_manager& mpm) const { std::set unsupported_floats; // No BF-16 Support on Navi21 - if(ctx != nullptr ? not gpu::gfx_has_bf16_intrinsics(*ctx) : not gpu::gfx_has_bf16_intrinsics()) + if(not query_device(ctx, [](auto&&... args) { return gfx_has_bf16_intrinsics(args...); })) { unsupported_floats.insert(shape::bf16_type); } diff --git a/src/targets/gpu/include/migraphx/gpu/context.hpp b/src/targets/gpu/include/migraphx/gpu/context.hpp index 5cf86810de7..9de298eaa32 100644 --- a/src/targets/gpu/include/migraphx/gpu/context.hpp +++ b/src/targets/gpu/include/migraphx/gpu/context.hpp @@ -70,7 +70,7 @@ struct hip_device add_stream(); } - hip_device(std::string arch_name, std::size_t cu_count, std::size_t chiplets) + hip_device(const std::string& arch_name, std::size_t cu_count, std::size_t chiplets) : cross_compile_mode(true), chiplet_count_override(chiplets), device_props(make_cross_compile_device_props(arch_name, cu_count)) @@ -274,8 +274,8 @@ struct context { } - context(std::string arch_name, std::size_t cu_count, std::size_t chiplets) - : current_device(std::make_shared(std::move(arch_name), cu_count, chiplets)), + context(const std::string& arch_name, std::size_t cu_count, std::size_t chiplets) + : current_device(std::make_shared(arch_name, cu_count, chiplets)), pc(std::make_shared()) { } From c288251ca17a0c39d288d318fb203fe824c15d14 Mon Sep 17 00:00:00 2001 From: kahmed10 <15948690+kahmed10@users.noreply.github.com> Date: Tue, 21 Apr 2026 23:01:34 -0400 Subject: [PATCH 12/14] fix merge --- CHANGELOG.md | 1 + docs/reference/MIGraphX-dev-env-vars.rst | 22 ++++++ src/targets/gpu/device_name.cpp | 96 ++++++++++++++++++------ 3 files changed, 94 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c15b92e98ef..f9cd4e19096 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ Full documentation for MIGraphX is available at * Added a fuse_horizontal pass which batches independent cross embedding gather instructions (#4599). * Added GPU JIT `Resize` kernel (#4553). * Added environment variable `MIGRAPHX_SKIP_BENCHMARKING` which when enabled, skips tuning of MIGraphX and rocMLIR kernels (#4628). +* Added cross-compilation support via `MIGRAPHX_GPU_ARCH` environment variable, enabling compilation for a target GPU architecture without a physical device present (#4795). * Added Cubic resize jit kernel (#4652). * Added JIT compiler for `fill` operation (#4666). * Added JIT compiler for `multinomial` operation (#4721). diff --git a/docs/reference/MIGraphX-dev-env-vars.rst b/docs/reference/MIGraphX-dev-env-vars.rst index 316b822ca4b..5bf48f848fc 100644 --- a/docs/reference/MIGraphX-dev-env-vars.rst +++ b/docs/reference/MIGraphX-dev-env-vars.rst @@ -742,4 +742,26 @@ Advanced settings | Default: Benchmarking is not skipped. + * - | ``MIGRAPHX_GPU_ARCH`` + | Enables cross-compilation mode by specifying a target GPU architecture without requiring a physical GPU. + | When set, kernel benchmarking and finalization are skipped. MIOpen, hipBLASLt, and CK operations are currently not supported in this mode. + + - | Takes a valid GPU architecture string (e.g. ``gfx942``, ``gfx1100``). + + | Default: Not set. A physical GPU is used. + + * - | ``MIGRAPHX_GPU_NUM_CU`` + | Sets the number of compute units for cross-compilation mode. Only used when ``MIGRAPHX_GPU_ARCH`` is set. + + - | Takes a positive integer. + + | Default: ``120`` + + * - | ``MIGRAPHX_GPU_NUM_CHIPLETS`` + | Sets the number of chiplets (XCCs) for cross-compilation mode. Only used when ``MIGRAPHX_GPU_ARCH`` is set. + + - | Takes a positive integer. + + | Default: ``1`` + diff --git a/src/targets/gpu/device_name.cpp b/src/targets/gpu/device_name.cpp index 5242bda8caa..4203e6a3f57 100644 --- a/src/targets/gpu/device_name.cpp +++ b/src/targets/gpu/device_name.cpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -23,6 +23,7 @@ */ #include #include +#include #include #include #include @@ -37,6 +38,11 @@ namespace gpu { MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_SET_GEMM_PROVIDER) +std::string get_gfx_name(const std::string& device_name) +{ + return trim(split_string(device_name, ':').front()); +} + int get_device_id() { int device; @@ -55,41 +61,76 @@ std::string get_device_name() return props.gcnArchName; } -bool gfx_has_fp8fnuz_intrinsics() +static bool gfx_has_fp8fnuz_intrinsics_impl(const std::string& gfx_name) { - const auto device_name = trim(split_string(get_device_name(), ':').front()); - return (starts_with(device_name, "gfx94")); + return (starts_with(gfx_name, "gfx94")); } -bool gfx_has_fp8ocp_intrinsics() +bool gfx_has_fp8fnuz_intrinsics() { return gfx_has_fp8fnuz_intrinsics_impl(get_gfx_name(get_device_name())); } + +bool gfx_has_fp8fnuz_intrinsics(const context& ctx) { - const auto device_name = trim(split_string(get_device_name(), ':').front()); - bool is_navi_with_fp8ocp = starts_with(device_name, "gfx12") and device_name >= "gfx1200"; - bool is_mi_with_fp8ocp = starts_with(device_name, "gfx9") and device_name >= "gfx950"; + return gfx_has_fp8fnuz_intrinsics_impl(get_gfx_name(ctx.get_current_device().get_device_name())); +} + +static bool gfx_has_fp8ocp_intrinsics_impl(const std::string& gfx_name) +{ + bool is_navi_with_fp8ocp = starts_with(gfx_name, "gfx12") and gfx_name >= "gfx1200"; + bool is_mi_with_fp8ocp = starts_with(gfx_name, "gfx9") and gfx_name >= "gfx950"; return (is_navi_with_fp8ocp or is_mi_with_fp8ocp); } -bool gfx_has_bf16_intrinsics() +bool gfx_has_fp8ocp_intrinsics() { return gfx_has_fp8ocp_intrinsics_impl(get_gfx_name(get_device_name())); } + +bool gfx_has_fp8ocp_intrinsics(const context& ctx) { - const auto device_name = trim(split_string(get_device_name(), ':').front()); - return not(starts_with(device_name, "gfx1030")); + return gfx_has_fp8ocp_intrinsics_impl(get_gfx_name(ctx.get_current_device().get_device_name())); } -bool gfx_has_mx_intrinsics() +static bool gfx_has_bf16_intrinsics_impl(const std::string& gfx_name) { - const auto device_name = trim(split_string(get_device_name(), ':').front()); - return starts_with(device_name, "gfx9") and device_name >= "gfx950"; + return not(starts_with(gfx_name, "gfx1030")); +} + +bool gfx_has_bf16_intrinsics() { return gfx_has_bf16_intrinsics_impl(get_gfx_name(get_device_name())); } + +bool gfx_has_bf16_intrinsics(const context& ctx) +{ + return gfx_has_bf16_intrinsics_impl(get_gfx_name(ctx.get_current_device().get_device_name())); +} + +static bool gfx_has_mx_intrinsics_impl(const std::string& gfx_name) +{ + return starts_with(gfx_name, "gfx9") and gfx_name >= "gfx950"; +} + +bool gfx_has_mx_intrinsics() { return gfx_has_mx_intrinsics_impl(get_gfx_name(get_device_name())); } + +bool gfx_has_mx_intrinsics(const context& ctx) +{ + return gfx_has_mx_intrinsics_impl(get_gfx_name(ctx.get_current_device().get_device_name())); } #if MIGRAPHX_USE_HIPBLASLT -// Archs that support hipBLASLt but are defaulted to use rocBLAS. -bool gfx_default_rocblas() +static bool hipblaslt_supported_impl(const std::string& gfx_name) +{ + return (gfx_name == "gfx90a" or (starts_with(gfx_name, "gfx94") and gfx_name >= "gfx942") or + (starts_with(gfx_name, "gfx95") and gfx_name >= "gfx950") or + starts_with(gfx_name, "gfx110") or starts_with(gfx_name, "gfx120")); +} + +static bool gfx_default_rocblas_impl(const std::string& gfx_name) { - const auto device_name = trim(split_string(get_device_name(), ':').front()); - // Default to rocBLAS for gfx90a. return ((string_value_of(MIGRAPHX_SET_GEMM_PROVIDER{}) == "hipblaslt") ? false - : (device_name == "gfx90a")); + : (gfx_name == "gfx90a")); +} + +bool gfx_default_rocblas() { return gfx_default_rocblas_impl(get_gfx_name(get_device_name())); } + +bool gfx_default_rocblas(const context& ctx) +{ + return gfx_default_rocblas_impl(get_gfx_name(ctx.get_current_device().get_device_name())); } #endif @@ -98,12 +139,17 @@ bool hipblaslt_supported() #if !MIGRAPHX_USE_HIPBLASLT return false; #else - const auto device_name = trim(split_string(get_device_name(), ':').front()); - // hipblaslt is supported for MI200 and above, and Navi3x and above. - return (device_name == "gfx90a" or - (starts_with(device_name, "gfx94") and device_name >= "gfx942") or - (starts_with(device_name, "gfx95") and device_name >= "gfx950") or - starts_with(device_name, "gfx110") or starts_with(device_name, "gfx120")); + return hipblaslt_supported_impl(get_gfx_name(get_device_name())); +#endif +} + +bool hipblaslt_supported(const context& ctx) +{ +#if !MIGRAPHX_USE_HIPBLASLT + (void)ctx; + return false; +#else + return hipblaslt_supported_impl(get_gfx_name(ctx.get_current_device().get_device_name())); #endif } From 53840408ce91966800c65a7be0494535870bec81 Mon Sep 17 00:00:00 2001 From: kahmed10 <15948690+kahmed10@users.noreply.github.com> Date: Thu, 23 Apr 2026 16:06:11 -0400 Subject: [PATCH 13/14] small refactor and fixing issues --- src/targets/gpu/eliminate_data_type_for_gpu.cpp | 17 ++++++++++------- src/targets/gpu/target.cpp | 2 +- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/targets/gpu/eliminate_data_type_for_gpu.cpp b/src/targets/gpu/eliminate_data_type_for_gpu.cpp index a42305e0724..69de712a93d 100644 --- a/src/targets/gpu/eliminate_data_type_for_gpu.cpp +++ b/src/targets/gpu/eliminate_data_type_for_gpu.cpp @@ -24,6 +24,7 @@ #include #include #include +#include namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { @@ -69,16 +70,18 @@ static eliminate_data_type for_device_functions() } template -static auto query_device(const context* ctx, F f) +static bool query_device(const context* ctx, F f) { - return ctx != nullptr ? f(*ctx) : f(); + if(ctx != nullptr) + return f(*ctx); + return f(); } static eliminate_data_type for_fp8fnuz(const context* ctx) { std::set unsupported_ops = {}; - if(not query_device(ctx, [](auto&&... args) { return hipblaslt_supported(args...); })) + if(not query_device(ctx, MIGRAPHX_LIFT(hipblaslt_supported))) { unsupported_ops.insert("dot"); unsupported_ops.insert("quant_dot"); @@ -86,7 +89,7 @@ static eliminate_data_type for_fp8fnuz(const context* ctx) insert_miopen_pooling(unsupported_ops); - if(not query_device(ctx, [](auto&&... args) { return gfx_has_fp8fnuz_intrinsics(args...); })) + if(not query_device(ctx, MIGRAPHX_LIFT(gfx_has_fp8fnuz_intrinsics))) { insert_gemm_conv(unsupported_ops); } @@ -98,7 +101,7 @@ static eliminate_data_type for_fp8ocp(const context* ctx) { std::set unsupported_ops = {}; - if(not query_device(ctx, [](auto&&... args) { return hipblaslt_supported(args...); })) + if(not query_device(ctx, MIGRAPHX_LIFT(hipblaslt_supported))) { unsupported_ops.insert("dot"); unsupported_ops.insert("quant_dot"); @@ -106,7 +109,7 @@ static eliminate_data_type for_fp8ocp(const context* ctx) insert_miopen_pooling(unsupported_ops); - if(not query_device(ctx, [](auto&&... args) { return gfx_has_fp8ocp_intrinsics(args...); })) + if(not query_device(ctx, MIGRAPHX_LIFT(gfx_has_fp8ocp_intrinsics))) { insert_gemm_conv(unsupported_ops); } @@ -135,7 +138,7 @@ void eliminate_data_type_for_gpu::apply(module_pass_manager& mpm) const { std::set unsupported_floats; // No BF-16 Support on Navi21 - if(not query_device(ctx, [](auto&&... args) { return gfx_has_bf16_intrinsics(args...); })) + if(not query_device(ctx, MIGRAPHX_LIFT(gfx_has_bf16_intrinsics))) { unsupported_floats.insert(shape::bf16_type); } diff --git a/src/targets/gpu/target.cpp b/src/targets/gpu/target.cpp index 7f46b3f1671..46aa93befc6 100644 --- a/src/targets/gpu/target.cpp +++ b/src/targets/gpu/target.cpp @@ -216,7 +216,7 @@ migraphx::context target::get_context() const { auto num_cu = value_of(MIGRAPHX_GPU_NUM_CU{}, 120); auto num_chiplets = value_of(MIGRAPHX_GPU_NUM_CHIPLETS{}, 1); - return context(std::move(arch), num_cu, num_chiplets); + return context(arch, num_cu, num_chiplets); } return context(gpu::get_device_id()); } From e36c12134568e5b9de5145f532fa197909326147 Mon Sep 17 00:00:00 2001 From: kahmed10 <15948690+kahmed10@users.noreply.github.com> Date: Thu, 23 Apr 2026 16:06:32 -0400 Subject: [PATCH 14/14] formatting --- src/targets/gpu/device_name.cpp | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/targets/gpu/device_name.cpp b/src/targets/gpu/device_name.cpp index 4203e6a3f57..e01a786885f 100644 --- a/src/targets/gpu/device_name.cpp +++ b/src/targets/gpu/device_name.cpp @@ -66,11 +66,15 @@ static bool gfx_has_fp8fnuz_intrinsics_impl(const std::string& gfx_name) return (starts_with(gfx_name, "gfx94")); } -bool gfx_has_fp8fnuz_intrinsics() { return gfx_has_fp8fnuz_intrinsics_impl(get_gfx_name(get_device_name())); } +bool gfx_has_fp8fnuz_intrinsics() +{ + return gfx_has_fp8fnuz_intrinsics_impl(get_gfx_name(get_device_name())); +} bool gfx_has_fp8fnuz_intrinsics(const context& ctx) { - return gfx_has_fp8fnuz_intrinsics_impl(get_gfx_name(ctx.get_current_device().get_device_name())); + return gfx_has_fp8fnuz_intrinsics_impl( + get_gfx_name(ctx.get_current_device().get_device_name())); } static bool gfx_has_fp8ocp_intrinsics_impl(const std::string& gfx_name) @@ -80,7 +84,10 @@ static bool gfx_has_fp8ocp_intrinsics_impl(const std::string& gfx_name) return (is_navi_with_fp8ocp or is_mi_with_fp8ocp); } -bool gfx_has_fp8ocp_intrinsics() { return gfx_has_fp8ocp_intrinsics_impl(get_gfx_name(get_device_name())); } +bool gfx_has_fp8ocp_intrinsics() +{ + return gfx_has_fp8ocp_intrinsics_impl(get_gfx_name(get_device_name())); +} bool gfx_has_fp8ocp_intrinsics(const context& ctx) { @@ -92,7 +99,10 @@ static bool gfx_has_bf16_intrinsics_impl(const std::string& gfx_name) return not(starts_with(gfx_name, "gfx1030")); } -bool gfx_has_bf16_intrinsics() { return gfx_has_bf16_intrinsics_impl(get_gfx_name(get_device_name())); } +bool gfx_has_bf16_intrinsics() +{ + return gfx_has_bf16_intrinsics_impl(get_gfx_name(get_device_name())); +} bool gfx_has_bf16_intrinsics(const context& ctx) {