diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7a1d00a4faa..efdde65a2e4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -57,6 +57,7 @@ add_library(migraphx color.cpp common.cpp common_dims.cpp + compile_modes.cpp compile_src.cpp convert_to_json.cpp cpp_generator.cpp @@ -450,5 +451,3 @@ rocm_export_targets( Threads ${MIGRAPHX_CONFIG_DEPENDS} ) - - diff --git a/src/api/include/migraphx/migraphx.h b/src/api/include/migraphx/migraphx.h index 254c8282672..6d9ad82fcc9 100644 --- a/src/api/include/migraphx/migraphx.h +++ b/src/api/include/migraphx/migraphx.h @@ -66,6 +66,14 @@ typedef enum } migraphx_status; +typedef enum +{ + migraphx_compile_mode_eager = 0, + migraphx_compile_mode_balanced = 50, + migraphx_compile_mode_max = 100, + +} migraphx_compile_mode; + #define MIGRAPHX_SHAPE_GENERATE_ENUM_TYPES(x, t) migraphx_shape_##x, /// An enum to represent the different data type inputs typedef enum diff --git a/src/api/include/migraphx/migraphx.hpp b/src/api/include/migraphx/migraphx.hpp index beeebf20b86..3f99976ed24 100644 --- a/src/api/include/migraphx/migraphx.hpp +++ b/src/api/include/migraphx/migraphx.hpp @@ -1193,6 +1193,13 @@ struct compile_options : MIGRAPHX_HANDLE_BASE(compile_options) { call(&migraphx_compile_options_set_exhaustive_tune_flag, this->get_handle_ptr(), value); } + + /// Set compilation mode (0-100). 0 = fast compile, low performance. + /// 100 = best compile with max optimizations, best performance. + void set_compile_mode(int8_t value = migraphx_compile_mode_balanced) + { + call(&migraphx_compile_options_set_compile_mode, this->get_handle_ptr(), value); + } }; /// A program represents the all computation graphs to be compiled and executed diff --git a/src/api/migraphx.py b/src/api/migraphx.py index 50ac934925a..dde6a05b48e 100644 --- a/src/api/migraphx.py +++ b/src/api/migraphx.py @@ -431,6 +431,9 @@ def compile_options(h): h.method('set_exhaustive_tune_flag', api.params(value='bool'), invoke='migraphx::set_exhaustive_tune_flag($@)') + h.method('set_compile_mode', + api.params(value='int8_t'), + invoke='migraphx::set_compile_mode($@)') api.add_function('migraphx_parse_onnx', diff --git a/src/compile_modes.cpp b/src/compile_modes.cpp new file mode 100644 index 00000000000..142c873b209 --- /dev/null +++ b/src/compile_modes.cpp @@ -0,0 +1,84 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { + +compile_modes convert_to_compile_mode(const uint8_t mode) +{ + auto clamped = static_cast(std::clamp(mode, 0, 100)); + if(clamped != mode) + log::warn() << "Compile mode value " << static_cast(mode) + << " out of range [0, 100], clamping to " << static_cast(clamped); + + static const std::array modes = { + compile_modes::EAGER, compile_modes::BALANCED, compile_modes::MAX}; + + auto it = std::find_if(modes.begin(), modes.end(), [&](compile_modes m) { + return static_cast(m) == clamped; + }); + if(it != modes.end()) + return *it; + + log::warn() << "Compile mode value " << static_cast(clamped) + << " does not match a known mode, using closest match"; + return *std::min_element(modes.begin(), modes.end(), by(std::less<>{}, [&](compile_modes m) { + return std::abs(static_cast(clamped) - static_cast(m)); + })); +} + +compile_modes convert_to_compile_mode(const std::string& mode) +{ + auto lower = to_lower(mode); + if(lower == "eager") + return compile_modes::EAGER; + if(lower == "balanced") + return compile_modes::BALANCED; + if(lower == "max") + return compile_modes::MAX; + try + { + int val = std::stoi(mode); + if(val < 0 or val > 100) + log::warn() << "Compile mode value " << val << " out of range [0, 100], clamping to " + << std::clamp(val, 0, 100); + return convert_to_compile_mode(static_cast(std::clamp(val, 0, 100))); + } + catch(const std::invalid_argument&) + { + MIGRAPHX_THROW("Invalid compile mode: " + mode + + ". Expected eager, balanced, max, or an integer 0-100"); + } +} + +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/src/driver/main.cpp b/src/driver/main.cpp index 1e1e774b7d6..ad73e131da7 100644 --- a/src/driver/main.cpp +++ b/src/driver/main.cpp @@ -681,6 +681,14 @@ struct compiler {"--exhaustive-tune"}, ap.help("Exhastively search for best tuning parameters for kernels"), ap.set_value(true)); + ap(co.compile_mode, + {"--compile-mode"}, + ap.help("Set compilation mode: eager, balanced, max, or an integer 0-100"), + ap.write_action([](auto&, auto& x, const auto& params) { + if(params.empty()) + throw std::runtime_error("Flag with no value."); + x = convert_to_compile_mode(params.back()); + })); ap(to_fp16, {"--fp16"}, ap.help("Quantize for fp16"), ap.set_value(true)); ap(to_bf16, {"--bf16"}, ap.help("Quantize for bf16"), ap.set_value(true)); ap(to_int8, {"--int8"}, ap.help("Quantize for int8"), ap.set_value(true)); diff --git a/src/include/migraphx/compile_modes.hpp b/src/include/migraphx/compile_modes.hpp new file mode 100644 index 00000000000..4632630b8d2 --- /dev/null +++ b/src/include/migraphx/compile_modes.hpp @@ -0,0 +1,47 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_MIGRAPHX_COMPILE_MODES_HPP +#define MIGRAPHX_GUARD_MIGRAPHX_COMPILE_MODES_HPP + +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { + +enum class compile_modes +{ + EAGER = 0, + BALANCED = 50, + MAX = 100 +}; + +MIGRAPHX_EXPORT compile_modes convert_to_compile_mode(const uint8_t mode); +MIGRAPHX_EXPORT compile_modes convert_to_compile_mode(const std::string& mode); + +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif // MIGRAPHX_GUARD_MIGRAPHX_COMPILE_MODES_HPP diff --git a/src/include/migraphx/compile_options.hpp b/src/include/migraphx/compile_options.hpp index 64eb3018cf3..ccbb4efee12 100644 --- a/src/include/migraphx/compile_options.hpp +++ b/src/include/migraphx/compile_options.hpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -25,6 +25,7 @@ #define MIGRAPHX_GUARD_RTGLIB_COMPILE_OPTIONS_HPP #include +#include #include namespace migraphx { @@ -41,6 +42,8 @@ struct compile_options bool fast_math = true; bool exhaustive_tune = false; + compile_modes compile_mode = compile_modes::BALANCED; + tracer trace{}; }; diff --git a/src/include/migraphx/output_iterator.hpp b/src/include/migraphx/output_iterator.hpp index 6efd2532396..a4449870dda 100644 --- a/src/include/migraphx/output_iterator.hpp +++ b/src/include/migraphx/output_iterator.hpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -26,6 +26,7 @@ #include #include +#include #include namespace migraphx { diff --git a/src/py/migraphx_py.cpp b/src/py/migraphx_py.cpp index ccd4321028a..1fae4f0831f 100644 --- a/src/py/migraphx_py.cpp +++ b/src/py/migraphx_py.cpp @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #ifdef HAVE_GPU @@ -553,17 +554,20 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m) const migraphx::target& t, bool offload_copy, bool fast_math, - bool exhaustive_tune) { + bool exhaustive_tune, + migraphx::compile_modes compile_mode) { migraphx::compile_options options; options.offload_copy = offload_copy; options.fast_math = fast_math; options.exhaustive_tune = exhaustive_tune; + options.compile_mode = compile_mode; p.compile(t, options); }, py::arg("t"), py::arg("offload_copy") = true, py::arg("fast_math") = true, - py::arg("exhaustive_tune") = false) + py::arg("exhaustive_tune") = false, + py::arg("compile_mode") = migraphx::compile_modes::BALANCED) .def("get_main_module", [](const migraphx::program& p) { return p.get_main_module(); }) .def( "create_module", @@ -634,6 +638,11 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m) .value("reverse", migraphx::op::rnn_direction::reverse) .value("bidirectional", migraphx::op::rnn_direction::bidirectional); + py::enum_(m, "compile_modes") + .value("eager", migraphx::compile_modes::EAGER) + .value("balanced", migraphx::compile_modes::BALANCED) + .value("max", migraphx::compile_modes::MAX); + py::class_(m, "macro") .def(py::init([](const std::string& name, py::kwargs kwargs) { migraphx::value v = migraphx::value::object{}; diff --git a/src/targets/gpu/CMakeLists.txt b/src/targets/gpu/CMakeLists.txt index 6d66ccdc573..abd57dc559f 100644 --- a/src/targets/gpu/CMakeLists.txt +++ b/src/targets/gpu/CMakeLists.txt @@ -156,6 +156,7 @@ add_library(migraphx_gpu allocation_model.cpp code_object_op.cpp compile_ops.cpp + pipeline_factory.cpp compile_gen.cpp compile_hip.cpp compile_hip_code_object.cpp diff --git a/src/targets/gpu/compile_ops.cpp b/src/targets/gpu/compile_ops.cpp index 52272b1d7af..9a58c0af6b8 100644 --- a/src/targets/gpu/compile_ops.cpp +++ b/src/targets/gpu/compile_ops.cpp @@ -317,7 +317,7 @@ struct compile_plan } template - void add_compiles(Vector& compiles) + void add_compiles(Vector& compiles, bool skip_benchmark) { if(config.has_value()) { @@ -337,7 +337,7 @@ struct compile_plan if(solutions.empty()) MIGRAPHX_THROW("No solutions provided for " + preop.name() + " with " + problem_string() + "\n\n" + print_modules()); - if(enabled(MIGRAPHX_SKIP_BENCHMARKING{}) or solutions.size() == 1) + if(skip_benchmark or enabled(MIGRAPHX_SKIP_BENCHMARKING{}) or solutions.size() == 1) { ctx->get_problem_cache().insert(preop.name(), problem, solutions.front()); results.resize(1); @@ -514,7 +514,8 @@ static void par_compile(std::size_t n, F f) struct compile_manager { std::vector cps; - bool exhaustive = false; + bool exhaustive = false; + bool skip_benchmark = false; template void add_plan(Ts&&... xs) @@ -532,7 +533,7 @@ struct compile_manager std::vector> compiles; for(auto& cp : cps) { - cp.add_compiles(compiles); + cp.add_compiles(compiles, skip_benchmark); } par_compile(compiles.size(), [&](auto i) { compiles[i](); }); @@ -576,7 +577,8 @@ struct compile_manager void compile_ops::apply(module& m) const { compile_manager cm; - cm.exhaustive = exhaustive_tune; + cm.exhaustive = exhaustive_tune; + cm.skip_benchmark = skip_benchmark; // Find all precompile ops for(auto ins : iterator_for(m)) { diff --git a/src/targets/gpu/include/migraphx/gpu/compile_ops.hpp b/src/targets/gpu/include/migraphx/gpu/compile_ops.hpp index 6986822a5c1..8445dc5a8ae 100644 --- a/src/targets/gpu/include/migraphx/gpu/compile_ops.hpp +++ b/src/targets/gpu/include/migraphx/gpu/compile_ops.hpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -40,6 +40,7 @@ struct MIGRAPHX_GPU_EXPORT compile_ops { context* ctx = nullptr; bool exhaustive_tune = false; + bool skip_benchmark = false; std::string name() const { return "gpu::compile_ops"; } void apply(module& m) const; }; diff --git a/src/targets/gpu/include/migraphx/gpu/pipeline_factory.hpp b/src/targets/gpu/include/migraphx/gpu/pipeline_factory.hpp new file mode 100644 index 00000000000..4bde90eda1e --- /dev/null +++ b/src/targets/gpu/include/migraphx/gpu/pipeline_factory.hpp @@ -0,0 +1,58 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_GPU_PIPELINE_FACTORY_HPP +#define MIGRAPHX_GUARD_GPU_PIPELINE_FACTORY_HPP + +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +struct MIGRAPHX_GPU_EXPORT pipeline_factory +{ + migraphx::context* gctx_ptr = nullptr; + compile_options options = {}; + + migraphx::context* get_generic_context() const { return gctx_ptr; } + + context* get_context() const; + + std::vector dynamic_shapes_pipeline() const; + std::vector required_pipeline() const; + std::vector optimize_rewrite_pipeline() const; + std::vector prefuse_pipeline() const; + std::vector rewrite_simplify_pipeline() const; + std::vector fusion_pipeline() const; + std::vector backend_pipeline() const; +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif // MIGRAPHX_GUARD_GPU_PIPELINE_FACTORY_HPP diff --git a/src/targets/gpu/pipeline_factory.cpp b/src/targets/gpu/pipeline_factory.cpp new file mode 100644 index 00000000000..f89b6cce1d9 --- /dev/null +++ b/src/targets/gpu/pipeline_factory.cpp @@ -0,0 +1,238 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_SCHEDULE_PASS) +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_NHWC) +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_REWRITE_DOT) +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_REWRITE_LRN) +#ifndef _WIN32 +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_CK) +#endif +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_FULL_DYNAMIC) + +context* pipeline_factory::get_context() const { return any_cast(gctx_ptr); } + +// clang-format off +std::vector pipeline_factory::dynamic_shapes_pipeline() const +{ + return { + enable_pass(disabled(MIGRAPHX_ENABLE_FULL_DYNAMIC{}), split_single_dyn_dim{}), + dead_code_elimination{}, + simplify_dyn_ops{}, + dead_code_elimination{} + }; +} + +std::vector pipeline_factory::required_pipeline() const +{ + return { + normalize_ops{}, + dead_code_elimination{}, + eliminate_identity{}, + dead_code_elimination{}, + enable_pass(not gpu::gfx_has_fp8ocp_intrinsics() and gpu::gfx_has_fp8fnuz_intrinsics(), fp8_ocp_to_fnuz{}), + enable_pass(not gpu::gfx_has_fp8ocp_intrinsics() and gpu::gfx_has_fp8fnuz_intrinsics(), dead_code_elimination{}), + simplify_qdq{.use_mx_quant=gpu::gfx_has_mx_intrinsics()}, + enable_pass(not mlir_enabled(), rewrite_quantization{}), + dead_code_elimination{}, + rewrite_rnn{}, + dead_code_elimination{}, + eliminate_data_type_for_gpu{.disable_64bit = options.compile_mode != compile_modes::EAGER and options.fast_math}, + rewrite_resize{.affine_only = true}, + dead_code_elimination{} + }; +} + +std::vector pipeline_factory::optimize_rewrite_pipeline() const +{ + return { + simplify_reshapes{.enable_gather_rewrite = true}, + eliminate_identity{}, + eliminate_pad{}, + dead_code_elimination{}, + insert_pad{{"convolution"}}, + dead_code_elimination{}, + inline_module{}, + enable_pass(disabled(MIGRAPHX_ENABLE_FULL_DYNAMIC{}), rewrite_pooling{.rewrite_lrn = (not MIGRAPHX_USE_MIOPEN or enabled(MIGRAPHX_REWRITE_LRN{}))}), + dead_code_elimination{}, + rewrite_gelu{options.fast_math}, + optimize_module{}, + layout_convolution{.channels_last = enabled(MIGRAPHX_ENABLE_NHWC{})}, + dead_code_elimination{} + }; +} + +std::vector pipeline_factory::prefuse_pipeline() const +{ + return { + fuse_horizontal{}, + dead_code_elimination{}, + prefuse_ops{get_context()}, + dead_code_elimination{} + }; +} + +std::vector pipeline_factory::rewrite_simplify_pipeline() const +{ + return { + rewrite_reduce{}, + rewrite_topk{}, + rewrite_low_precision{}, + enable_pass(enabled(MIGRAPHX_ENABLE_REWRITE_DOT{}), rewrite_dot{}), + dead_code_elimination{}, + propagate_precision{}, + dead_code_elimination{}, + simplify_reshapes{.enable_op_shape_transform_op=true}, + dead_code_elimination{} + }; +} + +std::vector pipeline_factory::fusion_pipeline() const +{ + return { + enable_pass(mlir_enabled(), fuse_attention{.attn_enabled = mlir_attention_enabled(get_context()), + .flash_decoding_enabled = mlir_flash_decoding_enabled()}), + dead_code_elimination{}, + optimize_module{}, + fuse_pointwise_reduce{}, + dead_code_elimination{}, +#ifndef _WIN32 + enable_pass(enabled(MIGRAPHX_ENABLE_CK{}), fuse_ck{}), +#endif + dead_code_elimination{}, + enable_pass(mlir_enabled(), fuse_mlir{get_context()}), + dead_code_elimination{}, + fuse_concat{}, + dead_code_elimination{}, + auto_contiguous{}, + dead_code_elimination{} + }; +} + +std::vector pipeline_factory::backend_pipeline() const +{ + return { + lowering{get_context(), options.offload_copy}, + eliminate_contiguous{"gpu::contiguous"}, + dead_code_elimination{}, + adjust_allocation{gpu_allocation_model{.use_hip_allocate = false}}, + dead_code_elimination{}, + eliminate_concat{concat_gpu_optimization{}}, + dead_code_elimination{}, + #if MIGRAPHX_USE_MIOPEN + compile_miopen{get_context()}, + dead_code_elimination{}, + #endif + fuse_ops{get_context(), options.fast_math}, + dead_code_elimination{}, + #if MIGRAPHX_USE_HIPBLASLT + compile_hipblaslt{get_context()}, + dead_code_elimination{}, + #endif + replace_allocate{gpu_allocation_model{}, options.offload_copy}, + dead_code_elimination{}, + adjust_allocation{gpu_allocation_model{}}, + dead_code_elimination{}, + compile_ops{get_context(), options.exhaustive_tune, options.compile_mode == compile_modes::EAGER}, + dead_code_elimination{}, + promote_literals{}, + dead_code_elimination{}, + write_literals{get_context()}, + schedule{gpu::schedule_model{get_context()->get_current_device().nstreams()}, not enabled(MIGRAPHX_DISABLE_SCHEDULE_PASS{})}, + memory_coloring{"hip::allocate"}, + sync_device{}, + preallocate_param{"scratch", gpu_allocation_model{}}, + dead_code_elimination{}, + eliminate_allocation{"hip::allocate"}, + check_context{}, + normalize_ops{}, + dead_code_elimination{}, + eliminate_identity{} + }; +} +// clang-format on + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/src/targets/gpu/target.cpp b/src/targets/gpu/target.cpp index 3ed3e72033d..9ef6bdd80d2 100644 --- a/src/targets/gpu/target.cpp +++ b/src/targets/gpu/target.cpp @@ -21,186 +21,55 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include -#include namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { namespace gpu { -MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_SCHEDULE_PASS) -MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_NHWC) -MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_REWRITE_DOT) -MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_REWRITE_LRN) -#ifndef _WIN32 -MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_CK) -#endif -MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_SET_GEMM_PROVIDER) -MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_FULL_DYNAMIC) - std::vector target::get_passes(migraphx::context& gctx, const compile_options& options) const { auto& ctx = any_cast(gctx); ctx.set_exhaustive_tune_flag(options.exhaustive_tune); ctx.load_problem_cache(); - // clang-format off - return + if(options.compile_mode == compile_modes::MAX) + ctx.set_exhaustive_tune_flag(true); + + pipeline_factory factory{&gctx, options}; + + std::vector> pipelines; + if(options.compile_mode == compile_modes::EAGER) { - enable_pass(disabled(MIGRAPHX_ENABLE_FULL_DYNAMIC{}), split_single_dyn_dim{}), - dead_code_elimination{}, - simplify_dyn_ops{}, - dead_code_elimination{}, - normalize_ops{}, - dead_code_elimination{}, - eliminate_identity{}, - dead_code_elimination{}, - enable_pass(not gpu::gfx_has_fp8ocp_intrinsics() and gpu::gfx_has_fp8fnuz_intrinsics(), fp8_ocp_to_fnuz{}), - enable_pass(not gpu::gfx_has_fp8ocp_intrinsics() and gpu::gfx_has_fp8fnuz_intrinsics(), dead_code_elimination{}), - simplify_qdq{.use_mx_quant=gpu::gfx_has_mx_intrinsics()}, - enable_pass(not mlir_enabled(), rewrite_quantization{}), - dead_code_elimination{}, - rewrite_rnn{}, - dead_code_elimination{}, - eliminate_data_type_for_gpu{.disable_64bit = options.fast_math}, - rewrite_resize{.affine_only = true}, - dead_code_elimination{}, - simplify_reshapes{.enable_gather_rewrite = true}, - eliminate_identity{}, - eliminate_pad{}, - dead_code_elimination{}, - insert_pad{{"convolution"}}, - dead_code_elimination{}, - inline_module{}, - enable_pass(disabled(MIGRAPHX_ENABLE_FULL_DYNAMIC{}), rewrite_pooling{.rewrite_lrn = (not MIGRAPHX_USE_MIOPEN or enabled(MIGRAPHX_REWRITE_LRN{}))}), - dead_code_elimination{}, - rewrite_gelu{options.fast_math}, - optimize_module{}, - layout_convolution{.channels_last = enabled(MIGRAPHX_ENABLE_NHWC{})}, - dead_code_elimination{}, - fuse_horizontal{}, - dead_code_elimination{}, - prefuse_ops{&ctx}, - dead_code_elimination{}, - dead_code_elimination{}, - rewrite_reduce{}, - rewrite_topk{}, - rewrite_low_precision{}, - enable_pass(enabled(MIGRAPHX_ENABLE_REWRITE_DOT{}), rewrite_dot{}), - dead_code_elimination{}, - propagate_precision{}, - dead_code_elimination{}, - simplify_reshapes{.enable_op_shape_transform_op=true}, - dead_code_elimination{}, - enable_pass(mlir_enabled(), fuse_attention{.attn_enabled = mlir_attention_enabled(&ctx), - .flash_decoding_enabled = mlir_flash_decoding_enabled()}), - dead_code_elimination{}, - optimize_module{}, - fuse_pointwise_reduce{}, - dead_code_elimination{}, -#ifndef _WIN32 - enable_pass(enabled(MIGRAPHX_ENABLE_CK{}), fuse_ck{}), -#endif - dead_code_elimination{}, - enable_pass(mlir_enabled(), fuse_mlir{&ctx}), - dead_code_elimination{}, - fuse_concat{}, - dead_code_elimination{}, - auto_contiguous{}, - dead_code_elimination{}, - lowering{&ctx, options.offload_copy}, - eliminate_contiguous{"gpu::contiguous"}, - dead_code_elimination{}, - adjust_allocation{gpu_allocation_model{.use_hip_allocate = false}}, - dead_code_elimination{}, - eliminate_concat{concat_gpu_optimization{}}, - dead_code_elimination{}, -#if MIGRAPHX_USE_MIOPEN - compile_miopen{&gctx}, - dead_code_elimination{}, -#endif - fuse_ops{&ctx, options.fast_math}, - dead_code_elimination{}, -#if MIGRAPHX_USE_HIPBLASLT - compile_hipblaslt{&gctx}, - dead_code_elimination{}, -#endif - replace_allocate{gpu_allocation_model{}, options.offload_copy}, - dead_code_elimination{}, - adjust_allocation{gpu_allocation_model{}}, - dead_code_elimination{}, - compile_ops{&ctx, options.exhaustive_tune}, - dead_code_elimination{}, - promote_literals{}, - dead_code_elimination{}, - write_literals{&ctx}, - schedule{gpu::schedule_model{ctx.get_current_device().nstreams()}, not enabled(MIGRAPHX_DISABLE_SCHEDULE_PASS{})}, - memory_coloring{"hip::allocate"}, - sync_device{}, - preallocate_param{"scratch", gpu_allocation_model{}}, - dead_code_elimination{}, - eliminate_allocation{"hip::allocate"}, - check_context{}, - normalize_ops{}, - dead_code_elimination{}, - eliminate_identity{} - }; - // clang-format on + pipelines = { + factory.dynamic_shapes_pipeline(), + factory.required_pipeline(), + factory.prefuse_pipeline(), + factory.fusion_pipeline(), + factory.backend_pipeline(), + }; + } + else + { + pipelines = { + factory.dynamic_shapes_pipeline(), + factory.required_pipeline(), + factory.optimize_rewrite_pipeline(), + factory.rewrite_simplify_pipeline(), + factory.prefuse_pipeline(), + factory.fusion_pipeline(), + factory.backend_pipeline(), + }; + } + + std::vector passes; + std::copy(pipelines.begin(), pipelines.end(), join_back_inserter(passes)); + return passes; } std::string target::name() const { return "gpu"; } diff --git a/tools/api/api.cpp b/tools/api/api.cpp index 79f5785357e..1fc9b11e526 100644 --- a/tools/api/api.cpp +++ b/tools/api/api.cpp @@ -154,6 +154,11 @@ static void set_exhaustive_tune_flag(compile_options& options, bool value) options.exhaustive_tune = value; } +static void set_compile_mode(compile_options& options, int8_t value) +{ + options.compile_mode = convert_to_compile_mode(value); +} + static void set_file_format(file_options& options, const char* format) { options.format = format; } static void set_default_dim_value(onnx_options& options, size_t value) diff --git a/tools/api/migraphx.h b/tools/api/migraphx.h index 263dacd0160..e93c7360141 100644 --- a/tools/api/migraphx.h +++ b/tools/api/migraphx.h @@ -76,6 +76,14 @@ typedef enum } migraphx_shape_datatype_t; #undef MIGRAPHX_SHAPE_GENERATE_ENUM_TYPES +typedef enum +{ + migraphx_compile_mode_eager = 0, + migraphx_compile_mode_balanced = 50, + migraphx_compile_mode_max = 100, + +} migraphx_compile_mode; + <% generate_c_header() %>