Skip to content
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
82ba1c3
Rewrite gather to transpose/reshape/broadcast/slice (#4550)
pfultz2 Feb 6, 2026
79c531d
Fix bug in gather rewrite with nhwc shapes (#4610)
pfultz2 Feb 14, 2026
f0291c8
Horizontally fuse cross-embedding gather operators (#4599)
kahmed10 Mar 7, 2026
e7efd73
Fix gather rewrite crash: validate strided view element count (#4656)
urpetkov-amd Mar 10, 2026
be7bbcf
Move move_output_instructions_after to the module class (#4601)
pfultz2 Feb 11, 2026
ff852c7
fixed conflict for the lack of no_broadcast param in rocm7.2
kentqian Apr 16, 2026
d77fb99
Fix error with rewrite_reshapes (#4482)
pfultz2 Jan 9, 2026
e0c10c6
Fix shape_transform_descriptor::rebase when flattening a broadcasted …
pfultz2 Jan 28, 2026
7e245d3
find_nop_reshapes: Remove extra assignments/inserts (#4696)
TedThemistokleous Mar 25, 2026
6b246f6
Fix comparison for find_concat_op to match the correct I/O (#4661)
TedThemistokleous Mar 12, 2026
89c9178
[AIMIGRAPHX-801] Fix int convert bf16/fp16 (#4745)
TedThemistokleous Apr 8, 2026
3fbe8c6
Improve tuning for splitk (#4486)
pfultz2 Jan 13, 2026
63bd943
Return vector for output alias (#4540)
pfultz2 Feb 5, 2026
f5b2e79
Filter zero arg operators during ONNX Parsing (#4567)
TedThemistokleous Apr 17, 2026
df5ad7a
Update license file for outstanding files
TedThemistokleous Apr 17, 2026
08a25c5
[AIMIGRAPHX-885] Add external stream support to context
TedThemistokleous Apr 9, 2026
b0070ad
Update rocMLIR to fix bert_squad and bert_tf regression
ahsan-ca Apr 10, 2026
180953a
Update license
ahsan-ca Apr 21, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#####################################################################################
# The MIT License (MIT)
#
# Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
# Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -77,6 +77,7 @@ add_library(migraphx
fp8_ocp_to_fnuz.cpp
fuse_attention.cpp
fuse_concat.cpp
fuse_horizontal.cpp
fuse_pointwise.cpp
fuse_pointwise_reduce.cpp
fuse_reduce.cpp
Expand Down
7 changes: 5 additions & 2 deletions src/adjust_allocation.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -43,7 +43,10 @@ void adjust_allocation::apply(module& m) const
if(ins->get_operator().is_context_free())
continue;

auto alias_ins = instruction::get_output_alias(ins, true);
auto aliases = instruction::get_output_alias(ins, true);
if(aliases.size() != 1)
continue;
auto alias_ins = aliases.front();
if(alias_ins->name() != model.name() and alias_ins->name() != "@param")
continue;
// shape allocated is different from actual shape
Expand Down
16 changes: 3 additions & 13 deletions src/api/api.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -377,19 +377,9 @@ struct custom_operation
return op.compute(std::move(ctx), std::move(output_shape), std::move(inputs));
}

std::ptrdiff_t output_alias(std::vector<shape> inputs) const
std::vector<std::size_t> output_alias(std::vector<shape> inputs) const
{
auto alias_vec = op.output_alias(std::move(inputs));
// TODO: For now, only support one output alias
if(alias_vec.empty())
{
return -1;
}
if(alias_vec.size() > 1)
{
MIGRAPHX_THROW("Currently, CustomOps in MIGraphX only supports one output_alias");
}
return alias_vec.front();
return op.output_alias(std::move(inputs));
}

bool runs_on_offload_target() const { return op.runs_on_offload_target(); }
Expand Down
75 changes: 66 additions & 9 deletions src/common_dims.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -97,25 +97,66 @@ static bool compute_common_dim(std::vector<std::size_t>& cd_dims,
assert(state1.get() < state2.get());
auto d2 = state2.get();
auto dims = state1.dims_for(d2);
auto n = elements(dims);
auto naxes = distance(dims);

if(naxes == 0)
return false;

// Check if state1 has a remainder from previous split
bool has_remainder = (state1.rem != 1);

// Compute the product of dimensions, adjusting for remainder if needed
auto n = elements(dims);
if(has_remainder and naxes > 0)
{
n = n / *dims.begin() * (*dims.begin() / state1.rem);
}

// If not divisible then we can't compute a common dim
if((d2 % n) != 0)
return false;

auto rem = d2 / n;
state1.add_multi_axes(naxes, cd_dims.size());
state2.add_axes(rem == 1 ? naxes : naxes + 1, cd_dims.size());
auto start_pos = cd_dims.size();

// Add axes mappings
if(has_remainder)
{
// state1: dimension was split, keep axes together
state1.add_axes(naxes, start_pos);
// state2: axes should include the previous remainder dimension
state2.add_axes(rem == 1 ? naxes : naxes + 1, start_pos - 1);
}
else
{
// state1: separate axes for each dimension
state1.add_multi_axes(naxes, start_pos);
// state2: normal axes mapping
state2.add_axes(rem == 1 ? naxes : naxes + 1, start_pos);
}

// Add dimensions to cd_dims
if(has_remainder and naxes > 0)
{
// Adjust the first dimension by dividing by the remainder
cd_dims.push_back(*dims.begin() / state1.rem);
cd_dims.insert(cd_dims.end(), std::next(dims.begin()), dims.end());
}
else
{
cd_dims.insert(cd_dims.end(), dims.begin(), dims.end());
}

// Add remainder dimension if needed
if(rem != 1)
cd_dims.push_back(rem);

// Update states
state1.rem = rem;
state2.rem = 1;

cd_dims.insert(cd_dims.end(), dims.begin(), dims.end());
if(state1.rem != 1)
cd_dims.push_back(state1.rem);
state1.next(distance(dims));
state1.next(naxes);
state2.next();

return true;
}

Expand Down Expand Up @@ -152,6 +193,22 @@ common_dims common_dims::compute(const std::vector<std::size_t>& dims1,
return {};
}
}

// Handle a leftover dimension on one state: the state has not reached its end,
// still carries a remainder (rem != 1), and state.get() yields 1.
// NOTE(review): common_dim_state::get() is not visible here; presumably it accounts
// for rem, so a result of 1 means the remainder equals the next dimension - confirm.
// In this case the dimension was already added as a remainder, we just need the axes mapping
auto handle_remaining_dimension = [&cd](common_dim_state& state) {
if(not state.is_end() and state.rem != 1 and state.get() == 1)
{
// The remainder already added to cd_dims matches this dimension
// Add a single axes mapping that points at the last entry of cd.dims,
// then advance the state past the consumed dimension
state.axes_map->push_back({cd.dims.size() - 1});
state.next();
}
};

handle_remaining_dimension(state1);
handle_remaining_dimension(state2);

assert(elements(dims1) == elements(cd.dims));
return cd;
}
Expand Down
12 changes: 5 additions & 7 deletions src/dead_code_elimination.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -48,13 +48,11 @@ void dead_code_elimination::apply(module& m) const
// Skip the last instruction
if(i == last)
break;
// Skip instruction with empty shape as output unless its [dynamic, builtin, undefined,
// Skip instruction with empty shape as output unless it is [builtin, undefined,
// identity, allocate, or tuple_type]
if((not i->get_shape().dynamic() and
(i->get_shape().elements() == 0 and
i->get_shape().type() != migraphx::shape::tuple_type)) and
not(i->name().front() == '@') and not contains({"identity", "allocate"}, i->name()) and
not i->is_undefined())
if(i->get_shape().ndim() == 0 and not i->is_undefined() and
i->get_shape().type() != migraphx::shape::tuple_type and i->name().front() != '@' and
not contains({"identity", "allocate"}, i->name()))
continue;
assert(std::distance(m.begin(), i) <= std::distance(m.begin(), last));
std::unordered_set<instruction_ref> visited;
Expand Down
22 changes: 13 additions & 9 deletions src/driver/perf.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -118,19 +118,23 @@ bool is_offload_copy_set(const program& p)
{
if(i.name() == "hip::copy_to_gpu")
{
auto copy_arg = instruction::get_output_alias(i.inputs().front(), true);
param_ins.erase(copy_arg);
auto copy_args = instruction::get_output_alias(i.inputs().front(), true);
for(auto copy_arg : copy_args)
param_ins.erase(copy_arg);
}
else if(i.name() == "@return")
{
auto return_args = i.inputs();
for(const auto& j : return_args)
{
auto alias_ins = instruction::get_output_alias(j, true);
if((alias_ins->name() == "@param" and param_ins.erase(alias_ins) == 0) or
(alias_ins->name() != "hip::copy_from_gpu"))
return std::all_of(return_args.begin(), return_args.end(), [&](const auto& j) {
auto aliases = instruction::get_output_alias(j, true);
return std::all_of(aliases.begin(), aliases.end(), [&](instruction_ref alias_ins) {
if(alias_ins->name() == "hip::copy_from_gpu")
return true;
if(alias_ins->name() == "@param")
return not contains(param_ins, alias_ins);
return false;
}
});
});
}
}
return param_ins.empty();
Expand Down
8 changes: 4 additions & 4 deletions src/driver/trim.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -37,11 +37,11 @@ inline namespace MIGRAPHX_INLINE_NS {

static instruction_ref capture_arg(std::unordered_set<instruction_ref>& s, instruction_ref ins)
{
auto alias = instruction::get_output_alias(ins, true);
if(alias != ins)
auto aliases = instruction::get_output_alias(ins, true);
if(aliases.size() == 1 and aliases.front() != ins)
{
s.insert(ins);
return capture_arg(s, alias);
return capture_arg(s, aliases.front());
}
if(contains({"reshape", "contiguous"}, ins->name()))
{
Expand Down
15 changes: 9 additions & 6 deletions src/eliminate_concat.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -70,11 +70,14 @@ void eliminate_concat::apply(module& m) const
// Where are the allocations for the tensors to be concatenated?
std::vector<instruction_ref> allocations;

std::transform(
ins->inputs().begin(),
std::prev(ins->inputs().end()),
std::back_inserter(allocations),
[&](instruction_ref x) { return instruction::get_output_alias(x, true); });
std::transform(ins->inputs().begin(),
std::prev(ins->inputs().end()),
std::back_inserter(allocations),
[&](instruction_ref x) {
auto aliases = instruction::get_output_alias(x, true);
// cppcheck-suppress returnDanglingLifetime
return aliases.front();
});

if(std::any_of(allocations.begin(), allocations.end(), [&](auto x) {
return x->name() != concat_opt.allocate();
Expand Down
Loading
Loading