-
Notifications
You must be signed in to change notification settings - Fork 128
Improve yolov10n Performance #4785
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
1230be4
b8052a7
7c7af24
e4aadb8
cf1b560
c2d4a90
b25576d
03c7850
2cf6bd3
37a6fce
12e991e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -590,6 +590,15 @@ std::vector<argument> program::eval(const parameter_map& params, | |
| { | ||
| auto& contexts = this->impl->contexts; | ||
|
|
||
| if(contexts.size() == 1 and enabled(MIGRAPHX_ENABLE_HIP_GRAPH{})) | ||
| { | ||
| auto& ctx = contexts.front(); | ||
| auto run = ctx.get_capture(); | ||
| if(run != nullptr) | ||
| return run(); | ||
| ctx.start_capture(); | ||
| } | ||
|
Comment on lines
+593
to
+600
|
||
|
|
||
| auto trace_level = value_of(MIGRAPHX_TRACE_EVAL{}); | ||
| std::vector<argument> ret; | ||
|
|
||
|
|
@@ -664,6 +673,12 @@ std::vector<argument> program::eval(const parameter_map& params, | |
| contexts.front().finish_on(exec_env.queue); | ||
| } | ||
|
|
||
| if(contexts.size() == 1 and enabled(MIGRAPHX_ENABLE_HIP_GRAPH{})) | ||
| { | ||
| auto& ctx = contexts.front(); | ||
| ctx.end_capture(ret); | ||
| } | ||
|
Comment on lines
+676
to
+680
|
||
|
|
||
| return ret; | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -106,6 +106,23 @@ static bool concat_const_foldable(Iterator start, Iterator last, std::size_t iax | |
| }); | ||
| } | ||
|
|
||
| MIGRAPHX_PRED_MATCHER(conv_1x1, instruction_ref ins) | ||
| { | ||
| if(ins->name() != "convolution") | ||
| return false; | ||
| auto v = ins->get_operator().to_value(); | ||
| if(v.at("group").to<int>() != 1) | ||
| return false; | ||
| if(not all_of(v.at("stride"), [](const value& x) { return x.to<std::size_t>() == 1; })) | ||
| return false; | ||
| if(not all_of(v.at("padding"), [](const value& x) { return x.to<std::size_t>() == 0; })) | ||
| return false; | ||
| if(not all_of(v.at("dilation"), [](const value& x) { return x.to<std::size_t>() == 1; })) | ||
| return false; | ||
| auto w = ins->inputs().at(1)->get_shape(); | ||
| return std::all_of(w.lens().begin() + 2, w.lens().end(), [](std::size_t i) { return i == 1; }); | ||
| } | ||
|
|
||
| // conv(x, w) * a => conv(x, a * w) | ||
| struct find_mul_conv | ||
| { | ||
|
|
@@ -1093,6 +1110,61 @@ struct find_concat_conv | |
| } | ||
| }; | ||
|
|
||
| // (x * w1) * w2 => x * (w1 * w2) | ||
| struct find_conv_conv_1x1 | ||
| { | ||
| auto matcher() const | ||
| { | ||
| return conv_1x1( | ||
| match::arg(0)(match::used_once(), match::name("convolution").bind("input"))); | ||
| } | ||
|
|
||
| void apply(module& m, const match::matcher_result& r) const | ||
| { | ||
| auto ins = r.result; | ||
| auto input = r.instructions["input"]; | ||
| auto x_ins = input->inputs().front(); | ||
| auto wnxn = input->inputs()[1]; | ||
| auto w1x1 = ins->inputs()[1]; | ||
|
|
||
| auto out_channels = w1x1->get_shape().lens()[0]; | ||
| auto mid_channels = w1x1->get_shape().lens()[1]; | ||
| auto in_channels_per_group = wnxn->get_shape().lens()[1]; | ||
| auto groups = x_ins->get_shape().lens()[1] / in_channels_per_group; | ||
| auto w_size = std::accumulate(wnxn->get_shape().lens().begin() + 2, | ||
| wnxn->get_shape().lens().end(), | ||
| std::size_t{1}, | ||
| std::multiplies<>{}); | ||
|
|
||
| auto mw_dims = wnxn->get_shape().lens(); | ||
| mw_dims[1] *= groups; | ||
|
|
||
| auto w1x1_reshaped = m.insert_instruction( | ||
| ins, | ||
| make_op("reshape", {{"dims", {out_channels, groups, mid_channels / groups}}}), | ||
| w1x1); | ||
| auto w1x1_grouped = m.insert_instruction( | ||
| ins, make_op("transpose", {{"permutation", {1, 0, 2}}}), w1x1_reshaped); | ||
|
|
||
| auto wnxn_reshaped = m.insert_instruction( | ||
| ins, | ||
| make_op("reshape", | ||
| {{"dims", {groups, mid_channels / groups, in_channels_per_group * w_size}}}), | ||
| wnxn); | ||
|
|
||
| auto mw = m.insert_instruction(ins, make_op("dot"), w1x1_grouped, wnxn_reshaped); | ||
| auto mw_transposed = | ||
| m.insert_instruction(ins, make_op("transpose", {{"permutation", {1, 0, 2}}}), mw); | ||
| auto mw_reshaped = | ||
| m.insert_instruction(ins, make_op("reshape", {{"dims", mw_dims}}), mw_transposed); | ||
|
|
||
| auto op = input->get_operator(); | ||
| op.from_value({{"group", 1}}); | ||
| auto conv = m.insert_instruction(ins, op, x_ins, mw_reshaped); | ||
|
Comment on lines
+1139
to
+1163
|
||
| m.replace_instruction(ins, conv); | ||
| } | ||
|
Comment on lines
+1113
to
+1165
|
||
| }; | ||
|
|
||
| static void | ||
| move_instructions_back(module& m, instruction_ref pos, std::vector<instruction_ref> inss) | ||
| { | ||
|
|
@@ -2395,6 +2467,7 @@ void simplify_algebra::apply(module& m) const | |
| find_zero_ops{}, | ||
| find_dot_add{}, | ||
| find_conv_add{}, | ||
| find_conv_conv_1x1{}, | ||
| find_div_const{}, | ||
| find_sub_const{}, | ||
| find_rsqrt{}, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -372,14 +372,15 @@ auto is_mlir_conv(mlir_mode mode) | |
| if(mode == mlir_mode::all) | ||
| return true; | ||
| // No winograd for group convolution | ||
| if(group > 1) | ||
| return true; | ||
| auto w = ins->inputs().at(1)->get_shape(); | ||
| if(w.lens().size() != 4) | ||
| return true; | ||
| if(w.lens()[2] != w.lens()[3]) | ||
| return true; | ||
| return (w.lens()[3] % 3) != 0; | ||
| return group == 1; | ||
| // if(group > 1) | ||
| // return true; | ||
| // auto w = ins->inputs().at(1)->get_shape(); | ||
| // if(w.lens().size() != 4) | ||
| // return true; | ||
| // if(w.lens()[2] != w.lens()[3]) | ||
| // return true; | ||
| // return (w.lens()[3] % 3) != 0; | ||
|
Comment on lines
374
to
+383
|
||
| }); | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This warm-up
`p.eval(m)` runs unconditionally and will execute the model twice even when hip-graph capture is disabled, which significantly slows verification and can skew perf numbers. Gate this extra run behind `enabled(MIGRAPHX_ENABLE_HIP_GRAPH{})` (and/or run it only when the compiled target is GPU).