From 703e288c1aab309307d55739caf619eda8da6b3d Mon Sep 17 00:00:00 2001
From: Redslayer112
Date: Tue, 14 Apr 2026 12:16:16 +0530
Subject: [PATCH] Emit cold inline asm marker for cold_path intrinsic in LLVM backend

The cold_path intrinsic was a no-op in the SSA layer, which meant LLVM
never saw any cold hint in blocks reached through generic
FnOnce::call_once boundaries. The !prof metadata emitted by
find_cold_blocks was dropped by LLVM's InlinerPass when folding
redundant PHI+branch patterns.

This patch removes the SSA-level no-op for cold_path and adds an
LLVM-specific handler that emits "asm sideeffect" with a "cold"
call-site attribute. The sideeffect flag prevents any LLVM pass from
removing the marker, while the cold attribute lets
BranchProbabilityInfo mark the containing block as unlikely. The empty
asm string produces zero machine instructions.

Fixes #155263
---
 compiler/rustc_codegen_llvm/src/intrinsic.rs  | 32 ++++++++++++++++
 .../rustc_codegen_ssa/src/mir/intrinsic.rs    |  5 ---
 .../hint/cold_path_propagation.rs             | 38 +++++++++++++++++++
 3 files changed, 70 insertions(+), 5 deletions(-)
 create mode 100644 tests/codegen-llvm/hint/cold_path_propagation.rs

diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
index 0d3d682ece21f..e970d1f0774a7 100644
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -779,6 +779,38 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                 }
             }
 
+            sym::cold_path => {
+                // Emit an empty inline asm with a `cold` call-site attribute.
+                // This causes LLVM's BranchProbabilityInfo to mark this block
+                // as unlikely. Unlike `llvm.expect`-based `!prof` metadata
+                // (which can be lost when the InlinerPass folds branches),
+                // this marker survives the full optimization pipeline.
+                let void_ty = self.cx.type_void();
+                let fty = self.cx.type_func(&[], void_ty);
+                let asm_val = unsafe {
+                    llvm::LLVMGetInlineAsm(
+                        fty,
+                        c"".as_ptr().cast(),
+                        0,
+                        c"".as_ptr().cast(),
+                        0,
+                        llvm::TRUE, // HasSideEffects
+                        llvm::FALSE, // IsAlignStack
+                        llvm::AsmDialect::Att,
+                        llvm::FALSE, // CanThrow
+                    )
+                };
+                let call = self.call(fty, None, None, asm_val, &[], None, None);
+                let cold_attr =
+                    llvm::AttributeKind::Cold.create_attr(self.llcx);
+                crate::attributes::apply_to_callsite(
+                    call,
+                    llvm::AttributePlace::Function,
+                    &[cold_attr],
+                );
+                return Ok(());
+            }
+
             _ => {
                 debug!("unknown intrinsic '{}' -- falling back to default body", name);
                 // Call the fallback body instead of generating the intrinsic code
diff --git a/compiler/rustc_codegen_ssa/src/mir/intrinsic.rs b/compiler/rustc_codegen_ssa/src/mir/intrinsic.rs
index fd0c7c656ac21..a46ba042e10a0 100644
--- a/compiler/rustc_codegen_ssa/src/mir/intrinsic.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/intrinsic.rs
@@ -579,11 +579,6 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
                 }
             }
 
-            sym::cold_path => {
-                // This is a no-op. The intrinsic is just a hint to the optimizer.
-                return Ok(());
-            }
-
             _ => {
                 // Need to use backend-specific things in the implementation.
                 return bx.codegen_intrinsic_call(instance, args, result, span);
diff --git a/tests/codegen-llvm/hint/cold_path_propagation.rs b/tests/codegen-llvm/hint/cold_path_propagation.rs
new file mode 100644
index 0000000000000..8ad677e1800af
--- /dev/null
+++ b/tests/codegen-llvm/hint/cold_path_propagation.rs
@@ -0,0 +1,38 @@
+//@ compile-flags: -Copt-level=3
+// Regression test for #155263: cold_path must propagate through
+// FnOnce::call_once boundaries after LLVM inlining.
+#![crate_type = "lib"]
+
+use std::hint::cold_path;
+
+fn dispatch<F: FnOnce(&mut u64) -> Option<u64>>(x: &mut u64, f: F) -> Option<u64> {
+    if *x == 0 {
+        cold_path();
+        return None;
+    }
+    *x -= 1;
+
+    let result = f(x);
+    if result.is_none() {
+        cold_path();
+        return None;
+    }
+    result
+}
+
+fn dec(x: &mut u64) -> Option<u64> {
+    if *x == 0 {
+        None
+    } else {
+        *x -= 1;
+        Some(1)
+    }
+}
+
+// CHECK-LABEL: @test_cold_path_through_fnonce(
+// CHECK: asm sideeffect
+// CHECK: asm sideeffect
+#[no_mangle]
+pub fn test_cold_path_through_fnonce(x: &mut u64, y: &mut u64) -> Option<u64> {
+    dispatch(x, |x| dec(y))
+}