diff --git a/src/CodeGen_LLVM.cpp b/src/CodeGen_LLVM.cpp index 20d0ad5f1ffe..4bcd2df0f389 100644 --- a/src/CodeGen_LLVM.cpp +++ b/src/CodeGen_LLVM.cpp @@ -4937,6 +4937,27 @@ Value *CodeGen_LLVM::slice_vector(Value *vec, int start, int size) { } } +Value *CodeGen_LLVM::optimization_fence(Value *v) { + llvm::Type *t = v->getType(); + internal_assert(!t->isScalableTy()) + << "optimization_fence does not support scalable vectors yet"; + const int bits = t->getPrimitiveSizeInBits(); + if (bits % 32) { + const int lanes = get_vector_num_elements(t); + const int element_bits = t->getScalarSizeInBits(); + const int lanes_per_32_bits = 32 / element_bits; + const int padded_lanes = align_up(lanes, lanes_per_32_bits); + v = slice_vector(v, 0, padded_lanes); + v = optimization_fence(v); + v = slice_vector(v, 0, lanes); + return v; + } + llvm::Type *float_type = llvm_type_of(Float(32, bits / 32)); + v = builder->CreateBitCast(v, float_type); + v = builder->CreateArithmeticFence(v, float_type); + return builder->CreateBitCast(v, t); +} + Value *CodeGen_LLVM::concat_vectors(const vector<Value *> &v) { if (v.size() == 1) { return v[0]; } diff --git a/src/CodeGen_LLVM.h b/src/CodeGen_LLVM.h index 183463d5fdb6..576bc50d169b 100644 --- a/src/CodeGen_LLVM.h +++ b/src/CodeGen_LLVM.h @@ -509,6 +509,11 @@ class CodeGen_LLVM : public IRVisitor { * if you ask for more lanes than the vector has. */ virtual llvm::Value *slice_vector(llvm::Value *vec, int start, int extent); + /** Use an arithmetic fence to prevent LLVM from fusing operations + * across this barrier. Works by bitcasting to float, applying + * llvm.arithmetic.fence, and bitcasting back. */ + virtual llvm::Value *optimization_fence(llvm::Value *); + /** Concatenate a bunch of llvm vectors. Must be of the same type. 
*/ virtual llvm::Value *concat_vectors(const std::vector<llvm::Value *> &); diff --git a/src/CodeGen_WebAssembly.cpp b/src/CodeGen_WebAssembly.cpp index f173a88e1211..70c77160f2f6 100644 --- a/src/CodeGen_WebAssembly.cpp +++ b/src/CodeGen_WebAssembly.cpp @@ -177,6 +177,33 @@ void CodeGen_WebAssembly::visit(const Cast *op) { codegen(equiv); return; } + + // Work around an LLVM bug where + // WebAssemblyTargetLowering::isVectorLoadExtDesirable assumes the + // operand of a vector extend is always a load, but LLVM's optimizer may + // insert a freeze node between the load and the extend, causing a + // cast assertion failure. Use an optimization fence to + // prevent the DAG combiner from seeing through to the load. See + // https://github.com/halide/Halide/issues/8928 and + // https://github.com/llvm/llvm-project/issues/184676 + if (op->type.is_int_or_uint() && + op->value.type().is_int_or_uint() && + op->type.bits() > op->value.type().bits()) { + // Check if the value is a Load. Loads are sometimes hiding behind + // let bindings. + bool is_load = op->value.as<Load>(); + if (const Variable *var = op->value.as<Variable>()) { + llvm::Value *v = sym_get(var->name, false); + is_load = v && llvm::isa<llvm::LoadInst>(v); + } + if (is_load) { + llvm::Value *v = codegen(op->value); + v = optimization_fence(v); + value = builder->CreateIntCast(v, llvm_type_of(op->type), + op->value.type().is_int()); + return; + } + } } CodeGen_Posix::visit(op);