Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions src/CodeGen_LLVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4937,6 +4937,27 @@ Value *CodeGen_LLVM::slice_vector(Value *vec, int start, int size) {
}
}

// Pass a value through llvm.arithmetic.fence so that LLVM cannot combine the
// operations producing it with the operations consuming it. The fence is
// applied to a float32 reinterpretation of the value, so the value's bit width
// must be brought to a whole number of 32-bit words first. Scalable vector
// types are rejected with an internal assertion.
Value *CodeGen_LLVM::optimization_fence(Value *v) {
    llvm::Type *original_type = v->getType();
    internal_assert(!original_type->isScalableTy())
        << "optimization_fence does not support scalable vectors yet";

    const int total_bits = original_type->getPrimitiveSizeInBits();
    if (total_bits % 32 == 0) {
        // A whole number of 32-bit words: view the bits as float32 lanes,
        // fence that, then view the result as the original type again.
        llvm::Type *as_floats = llvm_type_of(Float(32, total_bits / 32));
        Value *fenced = builder->CreateBitCast(v, as_floats);
        fenced = builder->CreateArithmeticFence(fenced, as_floats);
        return builder->CreateBitCast(fenced, original_type);
    }

    // Not a multiple of 32 bits: round the lane count up so the total width
    // becomes one, fence the wider value recursively, then slice back down to
    // the original number of lanes. This relies on slice_vector accepting a
    // request for more lanes than the input has (presumably padding it).
    const int original_lanes = get_vector_num_elements(original_type);
    const int lane_bits = original_type->getScalarSizeInBits();
    const int lanes_per_word = 32 / lane_bits;
    const int widened_lanes = align_up(original_lanes, lanes_per_word);
    Value *widened = slice_vector(v, 0, widened_lanes);
    return slice_vector(optimization_fence(widened), 0, original_lanes);
}

Value *CodeGen_LLVM::concat_vectors(const vector<Value *> &v) {
if (v.size() == 1) {
return v[0];
Expand Down
5 changes: 5 additions & 0 deletions src/CodeGen_LLVM.h
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,11 @@ class CodeGen_LLVM : public IRVisitor {
* if you ask for more lanes than the vector has. */
virtual llvm::Value *slice_vector(llvm::Value *vec, int start, int extent);

/** Use an arithmetic fence to prevent LLVM from fusing operations
* across this barrier. Works by bitcasting to float, applying
* llvm.arithmetic.fence, and bitcasting back. Values whose total
* size is not a multiple of 32 bits are first widened with
* slice_vector to a lane count that makes the size a multiple of 32
* bits, fenced, and then sliced back to their original lane count.
* Scalable vector types are not supported yet and trigger an
* internal assertion. */
virtual llvm::Value *optimization_fence(llvm::Value *);

/** Concatenate a bunch of llvm vectors. Must be of the same type. */
virtual llvm::Value *concat_vectors(const std::vector<llvm::Value *> &);

Expand Down
26 changes: 26 additions & 0 deletions src/CodeGen_WebAssembly.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,32 @@ void CodeGen_WebAssembly::visit(const Cast *op) {
codegen(equiv);
return;
}

// Work around an LLVM bug where
// WebAssemblyTargetLowering::isVectorLoadExtDesirable assumes the
// operand of a vector extend is always a load, but LLVM's optimizer may
// insert a freeze node between the load and the extend, causing a
// cast<LoadSDNode> assertion failure. Use an optimization fence to
// prevent the DAG combiner from seeing through to the load. See
// https://github.com/halide/Halide/issues/8928
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you also put a reference to the LLVM issue here?

if (op->type.is_int_or_uint() &&
op->value.type().is_int_or_uint() &&
op->type.bits() > op->value.type().bits()) {
// Check if the value is a Load. Loads are sometimes hiding behind
// let bindings.
bool is_load = op->value.as<Load>();
if (const Variable *var = op->value.as<Variable>()) {
llvm::Value *v = sym_get(var->name, false);
is_load = v && llvm::isa<llvm::LoadInst>(v);
}
if (is_load) {
llvm::Value *v = codegen(op->value);
v = optimization_fence(v);
value = builder->CreateIntCast(v, llvm_type_of(op->type),
op->value.type().is_int());
return;
}
}
}

CodeGen_Posix::visit(op);
Expand Down
Loading