diff --git a/src/CodeGen_LLVM.cpp b/src/CodeGen_LLVM.cpp index 20d0ad5f1ffe..4bcd2df0f389 100644 --- a/src/CodeGen_LLVM.cpp +++ b/src/CodeGen_LLVM.cpp @@ -4937,6 +4937,27 @@ Value *CodeGen_LLVM::slice_vector(Value *vec, int start, int size) { } } +Value *CodeGen_LLVM::optimization_fence(Value *v) { + llvm::Type *t = v->getType(); + internal_assert(!t->isScalableTy()) + << "optimization_fence does not support scalable vectors yet"; + const int bits = t->getPrimitiveSizeInBits(); + if (bits % 32) { + const int lanes = get_vector_num_elements(t); + const int element_bits = t->getScalarSizeInBits(); + const int lanes_per_32_bits = 32 / element_bits; + const int padded_lanes = align_up(lanes, lanes_per_32_bits); + v = slice_vector(v, 0, padded_lanes); + v = optimization_fence(v); + v = slice_vector(v, 0, lanes); + return v; + } + llvm::Type *float_type = llvm_type_of(Float(32, bits / 32)); + v = builder->CreateBitCast(v, float_type); + v = builder->CreateArithmeticFence(v, float_type); + return builder->CreateBitCast(v, t); +} + Value *CodeGen_LLVM::concat_vectors(const vector<Value *> &v) { if (v.size() == 1) { return v[0]; } diff --git a/src/CodeGen_LLVM.h b/src/CodeGen_LLVM.h index 183463d5fdb6..576bc50d169b 100644 --- a/src/CodeGen_LLVM.h +++ b/src/CodeGen_LLVM.h @@ -509,6 +509,11 @@ class CodeGen_LLVM : public IRVisitor { * if you ask for more lanes than the vector has. */ virtual llvm::Value *slice_vector(llvm::Value *vec, int start, int extent); + /** Use an arithmetic fence to prevent LLVM from fusing operations + * across this barrier. Works by bitcasting to float, applying + * llvm.arithmetic.fence, and bitcasting back. */ + virtual llvm::Value *optimization_fence(llvm::Value *); + /** Concatenate a bunch of llvm vectors. Must be of the same type. 
*/ virtual llvm::Value *concat_vectors(const std::vector<llvm::Value *> &); diff --git a/src/CodeGen_WebAssembly.cpp b/src/CodeGen_WebAssembly.cpp index f173a88e1211..70c77160f2f6 100644 --- a/src/CodeGen_WebAssembly.cpp +++ b/src/CodeGen_WebAssembly.cpp @@ -177,6 +177,33 @@ void CodeGen_WebAssembly::visit(const Cast *op) { codegen(equiv); return; } + + // Work around an LLVM bug where + // WebAssemblyTargetLowering::isVectorLoadExtDesirable assumes the + // operand of a vector extend is always a load, but LLVM's optimizer may + // insert a freeze node between the load and the extend, causing a + // cast assertion failure. Use an optimization fence to + // prevent the DAG combiner from seeing through to the load. See + // https://github.com/halide/Halide/issues/8928 and + // https://github.com/llvm/llvm-project/issues/184676 + if (op->type.is_int_or_uint() && + op->value.type().is_int_or_uint() && + op->type.bits() > op->value.type().bits()) { + // Check if the value is a Load. Loads are sometimes hiding behind + // let bindings. + bool is_load = op->value.as<Load>(); + if (const Variable *var = op->value.as<Variable>()) { + llvm::Value *v = sym_get(var->name, false); + is_load = v && llvm::isa<llvm::LoadInst>(v); + } + if (is_load) { + llvm::Value *v = codegen(op->value); + v = optimization_fence(v); + value = builder->CreateIntCast(v, llvm_type_of(op->type), + op->value.type().is_int()); + return; + } + } } CodeGen_Posix::visit(op);