diff --git a/CHANGELOG.md b/CHANGELOG.md
index 529361b68..40ca1055c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -62,6 +62,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
  - `VLXE` and `VSXE` need to wait that the SlideAddrGenA opreq is free before being issued by the lane sequencer to the operand requester stage
  - Do not trap instructions with no operands in the main sequencer
  - Commit a reduction only after a grant from the VRF
+ - Decouple `cmdBuffer` and `dataBuffer` depth parameters in the operand queues
 
 ### Added
 
@@ -104,6 +105,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
  - Add support for vector mask population count and find first set bit instructions: `vcpop.m`, `vfirst.m`
  - Add Spyglass linting script
  - Add parametrized support for Fixed-Point math
+ - Add support for Barber's Pole VRF Layout
 
 ### Changed
 
@@ -134,6 +136,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
  - Adapt `fdotproduct` to `dotproduct` structure
  - Pre-calculate next-cycle `aligned_start_address` in `addrgen` for timing reasons
  - Add `is_reduct` signal to the operand queues, to gate the neutral value filling
+ - Handle WAW and WAR `vload` hazards in the `VLDU` without stalling the main sequencer
+ - Reductions are no longer treated as widening instructions with regard to WAW hazards in the operand requesters
+ - `slide1x` instructions are no longer stalled in the main sequencer; the hazard is handled downstream
 
 ## 2.2.0 - 2021-11-02
 
diff --git a/hardware/include/ara_pkg.sv b/hardware/include/ara_pkg.sv
index 593967a7e..6c45d77ff 100644
--- a/hardware/include/ara_pkg.sv
+++ b/hardware/include/ara_pkg.sv
@@ -86,6 +86,7 @@ package ara_pkg;
   localparam int unsigned ValuInsnQueueDepth = 4;
   localparam int unsigned VlduInsnQueueDepth = 4;
   localparam int unsigned VstuInsnQueueDepth = 4;
+  localparam int unsigned VaddrgenInsnQueueDepth = 4;
   localparam int unsigned SlduInsnQueueDepth = 2;
   localparam int unsigned NoneInsnQueueDepth = 1;
   // Ara supports MaskuInsnQueueDepth = 1 only.
@@ -299,6 +300,8 @@ package ara_pkg;
     logic wide_fp_imm;
     // Resizing of FP conversions
     resize_e cvt_resize;
+    // Widening and vslide1x instructions have different hazard stall policies
+    logic special_hazard;
 
     // Vector machine metadata
     vlen_t vl;
@@ -396,6 +399,8 @@ package ara_pkg;
     logic wide_fp_imm;
     // Resizing of FP conversions
     resize_e cvt_resize;
+    // Widening and vslide1x instructions have different hazard stall policies
+    logic special_hazard;
 
     // Vector machine metadata
     vlen_t vl;
@@ -877,11 +882,6 @@ package ara_pkg;
   // Each lane has eight VRF banks
   localparam int unsigned NrVRFBanksPerLane = 8;
 
-  // Find the starting address of a vector register vid
-  function automatic logic [63:0] vaddr(logic [4:0] vid, int NrLanes);
-    vaddr = vid * (VLENB / NrLanes / 8);
-  endfunction: vaddr
-
   // Differenciate between SLDU and ADDRGEN operands from opqueue
   typedef enum logic {
     ALU_SLDU     = 1'b0,
@@ -898,6 +898,7 @@ package ara_pkg;
     logic scale_vl; // Rescale vl taking into account the new and old EEW
 
     resize_e cvt_resize;    // Resizing of FP conversions
+    logic special_hazard; // Widening and vslide1x instructions have different hazard stall policies
 
     logic is_reduct; // Is this a reduction?
 
diff --git a/hardware/include/ara_vaddr.svh b/hardware/include/ara_vaddr.svh
new file mode 100644
index 000000000..3cd9f9ce8
--- /dev/null
+++ b/hardware/include/ara_vaddr.svh
@@ -0,0 +1,80 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matteo Perotti <mperotti@iis.ee.ethz.ch>
+// Description:
+// Ara's functions to calculate VRF addresses. Not in the package
+// since the functions depend on `NrLanes`
+
+// All the functions to support a Barber-Pole VRF layout
+
+// Starting VRF address of vreg vid: scaled register base plus vid's low bits as bank offset
+function automatic vaddr_t vaddr(logic [4:0] vid, int NrLanes);
+  // This is not an adder, it's only wires: the two terms are expected not to overlap
+  // (this holds if VLENB / NrLanes >= NrVRFBanksPerLane^2)
+  vaddr = vid * (VLENB / NrLanes / NrVRFBanksPerLane) + vid[VaddrBankWidth-1:0];
+endfunction: vaddr
+
+// Return the physical VRF address of the element that follows vaddr in vector register vid
+function automatic vaddr_t next_vaddr(vaddr_t vaddr, logic [4:0] vid);
+  // vaddr msbs -> byte index in a bank (old_index keeps the value before the update)
+  logic [VaddrIdxWidth-1:VaddrBankWidth] index, old_index;
+  // vaddr lsbs -> bank index
+  logic [VaddrBankWidth-1:0] bank;
+
+  index = vaddr[VaddrIdxWidth-1:VaddrBankWidth];
+  bank  = vaddr[VaddrBankWidth-1:0];
+
+  old_index = index;
+
+  // Advance to the next bank (the VaddrBankWidth-bit counter wraps on overflow)
+  bank += 1;
+  if (bank == vid[VaddrBankWidth-1:0])
+    // Back at the low bits of vid (the bank offset used by vaddr): move to the next index
+    index += 1;
+
+  // If we change vreg, the start element position is +1 (LMUL > 1).
+  // This is important for B layout consistency among different LMUL
+  // or when the inactive element policy is "undisturbed"
+  if (index[VaddrVregWidth] != old_index[VaddrVregWidth])
+    bank += 1;
+
+  return {index, bank};
+endfunction
+
+// Return vaddr advanced by the offset off for vector register vid (necessary with vslideup)
+function automatic vaddr_t vaddr_offset(vaddr_t vaddr, vaddr_t off, logic [4:0] vid);
+  // vaddr msbs -> byte index in a bank (old_index keeps the value before the update)
+  logic [VaddrIdxWidth-1:VaddrBankWidth] index, old_index;
+  // vaddr lsbs -> bank index (old_bank keeps the value before the update)
+  logic [VaddrBankWidth-1:0] bank, old_bank;
+
+  index = vaddr[VaddrIdxWidth-1:VaddrBankWidth];
+  bank  = vaddr[VaddrBankWidth-1:0];
+
+  old_index = index;
+  old_bank  = bank;
+
+  // Add the index and bank parts of the offset separately
+  index += off[VaddrIdxWidth-1:VaddrBankWidth];
+  bank  += off[VaddrBankWidth-1:0];
+  // Support vstart != 0: do not assume that old_bank == vid[VaddrBankWidth-1:0].
+  // Increment the index if we meet vid[VaddrBankWidth-1:0] during the bank addition
+  if (old_bank > vid[VaddrBankWidth-1:0]) begin
+    if (bank >= vid[VaddrBankWidth-1:0] && bank < old_bank)
+      // The bank counter overflowed and reached or passed the low bits of vid
+      index += 1;
+  end else if (old_bank < vid[VaddrBankWidth-1:0]) begin
+    if (bank >= vid[VaddrBankWidth-1:0] || bank < old_bank)
+      // The bank counter reached or passed the low bits of vid, or overflowed
+      index += 1;
+  end
+
+  // If we change vreg, the start element position is +1
+  // for every reg passed (LMUL > 1). The max reg id delta is 7
+  // with LMUL == 8, hence the 3-bit slices compared below.
+  bank += index[VaddrVregWidth +: 3] - old_index[VaddrVregWidth +: 3];
+
+  return {index, bank};
+endfunction
diff --git a/hardware/src/ara.sv b/hardware/src/ara.sv
index 7668fef06..4bc110a5d 100644
--- a/hardware/src/ara.sv
+++ b/hardware/src/ara.sv
@@ -137,6 +137,10 @@ module ara import ara_pkg::*; #(
   logic [NrVInsn-1:0][NrVInsn-1:0] global_hazard_table;
   // Ready for lane 0 (scalar operand fwd)
   logic pe_scalar_resp_ready;
+  // VLDU Hazard checking
+  vid_t                         vldu_commit_id;
+  logic                         vldu_commit_id_valid;
+  logic                         vldu_hazard;
 
   // Mask unit operands
   elen_t     [NrLanes-1:0][NrMaskFUnits+2-1:0] masku_operand;
@@ -178,7 +182,11 @@ module ara import ara_pkg::*; #(
     // Interface with the address generator
     .addrgen_ack_i         (addrgen_ack              ),
     .addrgen_error_i       (addrgen_error            ),
-    .addrgen_error_vl_i    (addrgen_error_vl         )
+    .addrgen_error_vl_i    (addrgen_error_vl         ),
+    // Interface with the VLDU for hazard handling
+    .vldu_commit_id_i      (vldu_commit_id           ),
+    .vldu_commit_id_valid_i(vldu_commit_id_valid     ),
+    .vldu_hazard_o         (vldu_hazard              )
   );
 
   // Scalar move support
@@ -234,9 +242,9 @@ module ara import ara_pkg::*; #(
 
   for (genvar lane = 0; lane < NrLanes; lane++) begin: gen_lanes
     lane #(
-      .NrLanes     (NrLanes     ),
-      .FPUSupport  (FPUSupport  ),
-      .FixPtSupport(FixPtSupport)
+      .NrLanes                         (NrLanes                             ),
+      .FPUSupport                      (FPUSupport                          ),
+      .FixPtSupport                    (FixPtSupport                        )
     ) i_lane (
       .clk_i                           (clk_i                               ),
       .rst_ni                          (rst_ni                              ),
@@ -344,6 +352,9 @@ module ara import ara_pkg::*; #(
     .addrgen_ack_o              (addrgen_ack                                           ),
     .addrgen_error_o            (addrgen_error                                         ),
     .addrgen_error_vl_o         (addrgen_error_vl                                      ),
+    .commit_id_o                (vldu_commit_id                                        ),
+    .commit_id_valid_o          (vldu_commit_id_valid                                  ),
+    .hazard_i                   (vldu_hazard                                           ),
     // Interface with the Mask unit
     .mask_i                     (mask                                                  ),
     .mask_valid_i               (mask_valid                                            ),
@@ -377,8 +388,7 @@ module ara import ara_pkg::*; #(
   logic sldu_mask_ready;
 
   sldu #(
-    .NrLanes(NrLanes),
-    .vaddr_t(vaddr_t)
+    .NrLanes(NrLanes)
   ) i_sldu (
     .clk_i                   (clk_i                            ),
     .rst_ni                  (rst_ni                           ),
@@ -413,8 +423,7 @@ module ara import ara_pkg::*; #(
   /////////////////
 
   masku #(
-    .NrLanes(NrLanes),
-    .vaddr_t(vaddr_t)
+    .NrLanes(NrLanes)
   ) i_masku (
     .clk_i                   (clk_i                           ),
     .rst_ni                  (rst_ni                          ),
diff --git a/hardware/src/ara_dispatcher.sv b/hardware/src/ara_dispatcher.sv
index 6fe3783dc..22c1cd5b1 100644
--- a/hardware/src/ara_dispatcher.sv
+++ b/hardware/src/ara_dispatcher.sv
@@ -681,6 +681,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     ara_req_d.conversion_vs1 = OpQueueReductionZExt;
                     ara_req_d.conversion_vs2 = OpQueueConversionZExt2;
                     ara_req_d.cvt_resize     = CVT_WIDE;
+                    ara_req_d.special_hazard = 1'b1;
                   end
                   6'b110001: begin
                     ara_req_d.op = ara_pkg::VWREDSUM;
@@ -690,6 +691,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     ara_req_d.conversion_vs1 = OpQueueReductionZExt;
                     ara_req_d.conversion_vs2 = OpQueueConversionSExt2;
                     ara_req_d.cvt_resize     = CVT_WIDE;
+                    ara_req_d.special_hazard = 1'b1;
                   end
                   default: illegal_insn = 1'b1;
                 endcase
@@ -1300,6 +1302,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                         ara_req_d.conversion_vs2 = OpQueueConversionZExt8;
                         ara_req_d.eew_vs2        = eew_q[insn.varith_type.rs2];
                         ara_req_d.cvt_resize     = CVT_WIDE;
+                        ara_req_d.special_hazard = 1'b1;
 
                         // Invalid conversion
                         if (int'(vtype_q.vsew) < int'(EW64) ||
@@ -1310,6 +1313,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                         ara_req_d.conversion_vs2 = OpQueueConversionSExt8;
                         ara_req_d.eew_vs2        = eew_q[insn.varith_type.rs2];
                         ara_req_d.cvt_resize     = CVT_WIDE;
+                        ara_req_d.special_hazard = 1'b1;
 
                         // Invalid conversion
                         if (int'(vtype_q.vsew) < int'(EW64) ||
@@ -1320,6 +1324,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                         ara_req_d.conversion_vs2 = OpQueueConversionZExt4;
                         ara_req_d.eew_vs2        = prev_prev_ew(vtype_q.vsew);
                         ara_req_d.cvt_resize     = CVT_WIDE;
+                        ara_req_d.special_hazard = 1'b1;
 
                         // Invalid conversion
                         if (int'(vtype_q.vsew) < int'(EW32) ||
@@ -1329,6 +1334,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                         ara_req_d.conversion_vs2 = OpQueueConversionSExt4;
                         ara_req_d.eew_vs2        = prev_prev_ew(vtype_q.vsew);
                         ara_req_d.cvt_resize     = CVT_WIDE;
+                        ara_req_d.special_hazard = 1'b1;
 
                         // Invalid conversion
                         if (int'(vtype_q.vsew) < int'(EW32) ||
@@ -1338,6 +1344,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                         ara_req_d.conversion_vs2 = OpQueueConversionZExt2;
                         ara_req_d.eew_vs2        = vtype_q.vsew.prev();
                         ara_req_d.cvt_resize     = CVT_WIDE;
+                        ara_req_d.special_hazard = 1'b1;
 
                         // Invalid conversion
                         if (int'(vtype_q.vsew) < int'(EW16) || int'(vtype_q.vlmul) inside {LMUL_1_8})
@@ -1347,6 +1354,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                         ara_req_d.conversion_vs2 = OpQueueConversionSExt2;
                         ara_req_d.eew_vs2        = vtype_q.vsew.prev();
                         ara_req_d.cvt_resize     = CVT_WIDE;
+                        ara_req_d.special_hazard = 1'b1;
 
                         // Invalid conversion
                         if (int'(vtype_q.vsew) < int'(EW16) || int'(vtype_q.vlmul) inside {LMUL_1_8})
@@ -1394,6 +1402,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     ara_req_d.conversion_vs1 = OpQueueConversionZExt2;
                     ara_req_d.conversion_vs2 = OpQueueConversionZExt2;
                     ara_req_d.cvt_resize     = CVT_WIDE;
+                    ara_req_d.special_hazard = 1'b1;
                   end
                   6'b110001: begin // VWADD
                     ara_req_d.op             = ara_pkg::VADD;
@@ -1402,6 +1411,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     ara_req_d.conversion_vs1 = OpQueueConversionSExt2;
                     ara_req_d.conversion_vs2 = OpQueueConversionSExt2;
                     ara_req_d.cvt_resize     = CVT_WIDE;
+                    ara_req_d.special_hazard = 1'b1;
                   end
                   6'b110010: begin // VWSUBU
                     ara_req_d.op             = ara_pkg::VSUB;
@@ -1410,6 +1420,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     ara_req_d.conversion_vs1 = OpQueueConversionZExt2;
                     ara_req_d.conversion_vs2 = OpQueueConversionZExt2;
                     ara_req_d.cvt_resize     = CVT_WIDE;
+                    ara_req_d.special_hazard = 1'b1;
                   end
                   6'b110011: begin // VWSUB
                     ara_req_d.op             = ara_pkg::VSUB;
@@ -1418,6 +1429,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     ara_req_d.conversion_vs1 = OpQueueConversionSExt2;
                     ara_req_d.conversion_vs2 = OpQueueConversionSExt2;
                     ara_req_d.cvt_resize     = CVT_WIDE;
+                    ara_req_d.special_hazard = 1'b1;
                   end
                   6'b110100: begin // VWADDU.W
                     ara_req_d.op             = ara_pkg::VADD;
@@ -1427,6 +1439,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     ara_req_d.conversion_vs1 = OpQueueConversionZExt2;
                     ara_req_d.eew_vs2        = vtype_q.vsew.next();
                     ara_req_d.cvt_resize     = CVT_WIDE;
+                    ara_req_d.special_hazard = 1'b1;
                   end
                   6'b110101: begin // VWADD.W
                     ara_req_d.op             = ara_pkg::VADD;
@@ -1436,6 +1449,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     ara_req_d.conversion_vs1 = OpQueueConversionSExt2;
                     ara_req_d.eew_vs2        = vtype_q.vsew.next();
                     ara_req_d.cvt_resize     = CVT_WIDE;
+                    ara_req_d.special_hazard = 1'b1;
                   end
                   6'b110110: begin // VWSUBU.W
                     ara_req_d.op             = ara_pkg::VSUB;
@@ -1445,6 +1459,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     ara_req_d.conversion_vs1 = OpQueueConversionZExt2;
                     ara_req_d.eew_vs2        = vtype_q.vsew.next();
                     ara_req_d.cvt_resize     = CVT_WIDE;
+                    ara_req_d.special_hazard = 1'b1;
                   end
                   6'b110111: begin // VWSUB.W
                     ara_req_d.op             = ara_pkg::VSUB;
@@ -1454,6 +1469,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     ara_req_d.conversion_vs1 = OpQueueConversionSExt2;
                     ara_req_d.eew_vs2        = vtype_q.vsew.next();
                     ara_req_d.cvt_resize     = CVT_WIDE;
+                    ara_req_d.special_hazard = 1'b1;
                   end
                   6'b111000: begin // VWMULU
                     ara_req_d.op             = ara_pkg::VMUL;
@@ -1462,6 +1478,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     ara_req_d.conversion_vs1 = OpQueueConversionZExt2;
                     ara_req_d.conversion_vs2 = OpQueueConversionZExt2;
                     ara_req_d.cvt_resize     = CVT_WIDE;
+                    ara_req_d.special_hazard = 1'b1;
                   end
                   6'b111010: begin // VWMULSU
                     ara_req_d.op             = ara_pkg::VMUL;
@@ -1470,6 +1487,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     ara_req_d.conversion_vs1 = OpQueueConversionZExt2;
                     ara_req_d.conversion_vs2 = OpQueueConversionSExt2;
                     ara_req_d.cvt_resize     = CVT_WIDE;
+                    ara_req_d.special_hazard = 1'b1;
                   end
                   6'b111011: begin // VWMUL
                     ara_req_d.op             = ara_pkg::VMUL;
@@ -1478,6 +1496,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     ara_req_d.conversion_vs1 = OpQueueConversionSExt2;
                     ara_req_d.conversion_vs2 = OpQueueConversionSExt2;
                     ara_req_d.cvt_resize     = CVT_WIDE;
+                    ara_req_d.special_hazard = 1'b1;
                   end
                   6'b111100: begin // VWMACCU
                     ara_req_d.op             = ara_pkg::VMACC;
@@ -1508,6 +1527,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     ara_req_d.conversion_vs2 = OpQueueConversionZExt2;
                     ara_req_d.eew_vd_op      = vtype_q.vsew.next();
                     ara_req_d.cvt_resize     = CVT_WIDE;
+                    ara_req_d.special_hazard = 1'b1;
                   end
                   default: illegal_insn = 1'b1;
                 endcase
@@ -1567,6 +1587,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     ara_req_d.eew_vs2 = vtype_q.vsew;
                     // Request will need reshuffling
                     ara_req_d.scale_vl = 1'b1;
+                    // Special hazard handling for this instruction
+                    ara_req_d.special_hazard = 1'b1;
                     // If stride > vl, the vslideup has no effects
                     if (|ara_req_d.stride[$bits(ara_req_d.stride)-1:$bits(vl_q)] ||
                       (vlen_t'(ara_req_d.stride) >= vl_q)) null_vslideup = 1'b1;
@@ -1577,6 +1599,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     ara_req_d.eew_vs2 = vtype_q.vsew;
                     // Request will need reshuffling
                     ara_req_d.scale_vl = 1'b1;
+                    // Special hazard handling for this instruction
+                    ara_req_d.special_hazard = 1'b1;
                   end
                   6'b010000: begin // VRXUNARY0
                     // vmv.s.x
@@ -1625,6 +1649,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     ara_req_d.conversion_vs1 = OpQueueConversionZExt2;
                     ara_req_d.conversion_vs2 = OpQueueConversionZExt2;
                     ara_req_d.cvt_resize     = CVT_WIDE;
+                    ara_req_d.special_hazard = 1'b1;
                   end
                   6'b110001: begin // VWADD
                     ara_req_d.op             = ara_pkg::VADD;
@@ -1633,6 +1658,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     ara_req_d.conversion_vs1 = OpQueueConversionSExt2;
                     ara_req_d.conversion_vs2 = OpQueueConversionSExt2;
                     ara_req_d.cvt_resize     = CVT_WIDE;
+                    ara_req_d.special_hazard = 1'b1;
                   end
                   6'b110010: begin // VWSUBU
                     ara_req_d.op             = ara_pkg::VSUB;
@@ -1641,6 +1667,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     ara_req_d.conversion_vs1 = OpQueueConversionZExt2;
                     ara_req_d.conversion_vs2 = OpQueueConversionZExt2;
                     ara_req_d.cvt_resize     = CVT_WIDE;
+                    ara_req_d.special_hazard = 1'b1;
                   end
                   6'b110011: begin // VWSUB
                     ara_req_d.op             = ara_pkg::VSUB;
@@ -1649,6 +1676,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     ara_req_d.conversion_vs1 = OpQueueConversionSExt2;
                     ara_req_d.conversion_vs2 = OpQueueConversionSExt2;
                     ara_req_d.cvt_resize     = CVT_WIDE;
+                    ara_req_d.special_hazard = 1'b1;
                   end
                   6'b110100: begin // VWADDU.W
                     ara_req_d.op             = ara_pkg::VADD;
@@ -1658,6 +1686,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     ara_req_d.conversion_vs1 = OpQueueConversionZExt2;
                     ara_req_d.eew_vs2        = vtype_q.vsew.next();
                     ara_req_d.cvt_resize     = CVT_WIDE;
+                    ara_req_d.special_hazard = 1'b1;
                   end
                   6'b110101: begin // VWADD.W
                     ara_req_d.op             = ara_pkg::VADD;
@@ -1667,6 +1696,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     ara_req_d.conversion_vs1 = OpQueueConversionSExt2;
                     ara_req_d.eew_vs2        = vtype_q.vsew.next();
                     ara_req_d.cvt_resize     = CVT_WIDE;
+                    ara_req_d.special_hazard = 1'b1;
                   end
                   6'b110110: begin // VWSUBU.W
                     ara_req_d.op             = ara_pkg::VSUB;
@@ -1676,6 +1706,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     ara_req_d.conversion_vs1 = OpQueueConversionZExt2;
                     ara_req_d.eew_vs2        = vtype_q.vsew.next();
                     ara_req_d.cvt_resize     = CVT_WIDE;
+                    ara_req_d.special_hazard = 1'b1;
                   end
                   6'b110111: begin // VWSUB.W
                     ara_req_d.op             = ara_pkg::VSUB;
@@ -1685,6 +1716,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     ara_req_d.conversion_vs1 = OpQueueConversionSExt2;
                     ara_req_d.eew_vs2        = vtype_q.vsew.next();
                     ara_req_d.cvt_resize     = CVT_WIDE;
+                    ara_req_d.special_hazard = 1'b1;
                   end
                   6'b111000: begin // VWMULU
                     ara_req_d.op             = ara_pkg::VMUL;
@@ -1693,6 +1725,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     ara_req_d.conversion_vs1 = OpQueueConversionZExt2;
                     ara_req_d.conversion_vs2 = OpQueueConversionZExt2;
                     ara_req_d.cvt_resize     = CVT_WIDE;
+                    ara_req_d.special_hazard = 1'b1;
                   end
                   6'b111010: begin // VWMULSU
                     ara_req_d.op             = ara_pkg::VMUL;
@@ -1701,6 +1734,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     ara_req_d.conversion_vs1 = OpQueueConversionZExt2;
                     ara_req_d.conversion_vs2 = OpQueueConversionSExt2;
                     ara_req_d.cvt_resize     = CVT_WIDE;
+                    ara_req_d.special_hazard = 1'b1;
                   end
                   6'b111011: begin // VWMUL
                     ara_req_d.op             = ara_pkg::VMUL;
@@ -1709,6 +1743,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     ara_req_d.conversion_vs1 = OpQueueConversionSExt2;
                     ara_req_d.conversion_vs2 = OpQueueConversionSExt2;
                     ara_req_d.cvt_resize     = CVT_WIDE;
+                    ara_req_d.special_hazard = 1'b1;
                   end
                   6'b111100: begin // VWMACCU
                     ara_req_d.op             = ara_pkg::VMACC;
@@ -1719,6 +1754,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     ara_req_d.conversion_vs2 = OpQueueConversionZExt2;
                     ara_req_d.eew_vd_op      = vtype_q.vsew.next();
                     ara_req_d.cvt_resize     = CVT_WIDE;
+                    ara_req_d.special_hazard = 1'b1;
                   end
                   6'b111101: begin // VWMACC
                     ara_req_d.op             = ara_pkg::VMACC;
@@ -1729,6 +1765,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     ara_req_d.conversion_vs2 = OpQueueConversionSExt2;
                     ara_req_d.eew_vd_op      = vtype_q.vsew.next();
                     ara_req_d.cvt_resize     = CVT_WIDE;
+                    ara_req_d.special_hazard = 1'b1;
                   end
                   6'b111110: begin // VWMACCUS
                     ara_req_d.op             = ara_pkg::VMACC;
@@ -1739,6 +1776,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     ara_req_d.conversion_vs2 = OpQueueConversionSExt2;
                     ara_req_d.eew_vd_op      = vtype_q.vsew.next();
                     ara_req_d.cvt_resize     = CVT_WIDE;
+                    ara_req_d.special_hazard = 1'b1;
                   end
                   6'b111111: begin // VWMACCSU
                     ara_req_d.op             = ara_pkg::VMACC;
@@ -1749,6 +1787,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     ara_req_d.conversion_vs2 = OpQueueConversionZExt2;
                     ara_req_d.eew_vd_op      = vtype_q.vsew.next();
                     ara_req_d.cvt_resize     = CVT_WIDE;
+                    ara_req_d.special_hazard = 1'b1;
                   end
                   default: illegal_insn = 1'b1;
                 endcase
@@ -1883,6 +1922,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                         5'b01000: begin // Widening VFCVTXUF
                           ara_req_d.op             = VFCVTXUF;
                           ara_req_d.cvt_resize     = CVT_WIDE;
+                          ara_req_d.special_hazard = 1'b1;
                           ara_req_d.emul           = next_lmul(vtype_q.vlmul);
                           ara_req_d.vtype.vsew     = vtype_q.vsew.next();
                           ara_req_d.conversion_vs2 = OpQueueAdjustFPCvt;
@@ -1890,6 +1930,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                         5'b01001: begin // Widening VFCVTXF
                           ara_req_d.op             = VFCVTXF;
                           ara_req_d.cvt_resize     = CVT_WIDE;
+                          ara_req_d.special_hazard = 1'b1;
                           ara_req_d.emul           = next_lmul(vtype_q.vlmul);
                           ara_req_d.vtype.vsew     = vtype_q.vsew.next();
                           ara_req_d.conversion_vs2 = OpQueueAdjustFPCvt;
@@ -1897,6 +1938,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                         5'b01010: begin // Widening VFCVTFXU
                           ara_req_d.op             = VFCVTFXU;
                           ara_req_d.cvt_resize     = CVT_WIDE;
+                          ara_req_d.special_hazard = 1'b1;
                           ara_req_d.emul           = next_lmul(vtype_q.vlmul);
                           ara_req_d.vtype.vsew     = vtype_q.vsew.next();
                           ara_req_d.conversion_vs2 = OpQueueAdjustFPCvt;
@@ -1904,6 +1946,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                         5'b01011: begin // Widening VFCVTFX
                           ara_req_d.op             = VFCVTFX;
                           ara_req_d.cvt_resize     = CVT_WIDE;
+                          ara_req_d.special_hazard = 1'b1;
                           ara_req_d.emul           = next_lmul(vtype_q.vlmul);
                           ara_req_d.vtype.vsew     = vtype_q.vsew.next();
                           ara_req_d.conversion_vs2 = OpQueueAdjustFPCvt;
@@ -1911,6 +1954,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                         5'b01100: begin // Widening VFCVTFF
                           ara_req_d.op             = VFCVTFF;
                           ara_req_d.cvt_resize     = CVT_WIDE;
+                          ara_req_d.special_hazard = 1'b1;
                           ara_req_d.emul           = next_lmul(vtype_q.vlmul);
                           ara_req_d.vtype.vsew     = vtype_q.vsew.next();
                           ara_req_d.conversion_vs2 = OpQueueAdjustFPCvt;
@@ -1918,6 +1962,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                         5'b01110: begin // Widening VFCVTRTZXUF
                           ara_req_d.op             = VFCVTRTZXUF;
                           ara_req_d.cvt_resize     = CVT_WIDE;
+                          ara_req_d.special_hazard = 1'b1;
                           ara_req_d.emul           = next_lmul(vtype_q.vlmul);
                           ara_req_d.vtype.vsew     = vtype_q.vsew.next();
                           ara_req_d.conversion_vs2 = OpQueueAdjustFPCvt;
@@ -1925,6 +1970,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                         5'b01111: begin // Widening VFCVTRTZXF
                           ara_req_d.op             = VFCVTRTZXF;
                           ara_req_d.cvt_resize     = CVT_WIDE;
+                          ara_req_d.special_hazard = 1'b1;
                           ara_req_d.emul           = next_lmul(vtype_q.vlmul);
                           ara_req_d.vtype.vsew     = vtype_q.vsew.next();
                           ara_req_d.conversion_vs2 = OpQueueAdjustFPCvt;
@@ -2032,6 +2078,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                       ara_req_d.vtype.vsew     = vtype_q.vsew.next();
                       ara_req_d.conversion_vs1 = OpQueueConversionWideFP2;
                       ara_req_d.conversion_vs2 = OpQueueConversionWideFP2;
+                      ara_req_d.cvt_resize     = CVT_WIDE;
+                      ara_req_d.special_hazard = 1'b1;
                     end
                     6'b110001: begin // VFWREDUSUM
                       ara_req_d.op             = ara_pkg::VFWREDUSUM;
@@ -2041,7 +2089,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                       ara_req_d.conversion_vs1 = OpQueueReductionZExt;
                       ara_req_d.conversion_vs2 = OpQueueConversionWideFP2;
                       ara_req_d.eew_vs1        = vtype_q.vsew.next();
-                      ara_req_d.cvt_resize     = resize_e'(2'b00);
+                      ara_req_d.cvt_resize     = CVT_WIDE;
+                      ara_req_d.special_hazard = 1'b1;
                     end
                     6'b110010: begin // VFWSUB
                       ara_req_d.op             = ara_pkg::VFSUB;
@@ -2050,6 +2099,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                       ara_req_d.vtype.vsew     = vtype_q.vsew.next();
                       ara_req_d.conversion_vs1 = OpQueueConversionWideFP2;
                       ara_req_d.conversion_vs2 = OpQueueConversionWideFP2;
+                      ara_req_d.cvt_resize     = CVT_WIDE;
+                      ara_req_d.special_hazard = 1'b1;
                     end
                     6'b110011: begin // VFWREDOSUM
                       ara_req_d.op             = ara_pkg::VFWREDOSUM;
@@ -2059,7 +2110,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                       ara_req_d.conversion_vs1 = OpQueueReductionZExt;
                       ara_req_d.conversion_vs2 = OpQueueConversionWideFP2;
                       ara_req_d.eew_vs1        = vtype_q.vsew.next();
-                      ara_req_d.cvt_resize     = resize_e'(2'b00);
+                      ara_req_d.cvt_resize     = CVT_WIDE;
+                      ara_req_d.special_hazard = 1'b1;
                     end
                     6'b110100: begin // VFWADD.W
                       ara_req_d.op             = ara_pkg::VFADD;
@@ -2069,6 +2121,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                       ara_req_d.vtype.vsew     = vtype_q.vsew.next();
                       ara_req_d.eew_vs2        = vtype_q.vsew.next();
                       ara_req_d.conversion_vs1 = OpQueueConversionWideFP2;
+                      ara_req_d.cvt_resize     = CVT_WIDE;
+                      ara_req_d.special_hazard = 1'b1;
                     end
                     6'b110110: begin // VFWSUB.W
                       ara_req_d.op             = ara_pkg::VFSUB;
@@ -2078,6 +2132,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                       ara_req_d.vtype.vsew     = vtype_q.vsew.next();
                       ara_req_d.eew_vs2        = vtype_q.vsew.next();
                       ara_req_d.conversion_vs1 = OpQueueConversionWideFP2;
+                      ara_req_d.cvt_resize     = CVT_WIDE;
+                      ara_req_d.special_hazard = 1'b1;
                     end
                     6'b111000: begin // VFWMUL
                       ara_req_d.op             = ara_pkg::VFMUL;
@@ -2085,6 +2141,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                       ara_req_d.vtype.vsew     = vtype_q.vsew.next();
                       ara_req_d.conversion_vs1 = OpQueueConversionWideFP2;
                       ara_req_d.conversion_vs2 = OpQueueConversionWideFP2;
+                      ara_req_d.cvt_resize     = CVT_WIDE;
+                      ara_req_d.special_hazard = 1'b1;
                     end
                     6'b111100: begin // VFWMACC
                       ara_req_d.op             = ara_pkg::VFMACC;
@@ -2094,6 +2152,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                       ara_req_d.conversion_vs1 = OpQueueConversionWideFP2;
                       ara_req_d.conversion_vs2 = OpQueueConversionWideFP2;
                       ara_req_d.eew_vd_op      = vtype_q.vsew.next();
+                      ara_req_d.cvt_resize     = CVT_WIDE;
+                      ara_req_d.special_hazard = 1'b1;
                     end
                     6'b111101: begin // VFWNMACC
                       ara_req_d.op             = ara_pkg::VFNMACC;
@@ -2103,6 +2163,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                       ara_req_d.conversion_vs1 = OpQueueConversionWideFP2;
                       ara_req_d.conversion_vs2 = OpQueueConversionWideFP2;
                       ara_req_d.eew_vd_op      = vtype_q.vsew.next();
+                      ara_req_d.cvt_resize     = CVT_WIDE;
+                      ara_req_d.special_hazard = 1'b1;
                     end
                     6'b111110: begin // VFWMSAC
                       ara_req_d.op             = ara_pkg::VFMSAC;
@@ -2112,6 +2174,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                       ara_req_d.conversion_vs1 = OpQueueConversionWideFP2;
                       ara_req_d.conversion_vs2 = OpQueueConversionWideFP2;
                       ara_req_d.eew_vd_op      = vtype_q.vsew.next();
+                      ara_req_d.cvt_resize     = CVT_WIDE;
+                      ara_req_d.special_hazard = 1'b1;
                     end
                     6'b111111: begin // VFWNMSAC
                       ara_req_d.op             = ara_pkg::VFNMSAC;
@@ -2121,6 +2185,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                       ara_req_d.conversion_vs1 = OpQueueConversionWideFP2;
                       ara_req_d.conversion_vs2 = OpQueueConversionWideFP2;
                       ara_req_d.eew_vd_op      = vtype_q.vsew.next();
+                      ara_req_d.cvt_resize     = CVT_WIDE;
+                      ara_req_d.special_hazard = 1'b1;
                     end
                     default: illegal_insn = 1'b1;
                   endcase
@@ -2217,6 +2283,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     ara_req_d.eew_vs2  = vtype_q.vsew;
                     // Request will need reshuffling
                     ara_req_d.scale_vl = 1'b1;
+                    // Special hazard handling for this instruction
+                    ara_req_d.special_hazard = 1'b1;
                     // If stride > vl, the vslideup has no effects
                     if (|ara_req_d.stride[$bits(ara_req_d.stride)-1:$bits(vl_q)] ||
                       (vlen_t'(ara_req_d.stride) >= vl_q)) null_vslideup = 1'b1;
@@ -2224,9 +2292,11 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
                     6'b001111: begin // vfslide1down
                       ara_req_d.op     = ara_pkg::VSLIDEDOWN;
                       ara_req_d.stride = 1;
-                    ara_req_d.eew_vs2  = vtype_q.vsew;
-                    // Request will need reshuffling
-                    ara_req_d.scale_vl = 1'b1;
+                      ara_req_d.eew_vs2  = vtype_q.vsew;
+                      // Request will need reshuffling
+                      ara_req_d.scale_vl = 1'b1;
+                      // Special hazard handling for this instruction
+                      ara_req_d.special_hazard = 1'b1;
                     end
                     6'b010000: begin // VRFUNARY0
                       // vmv.s.f
diff --git a/hardware/src/ara_sequencer.sv b/hardware/src/ara_sequencer.sv
index 348c01107..539e3d2b3 100644
--- a/hardware/src/ara_sequencer.sv
+++ b/hardware/src/ara_sequencer.sv
@@ -41,7 +41,11 @@ module ara_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
     // Interface with the Address Generation
     input  logic                            addrgen_ack_i,
     input  logic                            addrgen_error_i,
-    input  vlen_t                           addrgen_error_vl_i
+    input  vlen_t                           addrgen_error_vl_i,
+    // Interface with the VLDU to handle load WAW and WAR hazards
+    input  vid_t                            vldu_commit_id_i,
+    input  logic                            vldu_commit_id_valid_i,
+    output logic                            vldu_hazard_o
   );
 
   ///////////////////////////////////
@@ -261,6 +265,9 @@ module ara_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
     write_list_d          = write_list_q;
     global_hazard_table_d = global_hazard_table_o;
 
+    // No hazard check requested
+    vldu_hazard_o = 1'b0;
+
     // Maintain request
     pe_req_d       = '0;
     pe_req_valid_d = 1'b0;
@@ -354,6 +361,7 @@ module ara_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
               fp_rm         : ara_req_i.fp_rm,
               wide_fp_imm   : ara_req_i.wide_fp_imm,
               cvt_resize    : ara_req_i.cvt_resize,
+              special_hazard: ara_req_i.special_hazard,
               scale_vl      : ara_req_i.scale_vl,
               vl            : ara_req_i.vl,
               vstart        : ara_req_i.vstart,
@@ -370,12 +378,17 @@ module ara_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
                                                 pe_req_d.hazard_vs1 | pe_req_d.hazard_vs2;
 
             // We only issue instructions that take no operands if they have no hazards.
+            // Exception to this rule: loads, which are very common, are not stalled by this
+            // check; their WAW and WAR hazards are handled in the VLDU instead.
             // Moreover, SLIDE instructions cannot be always chained
             // ToDo: optimize the case for vslide1down, vslide1up (wait 2 cycles, then chain)
-            if (!(|{ara_req_i.use_vs1, ara_req_i.use_vs2, ara_req_i.use_vd_op, !ara_req_i.vm}) &&
-                |{pe_req_d.hazard_vs1, pe_req_d.hazard_vs2, pe_req_d.hazard_vm, pe_req_d.hazard_vd} ||
-                (pe_req_d.op == VSLIDEUP && |{pe_req_d.hazard_vd, pe_req_d.hazard_vs1, pe_req_d.hazard_vs2}) ||
-                (pe_req_d.op == VSLIDEDOWN && |{pe_req_d.hazard_vs1, pe_req_d.hazard_vs2}))
+            if ((!(|{ara_req_i.use_vs1, ara_req_i.use_vs2, ara_req_i.use_vd_op, !ara_req_i.vm})              &&
+                |{pe_req_d.hazard_vs1, pe_req_d.hazard_vs2, pe_req_d.hazard_vm, pe_req_d.hazard_vd}          &&
+                !(is_load(pe_req_d.op)))                                                                     ||
+                (pe_req_d.op == VSLIDEUP && !pe_req_d.use_scalar_op &&
+                |{pe_req_d.hazard_vd, pe_req_d.hazard_vs1, pe_req_d.hazard_vs2}) ||
+                (pe_req_d.op == VSLIDEDOWN && !pe_req_d.use_scalar_op &&
+                |{pe_req_d.hazard_vs1, pe_req_d.hazard_vs2}))
             begin
               ara_req_ready_o = 1'b0;
               pe_req_valid_d  = 1'b0;
@@ -453,6 +466,18 @@ module ara_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
       end
     endcase
 
+    // Load-related hazard handling
+    // Loads are masters on the x-bar used to write the in-lane VRF. Nevertheless,
+    // they can have WAR or WAW dependencies. While a load is in the load unit
+    // (vldu_commit_id_valid_i), its entry in the global hazard table is checked
+    // here: vldu_hazard_o stays set as long as any dependency is still pending,
+    // and the load cannot issue requests while it is set.
+    // It is safe to pipeline vldu_hazard_o if the timing is tight
+    // (if so, add a sync signal).
+    if (vldu_commit_id_valid_i) begin
+      vldu_hazard_o = |global_hazard_table_o[vldu_commit_id_i];
+    end
+
     // Update the global hazard table
     for (int id = 0; id < NrVInsn; id++) global_hazard_table_d[id] &= vinsn_running_d;
   end : p_sequencer
diff --git a/hardware/src/lane/lane.sv b/hardware/src/lane/lane.sv
index d12c71345..a786cabfe 100644
--- a/hardware/src/lane/lane.sv
+++ b/hardware/src/lane/lane.sv
@@ -191,8 +191,7 @@ module lane import ara_pkg::*; import rvv_pkg::*; #(
 
   operand_requester #(
     .NrBanks(NrVRFBanksPerLane),
-    .NrLanes(NrLanes          ),
-    .vaddr_t(vaddr_t          )
+    .NrLanes(NrLanes          )
   ) i_operand_requester (
     .clk_i                    (clk_i                   ),
     .rst_ni                   (rst_ni                  ),
diff --git a/hardware/src/lane/lane_sequencer.sv b/hardware/src/lane/lane_sequencer.sv
index 722bab7a5..cee688f18 100644
--- a/hardware/src/lane/lane_sequencer.sv
+++ b/hardware/src/lane/lane_sequencer.sv
@@ -240,42 +240,44 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::
       unique case (pe_req.vfu)
         VFU_Alu: begin
           operand_request_i[AluA] = '{
-            id         : pe_req.id,
-            vs         : pe_req.vs1,
-            eew        : pe_req.eew_vs1,
+            id             : pe_req.id,
+            vs             : pe_req.vs1,
+            eew            : pe_req.eew_vs1,
             // If reductions and vl == 0, we must replace with neutral values
-            conv       : (vfu_operation_d.vl == '0) ? OpQueueReductionZExt : pe_req.conversion_vs1,
-            scale_vl   : pe_req.scale_vl,
-            cvt_resize : pe_req.cvt_resize,
-            vtype      : pe_req.vtype,
+            conv           : (vfu_operation_d.vl == '0) ? OpQueueReductionZExt : pe_req.conversion_vs1,
+            scale_vl       : pe_req.scale_vl,
+            cvt_resize     : pe_req.cvt_resize,
+            special_hazard : pe_req.special_hazard,
+            vtype          : pe_req.vtype,
             // In case of reduction, AluA opqueue will keep the scalar element
-            vl         : (pe_req.op inside {[VREDSUM:VWREDSUM]}) ? 1 : vfu_operation_d.vl,
-            vstart     : vfu_operation_d.vstart,
-            hazard     : pe_req.hazard_vs1 | pe_req.hazard_vd,
-            is_reduct  : pe_req.op inside {[VREDSUM:VWREDSUM]} ? 1'b1 : 0,
-            target_fu  : ALU_SLDU,
-            default    : '0
+            vl             : (pe_req.op inside {[VREDSUM:VWREDSUM]}) ? 1 : vfu_operation_d.vl,
+            vstart         : vfu_operation_d.vstart,
+            hazard         : pe_req.hazard_vs1 | pe_req.hazard_vd,
+            is_reduct      : pe_req.op inside {[VREDSUM:VWREDSUM]} ? 1'b1 : 0,
+            target_fu      : ALU_SLDU,
+            default        : '0
           };
           operand_request_push[AluA] = pe_req.use_vs1;
 
           operand_request_i[AluB] = '{
-            id         : pe_req.id,
-            vs         : pe_req.vs2,
-            eew        : pe_req.eew_vs2,
+            id             : pe_req.id,
+            vs             : pe_req.vs2,
+            eew            : pe_req.eew_vs2,
             // If reductions and vl == 0, we must replace with neutral values
-            conv       : (vfu_operation_d.vl == '0) ? OpQueueReductionZExt : pe_req.conversion_vs2,
-            scale_vl   : pe_req.scale_vl,
-            cvt_resize : pe_req.cvt_resize,
-            vtype      : pe_req.vtype,
+            conv           : (vfu_operation_d.vl == '0) ? OpQueueReductionZExt : pe_req.conversion_vs2,
+            scale_vl       : pe_req.scale_vl,
+            cvt_resize     : pe_req.cvt_resize,
+            special_hazard : pe_req.special_hazard,
+            vtype          : pe_req.vtype,
             // If reductions and vl == 0, we must replace the operands with neutral
             // values in the opqueues. So, vl must be 1 at least
-            vl         : (pe_req.op inside {[VREDSUM:VWREDSUM]} && vfu_operation_d.vl == '0)
-                         ? 1 : vfu_operation_d.vl,
-            vstart     : vfu_operation_d.vstart,
-            hazard     : pe_req.hazard_vs2 | pe_req.hazard_vd,
-            is_reduct  : pe_req.op inside {[VREDSUM:VWREDSUM]} ? 1'b1 : 0,
-            target_fu  : ALU_SLDU,
-            default    : '0
+            vl             : (pe_req.op inside {[VREDSUM:VWREDSUM]} && vfu_operation_d.vl == '0)
+                             ? 1 : vfu_operation_d.vl,
+            vstart         : vfu_operation_d.vstart,
+            hazard         : pe_req.hazard_vs2 | pe_req.hazard_vd,
+            is_reduct      : pe_req.op inside {[VREDSUM:VWREDSUM]} ? 1'b1 : 0,
+            target_fu      : ALU_SLDU,
+            default        : '0
           };
           operand_request_push[AluB] = pe_req.use_vs2;
 
@@ -298,66 +300,69 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::
         end
         VFU_MFpu: begin
           operand_request_i[MulFPUA] = '{
-            id         : pe_req.id,
-            vs         : pe_req.vs1,
-            eew        : pe_req.eew_vs1,
+            id             : pe_req.id,
+            vs             : pe_req.vs1,
+            eew            : pe_req.eew_vs1,
             // If reductions and vl == 0, we must replace with neutral values
-            conv       : pe_req.conversion_vs1,
-            scale_vl   : pe_req.scale_vl,
-            cvt_resize : pe_req.cvt_resize,
-            vtype      : pe_req.vtype,
+            conv           : pe_req.conversion_vs1,
+            scale_vl       : pe_req.scale_vl,
+            cvt_resize     : pe_req.cvt_resize,
+            special_hazard : pe_req.special_hazard,
+            vtype          : pe_req.vtype,
             // If reductions and vl == 0, we must replace the operands with neutral
             // values in the opqueues. So, vl must be 1 at least
-            vl         : (pe_req.op inside {[VFREDUSUM:VFWREDOSUM]}) ? 1 : vfu_operation_d.vl,
-            vstart     : vfu_operation_d.vstart,
-            hazard     : pe_req.hazard_vs1 | pe_req.hazard_vd,
-            is_reduct  : pe_req.op inside {[VFREDUSUM:VFWREDOSUM]} ? 1'b1 : 0,
-            target_fu  : MFPU_ADDRGEN,
-            default    : '0
+            vl             : (pe_req.op inside {[VFREDUSUM:VFWREDOSUM]}) ? 1 : vfu_operation_d.vl,
+            vstart         : vfu_operation_d.vstart,
+            hazard         : pe_req.hazard_vs1 | pe_req.hazard_vd,
+            is_reduct      : pe_req.op inside {[VFREDUSUM:VFWREDOSUM]} ? 1'b1 : 0,
+            target_fu      : MFPU_ADDRGEN,
+            default        : '0
           };
           operand_request_push[MulFPUA] = pe_req.use_vs1;
 
           operand_request_i[MulFPUB] = '{
-            id         : pe_req.id,
-            vs         : pe_req.swap_vs2_vd_op ? pe_req.vd        : pe_req.vs2,
-            eew        : pe_req.swap_vs2_vd_op ? pe_req.eew_vd_op : pe_req.eew_vs2,
+            id               : pe_req.id,
+            vs               : pe_req.swap_vs2_vd_op ? pe_req.vd        : pe_req.vs2,
+            eew              : pe_req.swap_vs2_vd_op ? pe_req.eew_vd_op : pe_req.eew_vs2,
             // If reductions and vl == 0, we must replace with neutral values
-            conv       : pe_req.conversion_vs2,
-            scale_vl   : pe_req.scale_vl,
-            cvt_resize : pe_req.cvt_resize,
-            vtype      : pe_req.vtype,
+            conv             : pe_req.conversion_vs2,
+            scale_vl         : pe_req.scale_vl,
+            cvt_resize       : pe_req.cvt_resize,
+            special_hazard   : pe_req.special_hazard,
+            vtype            : pe_req.vtype,
             // If reductions and vl == 0, we must replace the operands with neutral
             // values in the opqueues. So, vl must be 1 at least
-            vl         : (pe_req.op inside {[VFREDUSUM:VFWREDOSUM]} && vfu_operation_d.vl == '0)
-                        ? 1 : vfu_operation_d.vl,
-            vstart     : vfu_operation_d.vstart,
-            hazard     : (pe_req.swap_vs2_vd_op ?
+            vl               : (pe_req.op inside {[VFREDUSUM:VFWREDOSUM]} && vfu_operation_d.vl == '0)
+                               ? 1 : vfu_operation_d.vl,
+            vstart           : vfu_operation_d.vstart,
+            hazard           : (pe_req.swap_vs2_vd_op ?
             pe_req.hazard_vd : (pe_req.hazard_vs2 | pe_req.hazard_vd)),
-            is_reduct  : pe_req.op inside {[VFREDUSUM:VFWREDOSUM]} ? 1'b1 : 0,
-            target_fu  : MFPU_ADDRGEN,
-            default: '0
+            is_reduct        : pe_req.op inside {[VFREDUSUM:VFWREDOSUM]} ? 1'b1 : 0,
+            target_fu        : MFPU_ADDRGEN,
+            default          : '0
           };
           operand_request_push[MulFPUB] = pe_req.swap_vs2_vd_op ?
           pe_req.use_vd_op : pe_req.use_vs2;
 
           operand_request_i[MulFPUC] = '{
-            id         : pe_req.id,
-            vs         : pe_req.swap_vs2_vd_op ? pe_req.vs2            : pe_req.vd,
-            eew        : pe_req.swap_vs2_vd_op ? pe_req.eew_vs2        : pe_req.eew_vd_op,
-            conv       : pe_req.swap_vs2_vd_op ? pe_req.conversion_vs2 : OpQueueConversionNone,
-            scale_vl   : pe_req.scale_vl,
-            cvt_resize : pe_req.cvt_resize,
+            id             : pe_req.id,
+            vs             : pe_req.swap_vs2_vd_op ? pe_req.vs2            : pe_req.vd,
+            eew            : pe_req.swap_vs2_vd_op ? pe_req.eew_vs2        : pe_req.eew_vd_op,
+            conv           : pe_req.swap_vs2_vd_op ? pe_req.conversion_vs2 : OpQueueConversionNone,
+            scale_vl       : pe_req.scale_vl,
+            cvt_resize     : pe_req.cvt_resize,
+            special_hazard : pe_req.special_hazard,
             // If reductions and vl == 0, we must replace the operands with neutral
             // values in the opqueues. So, vl must be 1 at least
-            vl         : (pe_req.op inside {[VFREDUSUM:VFWREDOSUM]} && vfu_operation_d.vl == '0)
-                        ? 1 : vfu_operation_d.vl,
-            vstart     : vfu_operation_d.vstart,
-            vtype      : pe_req.vtype,
-            hazard     : pe_req.swap_vs2_vd_op ?
+            vl             : (pe_req.op inside {[VFREDUSUM:VFWREDOSUM]} && vfu_operation_d.vl == '0)
+                            ? 1 : vfu_operation_d.vl,
+            vstart         : vfu_operation_d.vstart,
+            vtype          : pe_req.vtype,
+            hazard         : pe_req.swap_vs2_vd_op ?
             (pe_req.hazard_vs2 | pe_req.hazard_vd) : pe_req.hazard_vd,
-            is_reduct  : pe_req.op inside {[VFREDUSUM:VFWREDOSUM]} ? 1'b1 : 0,
-            target_fu  : MFPU_ADDRGEN,
-            default : '0
+            is_reduct      : pe_req.op inside {[VFREDUSUM:VFWREDOSUM]} ? 1'b1 : 0,
+            target_fu      : MFPU_ADDRGEN,
+            default        : '0
           };
           operand_request_push[MulFPUC] = pe_req.swap_vs2_vd_op ?
           pe_req.use_vs2 : pe_req.use_vd_op;
@@ -399,17 +404,18 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::
 
           // Load indexed
           operand_request_i[SlideAddrGenA] = '{
-            id       : pe_req_i.id,
-            vs       : pe_req_i.vs2,
-            eew      : pe_req_i.eew_vs2,
-            conv     : pe_req_i.conversion_vs2,
-            target_fu: MFPU_ADDRGEN,
-            vl       : pe_req_i.vl / NrLanes,
-            scale_vl : pe_req_i.scale_vl,
-            vstart   : vfu_operation_d.vstart,
-            vtype    : pe_req_i.vtype,
-            hazard   : pe_req_i.hazard_vs2 | pe_req_i.hazard_vd,
-            default  : '0
+            id             : pe_req_i.id,
+            vs             : pe_req_i.vs2,
+            eew            : pe_req_i.eew_vs2,
+            conv           : pe_req_i.conversion_vs2,
+            target_fu      : MFPU_ADDRGEN,
+            special_hazard : pe_req.special_hazard,
+            vl             : pe_req_i.vl / NrLanes,
+            scale_vl       : pe_req_i.scale_vl,
+            vstart         : vfu_operation_d.vstart,
+            vtype          : pe_req_i.vtype,
+            hazard         : pe_req_i.hazard_vs2 | pe_req_i.hazard_vd,
+            default        : '0
           };
           // Since this request goes outside of the lane, we might need to request an
           // extra operand regardless of whether it is valid in this lane or not.
@@ -455,17 +461,18 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::
 
           // Store indexed
           operand_request_i[SlideAddrGenA] = '{
-            id       : pe_req_i.id,
-            vs       : pe_req_i.vs2,
-            eew      : pe_req_i.eew_vs2,
-            conv     : pe_req_i.conversion_vs2,
-            target_fu: MFPU_ADDRGEN,
-            vl       : pe_req_i.vl / NrLanes,
-            scale_vl : pe_req_i.scale_vl,
-            vstart   : vfu_operation_d.vstart,
-            vtype    : pe_req_i.vtype,
-            hazard   : pe_req_i.hazard_vs2 | pe_req_i.hazard_vd,
-            default  : '0
+            id             : pe_req_i.id,
+            vs             : pe_req_i.vs2,
+            eew            : pe_req_i.eew_vs2,
+            conv           : pe_req_i.conversion_vs2,
+            target_fu      : MFPU_ADDRGEN,
+            special_hazard : pe_req.special_hazard,
+            vl             : pe_req_i.vl / NrLanes,
+            scale_vl       : pe_req_i.scale_vl,
+            vstart         : vfu_operation_d.vstart,
+            vtype          : pe_req_i.vtype,
+            hazard         : pe_req_i.hazard_vs2 | pe_req_i.hazard_vd,
+            default        : '0
           };
           // Since this request goes outside of the lane, we might need to request an
           // extra operand regardless of whether it is valid in this lane or not.
@@ -476,16 +483,17 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::
 
         VFU_SlideUnit: begin
           operand_request_i[SlideAddrGenA] = '{
-            id       : pe_req.id,
-            vs       : pe_req.vs2,
-            eew      : pe_req.eew_vs2,
-            conv     : pe_req.conversion_vs2,
-            target_fu: ALU_SLDU,
-            scale_vl : pe_req.scale_vl,
-            vtype    : pe_req.vtype,
-            vstart   : vfu_operation_d.vstart,
-            hazard   : pe_req.hazard_vs2 | pe_req.hazard_vd,
-            default  : '0
+            id             : pe_req.id,
+            vs             : pe_req.vs2,
+            eew            : pe_req.eew_vs2,
+            conv           : pe_req.conversion_vs2,
+            target_fu      : ALU_SLDU,
+            special_hazard : pe_req.special_hazard,
+            scale_vl       : pe_req.scale_vl,
+            vtype          : pe_req.vtype,
+            vstart         : vfu_operation_d.vstart,
+            hazard         : pe_req.hazard_vs2 | pe_req.hazard_vd,
+            default        : '0
           };
           operand_request_push[SlideAddrGenA] = pe_req.use_vs2;
 
diff --git a/hardware/src/lane/operand_queue.sv b/hardware/src/lane/operand_queue.sv
index fe40a291b..72c8202e1 100644
--- a/hardware/src/lane/operand_queue.sv
+++ b/hardware/src/lane/operand_queue.sv
@@ -9,7 +9,8 @@
 // need it.
 
 module operand_queue import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::idx_width; #(
-    parameter  int           unsigned BufferDepth    = 2,
+    parameter  int           unsigned CmdBufDepth    = 2,
+    parameter  int           unsigned DataBufDepth   = 2,
     parameter  int           unsigned NrSlaves       = 1,
     parameter  int           unsigned NrLanes        = 0,
     // Support for floating-point data types
@@ -52,7 +53,7 @@ module operand_queue import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
   logic               cmd_pop;
 
   fifo_v3 #(
-    .DEPTH(BufferDepth        ),
+    .DEPTH(CmdBufDepth        ),
     .dtype(operand_queue_cmd_t)
   ) i_cmd_buffer (
     .clk_i     (clk_i                    ),
@@ -79,8 +80,8 @@ module operand_queue import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
   logic  ibuf_pop;
 
   fifo_v3 #(
-    .DEPTH     (BufferDepth),
-    .DATA_WIDTH(DataWidth  )
+    .DEPTH     (DataBufDepth),
+    .DATA_WIDTH(DataWidth   )
   ) i_input_buffer (
     .clk_i     (clk_i          ),
     .rst_ni    (rst_ni         ),
@@ -98,7 +99,7 @@ module operand_queue import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
 
   // We used a credit based system, to ensure that the FIFO is always
   // able to accept a request.
-  logic [idx_width(BufferDepth):0] ibuf_usage_d, ibuf_usage_q;
+  logic [idx_width(DataBufDepth):0] ibuf_usage_d, ibuf_usage_q;
 
   always_comb begin: p_ibuf_usage
     // Maintain state
@@ -110,7 +111,7 @@ module operand_queue import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
     if (ibuf_pop) ibuf_usage_d -= 1;
 
     // Are we ready?
-    operand_queue_ready_o = (ibuf_usage_q != BufferDepth);
+    operand_queue_ready_o = (ibuf_usage_q != DataBufDepth);
   end
 
   always_ff @(posedge clk_i or negedge rst_ni) begin: p_ibuf_usage_ff
diff --git a/hardware/src/lane/operand_queues_stage.sv b/hardware/src/lane/operand_queues_stage.sv
index dab636d07..5ed714522 100644
--- a/hardware/src/lane/operand_queues_stage.sv
+++ b/hardware/src/lane/operand_queues_stage.sv
@@ -52,14 +52,15 @@ module operand_queues_stage import ara_pkg::*; import rvv_pkg::*; import cf_math
   ///////////
 
   operand_queue #(
-    .BufferDepth   (5         ),
-    .FPUSupport    (FPUSupport),
-    .NrLanes       (NrLanes   ),
-    .SupportIntExt2(1'b1      ),
-    .SupportIntExt4(1'b1      ),
-    .SupportIntExt8(1'b1      ),
-    .SupportReduct (1'b1      ),
-    .SupportNtrVal (1'b0      )
+    .CmdBufDepth   (ValuInsnQueueDepth),
+    .DataBufDepth  (5                 ),
+    .FPUSupport    (FPUSupport        ),
+    .NrLanes       (NrLanes           ),
+    .SupportIntExt2(1'b1              ),
+    .SupportIntExt4(1'b1              ),
+    .SupportIntExt8(1'b1              ),
+    .SupportReduct (1'b1              ),
+    .SupportNtrVal (1'b0              )
   ) i_operand_queue_alu_a (
     .clk_i                    (clk_i                          ),
     .rst_ni                   (rst_ni                         ),
@@ -77,14 +78,15 @@ module operand_queues_stage import ara_pkg::*; import rvv_pkg::*; import cf_math
   );
 
   operand_queue #(
-    .BufferDepth   (5         ),
-    .FPUSupport    (FPUSupport),
-    .NrLanes       (NrLanes   ),
-    .SupportIntExt2(1'b1      ),
-    .SupportIntExt4(1'b1      ),
-    .SupportIntExt8(1'b1      ),
-    .SupportReduct (1'b1      ),
-    .SupportNtrVal (1'b1      )
+    .CmdBufDepth   (ValuInsnQueueDepth),
+    .DataBufDepth  (5                 ),
+    .FPUSupport    (FPUSupport        ),
+    .NrLanes       (NrLanes           ),
+    .SupportIntExt2(1'b1              ),
+    .SupportIntExt4(1'b1              ),
+    .SupportIntExt8(1'b1              ),
+    .SupportReduct (1'b1              ),
+    .SupportNtrVal (1'b1              )
   ) i_operand_queue_alu_b (
     .clk_i                    (clk_i                          ),
     .rst_ni                   (rst_ni                         ),
@@ -106,12 +108,13 @@ module operand_queues_stage import ara_pkg::*; import rvv_pkg::*; import cf_math
   //////////////////////
 
   operand_queue #(
-    .BufferDepth   (5         ),
-    .FPUSupport    (FPUSupport),
-    .NrLanes       (NrLanes   ),
-    .SupportIntExt2(1'b1      ),
-    .SupportReduct (1'b1      ),
-    .SupportNtrVal (1'b0      )
+    .CmdBufDepth   (MfpuInsnQueueDepth ),
+    .DataBufDepth  (5                  ),
+    .FPUSupport    (FPUSupport         ),
+    .NrLanes       (NrLanes            ),
+    .SupportIntExt2(1'b1               ),
+    .SupportReduct (1'b1               ),
+    .SupportNtrVal (1'b0               )
   ) i_operand_queue_mfpu_a (
     .clk_i                    (clk_i                             ),
     .rst_ni                   (rst_ni                            ),
@@ -129,12 +132,13 @@ module operand_queues_stage import ara_pkg::*; import rvv_pkg::*; import cf_math
   );
 
   operand_queue #(
-    .BufferDepth   (5         ),
-    .FPUSupport    (FPUSupport),
-    .NrLanes       (NrLanes   ),
-    .SupportIntExt2(1'b1      ),
-    .SupportReduct (1'b1      ),
-    .SupportNtrVal (1'b1      )
+    .CmdBufDepth   (MfpuInsnQueueDepth ),
+    .DataBufDepth  (5                  ),
+    .FPUSupport    (FPUSupport         ),
+    .NrLanes       (NrLanes            ),
+    .SupportIntExt2(1'b1               ),
+    .SupportReduct (1'b1               ),
+    .SupportNtrVal (1'b1               )
   ) i_operand_queue_mfpu_b (
     .clk_i                    (clk_i                             ),
     .rst_ni                   (rst_ni                            ),
@@ -152,12 +156,13 @@ module operand_queues_stage import ara_pkg::*; import rvv_pkg::*; import cf_math
   );
 
   operand_queue #(
-    .BufferDepth   (5         ),
-    .FPUSupport    (FPUSupport),
-    .NrLanes       (NrLanes   ),
-    .SupportIntExt2(1'b1      ),
-    .SupportReduct (1'b1      ),
-    .SupportNtrVal (1'b1      )
+    .CmdBufDepth   (MfpuInsnQueueDepth ),
+    .DataBufDepth  (5                  ),
+    .FPUSupport    (FPUSupport         ),
+    .NrLanes       (NrLanes            ),
+    .SupportIntExt2(1'b1               ),
+    .SupportReduct (1'b1               ),
+    .SupportNtrVal (1'b1               )
   ) i_operand_queue_mfpu_c (
     .clk_i                    (clk_i                             ),
     .rst_ni                   (rst_ni                            ),
@@ -179,9 +184,10 @@ module operand_queues_stage import ara_pkg::*; import rvv_pkg::*; import cf_math
   ///////////////////////
 
   operand_queue #(
-    .BufferDepth(2         ),
-    .FPUSupport (FPUSupport),
-    .NrLanes    (NrLanes   )
+    .CmdBufDepth   (VstuInsnQueueDepth + MaskuInsnQueueDepth),
+    .DataBufDepth  (2                                       ),
+    .FPUSupport    (FPUSupport                              ),
+    .NrLanes       (NrLanes                                 )
   ) i_operand_queue_st_mask_a (
     .clk_i                    (clk_i                         ),
     .rst_ni                   (rst_ni                        ),
@@ -203,9 +209,10 @@ module operand_queues_stage import ara_pkg::*; import rvv_pkg::*; import cf_math
    ****************/
 
   operand_queue #(
-    .BufferDepth(2         ),
-    .FPUSupport (FPUSupport),
-    .NrLanes    (NrLanes   )
+    .CmdBufDepth   (VlduInsnQueueDepth),
+    .DataBufDepth  (2                 ),
+    .FPUSupport    (FPUSupport        ),
+    .NrLanes       (NrLanes           )
   ) i_operand_queue_slide_addrgen_a (
     .clk_i                    (clk_i                                         ),
     .rst_ni                   (rst_ni                                        ),
@@ -227,11 +234,12 @@ module operand_queues_stage import ara_pkg::*; import rvv_pkg::*; import cf_math
   /////////////////
 
   operand_queue #(
-    .BufferDepth(1         ),
-    .FPUSupport (FPUSupport),
-    .SupportIntExt2(1'b1),
-    .SupportIntExt4(1'b1),
-    .SupportIntExt8(1'b1),
+    .CmdBufDepth   (MaskuInsnQueueDepth),
+    .DataBufDepth  (1                  ),
+    .FPUSupport    (FPUSupport         ),
+    .SupportIntExt2(1'b1               ),
+    .SupportIntExt4(1'b1               ),
+    .SupportIntExt8(1'b1               ),
     .NrLanes    (NrLanes   )
   ) i_operand_queue_mask_b (
     .clk_i                    (clk_i                           ),
@@ -250,8 +258,9 @@ module operand_queues_stage import ara_pkg::*; import rvv_pkg::*; import cf_math
   );
 
   operand_queue #(
-    .BufferDepth(1         ),
-    .NrLanes    (NrLanes   )
+    .CmdBufDepth   (MaskuInsnQueueDepth),
+    .DataBufDepth  (1                  ),
+    .NrLanes       (NrLanes            )
   ) i_operand_queue_mask_m (
     .clk_i                    (clk_i                           ),
     .rst_ni                   (rst_ni                          ),
diff --git a/hardware/src/lane/operand_requester.sv b/hardware/src/lane/operand_requester.sv
index 54590fbc3..3e85a58b3 100644
--- a/hardware/src/lane/operand_requester.sv
+++ b/hardware/src/lane/operand_requester.sv
@@ -9,11 +9,17 @@
 // queues. This stage also includes the VRF arbiter.
 
 module operand_requester import ara_pkg::*; import rvv_pkg::*; #(
-    parameter  int  unsigned NrLanes = 0,
-    parameter  int  unsigned NrBanks = 0,     // Number of banks in the vector register file
-    parameter  type          vaddr_t = logic, // Type used to address vector register file elements
+    parameter  int unsigned NrLanes         = 0,
+    parameter  int unsigned NrBanks         = 0, // Number of banks in the vector register file
+    // Type used to address vector register file elements
+    localparam int unsigned MaxVLenBPerLane = VLENB / NrLanes,      // In bytes
+    localparam int unsigned VRFBSizePerLane = MaxVLenBPerLane * 32, // In bytes
+    localparam int unsigned VaddrIdxWidth   = $clog2(VRFBSizePerLane),
+    localparam int unsigned VaddrBankWidth  = $clog2(NrVRFBanksPerLane),
+    localparam int unsigned VaddrVregWidth  = $clog2(MaxVLenBPerLane),
+    localparam type         vaddr_t         = logic [VaddrIdxWidth-1:0],
     // Dependant parameters. DO NOT CHANGE!
-    localparam type          strb_t  = logic[$bits(elen_t)/8-1:0]
+    localparam type          strb_t         = logic[$bits(elen_t)/8-1:0]
   ) (
     input  logic                                       clk_i,
     input  logic                                       rst_ni,
@@ -76,6 +82,8 @@ module operand_requester import ara_pkg::*; import rvv_pkg::*; #(
     output logic                                       ldu_result_final_gnt_o
   );
 
+  `include "../../include/ara_vaddr.svh" // Address-handling functions; same relative path as valu.sv/vmfpu.sv
+
   import cf_math_pkg::idx_width;
 
   ////////////////////////
@@ -233,6 +241,8 @@ module operand_requester import ara_pkg::*; import rvv_pkg::*; #(
       vid_t id;
       // Address of the next element to be read
       vaddr_t addr;
+      // Source reg LSbs (useful for barber's pole)
+      logic [idx_width(NrBanks)-1:0] vs;
       // How many elements remain to be read
       vlen_t len;
       // Element width
@@ -245,15 +255,27 @@ module operand_requester import ara_pkg::*; import rvv_pkg::*; #(
       // In case of a WAW with a previous instruction,
       // read once every two writes of the previous instruction
       logic is_widening;
+      // Does this instruction have a special hazard protocol?
+      logic special_hazard;
       // One-bit counters
       logic [NrVInsn-1:0] waw_hazard_counter;
     } requester_d, requester_q;
 
+    // Asserted if the SLDU requester is registering a new instruction
+    logic new_sldu_insn;
+    logic has_stalled_d, has_stalled_q;
 
     // Is there a hazard during this cycle?
+    // WAW with widening instructions are special: wait for 2 writes instead of 1
+    // Slide1Up/Down with hazards should wait one cycle before being handled normally
     logic stall;
-    assign stall = |(requester_q.hazard & ~(vinsn_result_written_q &
-                   (~{NrVInsn{requester_q.is_widening}} | requester_q.waw_hazard_counter)));
+    assign stall = |(requester_q.hazard & ~(vinsn_result_written_q & ((~{NrVInsn{requester_q.is_widening}} &
+                     requester_q.special_hazard) | requester_q.waw_hazard_counter))) | // NOTE(review): special_hazard is 1 bit wide and is zero-extended in this '&', so only bit 0 of the mask survives - confirm whether {NrVInsn{special_hazard}} was intended
+                     (~has_stalled_q & requester_q.special_hazard & |requester_q.hazard); // Extra stall while a special-hazard instruction with a pending hazard has not stalled yet
+
+    // For every instruction, it signals if the requester has already stalled once
+    // This is needed for vslide1x stall handling
+    assign has_stalled_d = new_sldu_insn ? 1'b0 : (stall ? 1'b1 : has_stalled_q);
 
     // Did we get a grant?
     logic [NrBanks-1:0] operand_requester_gnt;
@@ -269,6 +291,8 @@ module operand_requester import ara_pkg::*; import rvv_pkg::*; #(
       state_d     = state_q;
       requester_d = requester_q;
 
+      new_sldu_insn = 1'b0;
+
       // Make no requests to the VRF
       operand_payload[requester] = '0;
       for (int bank = 0; bank < NrBanks; bank++) operand_req[bank][requester] = 1'b0;
@@ -288,6 +312,10 @@ module operand_requester import ara_pkg::*; import rvv_pkg::*; #(
             // Acknowledge the request
             operand_request_ready_o[requester] = 1'b1;
 
+            // New slide unit instruction incoming
+            if (requester == (NrOperandQueues + VFU_SlideUnit))
+              new_sldu_insn = 1'b1;
+
             // Send a command to the operand queue
             operand_queue_cmd_o[requester] = '{
               eew : operand_request_i[requester].eew,
@@ -312,22 +340,25 @@ module operand_requester import ara_pkg::*; import rvv_pkg::*; #(
 
             // Store the request
             requester_d = '{
-              id     : operand_request_i[requester].id,
-              addr   : vaddr(operand_request_i[requester].vs, NrLanes) +
-              (operand_request_i[requester].vstart >>
-                (int'(EW64) - int'(operand_request_i[requester].eew))),
+              id             : operand_request_i[requester].id,
+              addr           : vaddr_offset(vaddr(operand_request_i[requester].vs, NrLanes),
+                vaddr_t'(operand_request_i[requester].vstart >>
+                (int'(EW64) - int'(operand_request_i[requester].eew))), operand_request_i[requester].vs),
+              vs             : operand_request_i[requester].vs[idx_width(NrBanks)-1:0],
               // For memory operations, the number of elements initially refers to the new EEW (vsew here),
               // but the requester must refer to the old EEW (eew here)
               // This reasoning cannot be applied also to widening instructions, which modify vsew
               // treating it as the EEW of vd
-              len         : (operand_request_i[requester].scale_vl) ?
-                              ((operand_request_i[requester].vl <<
-                              operand_request_i[requester].vtype.vsew) >>
-                              operand_request_i[requester].eew) :
-                              operand_request_i[requester].vl,
-              vew         : operand_request_i[requester].eew,
-              hazard      : operand_request_i[requester].hazard,
-              is_widening : operand_request_i[requester].cvt_resize == CVT_WIDE,
+              len            : (operand_request_i[requester].scale_vl) ?
+                                 ((operand_request_i[requester].vl <<
+                                 operand_request_i[requester].vtype.vsew) >>
+                                 operand_request_i[requester].eew) :
+                                 operand_request_i[requester].vl,
+              vew            : operand_request_i[requester].eew,
+              hazard         : operand_request_i[requester].hazard,
+              is_widening    : operand_request_i[requester].cvt_resize == CVT_WIDE &&
+                                 operand_request_i[requester].special_hazard,
+              special_hazard : operand_request_i[requester].special_hazard,
               default: '0
             };
             // The length should be at least one after the rescaling
@@ -363,7 +394,7 @@ module operand_requester import ara_pkg::*; import rvv_pkg::*; #(
             // Received a grant.
             if (|operand_requester_gnt) begin
               // Bump the address pointer
-              requester_d.addr = requester_q.addr + 1'b1;
+              requester_d.addr = next_vaddr(requester_q.addr, requester_q.vs);
 
               // We read less than 64 bits worth of elements
               if (requester_q.len < (1 << (int'(EW64) - int'(requester_q.vew))))
@@ -381,6 +412,10 @@ module operand_requester import ara_pkg::*; import rvv_pkg::*; #(
                 // Acknowledge the request
                 operand_request_ready_o[requester] = 1'b1;
 
+                // New slide unit instruction incoming
+                if (requester == (NrOperandQueues + VFU_SlideUnit))
+                  new_sldu_insn = 1'b1;
+
                 // Send a command to the operand queue
                 operand_queue_cmd_o[requester] = '{
                   eew      : operand_request_i[requester].eew,
@@ -401,18 +436,22 @@ module operand_requester import ara_pkg::*; import rvv_pkg::*; #(
 
                 // Store the request
                 requester_d = '{
-                  id   : operand_request_i[requester].id,
-                  addr : vaddr(operand_request_i[requester].vs, NrLanes) +
-                  (operand_request_i[requester].vstart >>
-                    (int'(EW64) - int'(operand_request_i[requester].eew))),
-                  len    : (operand_request_i[requester].scale_vl) ?
-                             ((operand_request_i[requester].vl <<
-                             operand_request_i[requester].vtype.vsew) >>
-                             operand_request_i[requester].eew) :
-                             operand_request_i[requester].vl,
-                  vew    : operand_request_i[requester].eew,
-                  hazard : operand_request_i[requester].hazard,
-                  default: '0
+                  id             : operand_request_i[requester].id,
+                  addr           : vaddr_offset(vaddr(operand_request_i[requester].vs, NrLanes),
+                    vaddr_t'(operand_request_i[requester].vstart >>
+                    (int'(EW64) - int'(operand_request_i[requester].eew))), operand_request_i[requester].vs),
+                  vs             : operand_request_i[requester].vs[idx_width(NrBanks)-1:0],
+                  len            : (operand_request_i[requester].scale_vl) ?
+                                     ((operand_request_i[requester].vl <<
+                                     operand_request_i[requester].vtype.vsew) >>
+                                     operand_request_i[requester].eew) :
+                                     operand_request_i[requester].vl,
+                  vew            : operand_request_i[requester].eew,
+                  hazard         : operand_request_i[requester].hazard,
+                  is_widening    : operand_request_i[requester].cvt_resize == CVT_WIDE &&
+                                     operand_request_i[requester].special_hazard,
+                  special_hazard : operand_request_i[requester].special_hazard,
+                  default        : '0
                 };
                 // The length should be at least one after the rescaling
                 if (requester_d.len == '0)
@@ -428,11 +467,13 @@ module operand_requester import ara_pkg::*; import rvv_pkg::*; #(
 
     always_ff @(posedge clk_i or negedge rst_ni) begin
       if (!rst_ni) begin
-        state_q     <= IDLE;
-        requester_q <= '0;
+        state_q       <= IDLE;
+        requester_q   <= '0;
+        has_stalled_q <= 1'b0;
       end else begin
-        state_q     <= state_d;
-        requester_q <= requester_d;
+        state_q       <= state_d;
+        requester_q   <= requester_d;
+        has_stalled_q <= has_stalled_d;
       end
     end
   end : gen_operand_requester
diff --git a/hardware/src/lane/valu.sv b/hardware/src/lane/valu.sv
index 386caca74..7cc93f3d8 100644
--- a/hardware/src/lane/valu.sv
+++ b/hardware/src/lane/valu.sv
@@ -8,15 +8,20 @@
 // in a SIMD fashion, always operating on 64 bits.
 
 module valu import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::idx_width; #(
-    parameter  int  unsigned NrLanes      = 0,
+    parameter  int  unsigned NrLanes         = 0,
     // Support for fixed-point data types
-    parameter  logic         FixPtSupport = FixedPointEnable,
+    parameter  logic         FixPtSupport    = FixedPointEnable,
     // Type used to address vector register file elements
-    parameter  type          vaddr_t      = logic,
+    localparam int  unsigned MaxVLenBPerLane = VLENB / NrLanes,      // In bytes
+    localparam int  unsigned VRFBSizePerLane = MaxVLenBPerLane * 32, // In bytes
+    localparam int  unsigned VaddrIdxWidth   = $clog2(VRFBSizePerLane),
+    localparam int  unsigned VaddrBankWidth  = $clog2(NrVRFBanksPerLane),
+    localparam int  unsigned VaddrVregWidth  = $clog2(MaxVLenBPerLane),
+    localparam type          vaddr_t         = logic [VaddrIdxWidth-1:0],
     // Dependant parameters. DO NOT CHANGE!
-    localparam int  unsigned DataWidth    = $bits(elen_t),
-    localparam int  unsigned StrbWidth    = DataWidth/8,
-    localparam type          strb_t       = logic [StrbWidth-1:0]
+    localparam int  unsigned DataWidth       = $bits(elen_t),
+    localparam int  unsigned StrbWidth       = DataWidth/8,
+    localparam type          strb_t          = logic [StrbWidth-1:0]
   ) (
     input  logic                         clk_i,
     input  logic                         rst_ni,
@@ -55,6 +60,9 @@ module valu import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::idx_width;
     output logic                         mask_ready_o
   );
 
+  // Include address-handling functions
+  `include "../../include/ara_vaddr.svh"
+
   import cf_math_pkg::idx_width;
 
   /////////////
@@ -137,6 +145,8 @@ module valu import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::idx_width;
     logic mask;
   } payload_t;
 
+  vaddr_t addr_d, addr_q;
+
   // Result queue
   payload_t [ResultQueueDepth-1:0]            result_queue_d, result_queue_q;
   logic     [ResultQueueDepth-1:0]            result_queue_valid_d, result_queue_valid_q;
@@ -424,6 +434,7 @@ module valu import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::idx_width;
     reduction_rx_cnt_d      = reduction_rx_cnt_q;
     sldu_transactions_cnt_d = sldu_transactions_cnt_q;
     red_hs_synch_d          = red_hs_synch_q;
+    addr_d                  = addr_q;
     alu_red_valid_o         = 1'b0;
     sldu_alu_ready_d        = 1'b0;
     simd_red_cnt_max_d      = simd_red_cnt_max_q;
@@ -474,8 +485,9 @@ module valu import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::idx_width;
                 mask_ready_o = !vinsn_issue_q.vm;
 
               // Store the result in the result queue
+              addr_d = next_vaddr(addr_q, vinsn_issue_q.vd);
               result_queue_d[result_queue_write_pnt_q].wdata = result_queue_q[result_queue_write_pnt_q].wdata | valu_result;
-              result_queue_d[result_queue_write_pnt_q].addr  = vaddr(vinsn_issue_q.vd, NrLanes) + ((vinsn_issue_q.vl - issue_cnt_q) >> (int'(EW64) - vinsn_issue_q.vtype.vsew));
+              result_queue_d[result_queue_write_pnt_q].addr  = addr_q;
               result_queue_d[result_queue_write_pnt_q].id    = vinsn_issue_q.id;
               result_queue_d[result_queue_write_pnt_q].mask  = vinsn_issue_q.vfu == VFU_MaskUnit;
               if (!narrowing(vinsn_issue_q.op) || !narrowing_select_q)
@@ -531,6 +543,11 @@ module valu import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::idx_width;
                 else
                   vinsn_queue_d.issue_pnt = vinsn_queue_q.issue_pnt + 1;
 
+                // Change the starting address as soon as we issue a new instruction:
+                // this unit is not pipelined, and the elements written in the
+                // result queue belong to vinsn_issue_q
+                addr_d = vaddr(vinsn_queue_q.vinsn[vinsn_queue_d.issue_pnt].vd, NrLanes);
+
                 // Assign vector length for next instruction in the instruction queue
                 if (vinsn_queue_d.issue_cnt != 0) begin
                   if (!(vinsn_queue_q.vinsn[vinsn_queue_d.issue_pnt].op inside {[VMANDNOT:VMXNOR]}))
@@ -830,6 +847,8 @@ module valu import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::idx_width;
         red_hs_synch_d = 1'b1; // Allow the first valid
 
         issue_cnt_d = vfu_operation_i.vl;
+        // Initialize the starting address for the next instruction
+        addr_d = vaddr(vfu_operation_i.vd, NrLanes);
         if (!(vfu_operation_i.op inside {[VMANDNOT:VMXNOR]}))
           issue_cnt_d = vfu_operation_i.vl;
         else begin
@@ -877,6 +896,7 @@ module valu import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::idx_width;
       simd_red_cnt_max_q      <= '0;
       alu_red_ready_q         <= 1'b0;
       alu_vxsat_q             <= '0;
+      addr_q                  <= '0;
     end else begin
       issue_cnt_q             <= issue_cnt_d;
       commit_cnt_q            <= commit_cnt_d;
@@ -890,6 +910,7 @@ module valu import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::idx_width;
       simd_red_cnt_max_q      <= simd_red_cnt_max_d;
       alu_red_ready_q         <= alu_red_ready_i;
       alu_vxsat_q             <= alu_vxsat_d;
+      addr_q                  <= addr_d;
     end
   end
 
diff --git a/hardware/src/lane/vector_fus_stage.sv b/hardware/src/lane/vector_fus_stage.sv
index 6eb28e7c2..42b6a347e 100644
--- a/hardware/src/lane/vector_fus_stage.sv
+++ b/hardware/src/lane/vector_fus_stage.sv
@@ -96,9 +96,8 @@ module vector_fus_stage import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg
   //////////////////
 
   valu #(
-    .NrLanes(NrLanes),
-    .FixPtSupport(FixPtSupport),
-    .vaddr_t(vaddr_t)
+    .NrLanes     (NrLanes     ),
+    .FixPtSupport(FixPtSupport)
   ) i_valu (
     .clk_i                (clk_i                          ),
     .rst_ni               (rst_ni                         ),
@@ -143,10 +142,9 @@ module vector_fus_stage import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg
   ///////////////////
 
   vmfpu #(
-    .NrLanes   (NrLanes   ),
-    .FPUSupport(FPUSupport),
-    .FixPtSupport(FixPtSupport),
-    .vaddr_t   (vaddr_t   )
+    .NrLanes     (NrLanes     ),
+    .FPUSupport  (FPUSupport  ),
+    .FixPtSupport(FixPtSupport)
   ) i_vmfpu (
     .clk_i                (clk_i                           ),
     .rst_ni               (rst_ni                          ),
diff --git a/hardware/src/lane/vmfpu.sv b/hardware/src/lane/vmfpu.sv
index c4ffc6d72..81c729864 100644
--- a/hardware/src/lane/vmfpu.sv
+++ b/hardware/src/lane/vmfpu.sv
@@ -9,17 +9,22 @@
 
 module vmfpu import ara_pkg::*; import rvv_pkg::*; import fpnew_pkg::*;
   import cf_math_pkg::idx_width; #(
-    parameter  int           unsigned NrLanes      = 0,
+    parameter  int           unsigned NrLanes         = 0,
     // Support for floating-point data types
-    parameter  fpu_support_e          FPUSupport   = FPUSupportHalfSingleDouble,
+    parameter  fpu_support_e          FPUSupport      = FPUSupportHalfSingleDouble,
     // Support for fixed-point data types
-    parameter  logic                  FixPtSupport = FixedPointEnable,
+    parameter  logic                  FixPtSupport    = FixedPointEnable,
     // Type used to address vector register file elements
-    parameter  type                   vaddr_t      = logic,
+    localparam int           unsigned MaxVLenBPerLane = VLENB / NrLanes,      // In bytes
+    localparam int           unsigned VRFBSizePerLane = MaxVLenBPerLane * 32, // In bytes
+    localparam int           unsigned VaddrIdxWidth   = $clog2(VRFBSizePerLane),
+    localparam int           unsigned VaddrBankWidth  = $clog2(NrVRFBanksPerLane),
+    localparam int           unsigned VaddrVregWidth  = $clog2(MaxVLenBPerLane),
+    localparam type                   vaddr_t         = logic [VaddrIdxWidth-1:0],
     // Dependant parameters. DO NOT CHANGE!
-    localparam int           unsigned DataWidth    = $bits(elen_t),
-    localparam int           unsigned StrbWidth    = DataWidth/8,
-    localparam type                   strb_t       = logic [DataWidth/8-1:0]
+    localparam int           unsigned DataWidth       = $bits(elen_t),
+    localparam int           unsigned StrbWidth       = DataWidth/8,
+    localparam type                   strb_t          = logic [DataWidth/8-1:0]
   ) (
     input  logic                         clk_i,
     input  logic                         rst_ni,
@@ -61,6 +66,9 @@ module vmfpu import ara_pkg::*; import rvv_pkg::*; import fpnew_pkg::*;
     output logic                         mask_ready_o
   );
 
+  // Include address-handling functions
+  `include "../../include/ara_vaddr.svh"
+
   ////////////////////////////////
   //  Vector instruction queue  //
   ////////////////////////////////
@@ -180,6 +188,8 @@ module vmfpu import ara_pkg::*; import rvv_pkg::*; import fpnew_pkg::*;
   //  Helper signals  //
   //////////////////////
 
+  vaddr_t addr_d, addr_q;
+
   logic vinsn_issue_mul, vinsn_issue_div, vinsn_issue_fpu;
 
   assign vinsn_issue_mul = vinsn_issue_q.op inside {[VMUL:VSMUL]};
@@ -1044,6 +1054,7 @@ module vmfpu import ara_pkg::*; import rvv_pkg::*; import fpnew_pkg::*;
     reduction_rx_cnt_d      = reduction_rx_cnt_q;
     sldu_transactions_cnt_d = sldu_transactions_cnt_q;
     red_hs_synch_d          = red_hs_synch_q;
+    addr_d                  = addr_q;
     mfpu_red_valid_o        = 1'b0;
     sldu_mfpu_ready_d       = 1'b0;
     simd_red_cnt_max_d      = simd_red_cnt_max_q;
@@ -1218,9 +1229,9 @@ module vmfpu import ara_pkg::*; import rvv_pkg::*; import fpnew_pkg::*;
           to_process_cnt_d = (narrowing(vinsn_processing_q.cvt_resize)) ? (to_process_cnt_q - processed_element_cnt_narrow) : (to_process_cnt_q - processed_element_cnt);
 
           // Store the result in the result queue
+          addr_d = next_vaddr(addr_q, vinsn_processing_q.vd);
           result_queue_d[result_queue_write_pnt_q].id    = vinsn_processing_q.id;
-          result_queue_d[result_queue_write_pnt_q].addr  = vaddr(vinsn_processing_q.vd, NrLanes) +
-            ((vinsn_processing_q.vl - to_process_cnt_q) >> (int'(EW64) - vinsn_processing_q.vtype.vsew));
+          result_queue_d[result_queue_write_pnt_q].addr  = addr_q;
           // FP narrowing instructions pack the result in two different cycles, and only some 16-bit slices are active
           if (narrowing(vinsn_processing_q.cvt_resize)) begin
             for (int b = 0; b < 4; b++) begin
@@ -1275,6 +1286,10 @@ module vmfpu import ara_pkg::*; import rvv_pkg::*; import fpnew_pkg::*;
 
             if (vinsn_queue_d.processing_cnt != 0) to_process_cnt_d =
               vinsn_queue_q.vinsn[vinsn_queue_d.processing_pnt].vl;
+
+            // Update the address for the results of the next cycles since they belong
+            // to the next instruction
+            addr_d = vaddr(vinsn_queue_q.vinsn[vinsn_queue_d.processing_pnt].vd, NrLanes);
           end
         end
       end
@@ -1695,6 +1710,9 @@ module vmfpu import ara_pkg::*; import rvv_pkg::*; import fpnew_pkg::*;
           if (vinsn_queue_d.processing_cnt != 0) to_process_cnt_d =
             vinsn_queue_q.vinsn[vinsn_queue_d.processing_pnt].vl;
 
+          // Update the starting address for the next instruction
+          addr_d = vaddr(vinsn_queue_q.vinsn[vinsn_queue_d.processing_pnt].vd, NrLanes);
+
           // Bump issue counter and pointers
           vinsn_queue_d.issue_cnt -= 1;
           if (vinsn_queue_q.issue_pnt == VInsnQueueDepth-1) vinsn_queue_d.issue_pnt = '0;
@@ -1833,9 +1851,12 @@ module vmfpu import ara_pkg::*; import rvv_pkg::*; import fpnew_pkg::*;
         osum_issue_cnt_d        = '0;
         issue_cnt_d             = vfu_operation_i.vl;
       end
-      if (vinsn_queue_d.processing_cnt == '0) to_process_cnt_d = vfu_operation_i.vl;
-      if (vinsn_queue_d.commit_cnt == '0) commit_cnt_d =
-        is_reduction(vfu_operation_i.op) ? 1 : vfu_operation_i.vl;
+      if (vinsn_queue_d.processing_cnt == '0) begin
+        to_process_cnt_d = vfu_operation_i.vl;
+        // A new instruction to process; update the starting address
+        addr_d = vaddr(vfu_operation_i.vd, NrLanes);
+      end
+      if (vinsn_queue_d.commit_cnt == '0) commit_cnt_d = is_reduction(vfu_operation_i.op) ? 1 : vfu_operation_i.vl;
       // Floating-Point re-encoding for widening operations
       // Enabled only for the supported formats
       if (FPUSupport != FPUSupportNone) begin
@@ -1902,6 +1923,7 @@ module vmfpu import ara_pkg::*; import rvv_pkg::*; import fpnew_pkg::*;
       intra_op_rx_cnt_q       <= '0;
       osum_issue_cnt_q        <= '0;
       mfpu_vxsat_q            <= '0;
+      addr_q                  <= '0;
     end else begin
       issue_cnt_q             <= issue_cnt_d;
       to_process_cnt_q        <= to_process_cnt_d;
@@ -1925,6 +1947,7 @@ module vmfpu import ara_pkg::*; import rvv_pkg::*; import fpnew_pkg::*;
       intra_op_rx_cnt_q       <= intra_op_rx_cnt_d;
       osum_issue_cnt_q        <= osum_issue_cnt_d;
       mfpu_vxsat_q            <= mfpu_vxsat_d;
+      addr_q                  <= addr_d;
     end
   end
 
diff --git a/hardware/src/masku/masku.sv b/hardware/src/masku/masku.sv
index afea302f6..58cc11f1b 100644
--- a/hardware/src/masku/masku.sv
+++ b/hardware/src/masku/masku.sv
@@ -10,12 +10,18 @@
 // predicated instructions.
 
 module masku import ara_pkg::*; import rvv_pkg::*; #(
-    parameter  int  unsigned NrLanes = 0,
-    parameter  type          vaddr_t = logic, // Type used to address vector register file elements
+    parameter  int  unsigned NrLanes         = 0,
+    // Address of an element in the lane's VRF
+    localparam int  unsigned MaxVLenBPerLane = VLENB / NrLanes,      // In bytes
+    localparam int  unsigned VRFBSizePerLane = MaxVLenBPerLane * 32, // In bytes
+    localparam int  unsigned VaddrIdxWidth   = $clog2(VRFBSizePerLane),
+    localparam int  unsigned VaddrBankWidth  = $clog2(NrVRFBanksPerLane),
+    localparam int  unsigned VaddrVregWidth  = $clog2(MaxVLenBPerLane),
+    localparam type          vaddr_t         = logic [VaddrIdxWidth-1:0],
     // Dependant parameters. DO NOT CHANGE!
-    localparam int  unsigned DataWidth = $bits(elen_t), // Width of the lane datapath
-    localparam int  unsigned StrbWidth = DataWidth/8,
-    localparam type          strb_t    = logic [StrbWidth-1:0] // Byte-strobe type
+    localparam int  unsigned DataWidth       = $bits(elen_t), // Width of the lane datapath
+    localparam int  unsigned StrbWidth       = DataWidth/8,
+    localparam type          strb_t          = logic [StrbWidth-1:0] // Byte-strobe type
   ) (
     input  logic                                       clk_i,
     input  logic                                       rst_ni,
@@ -48,6 +54,9 @@ module masku import ara_pkg::*; import rvv_pkg::*; #(
     input  logic                                       sldu_mask_ready_i
   );
 
+  // Include address-handling functions
+  `include "../../include/ara_vaddr.svh"
+
   import cf_math_pkg::idx_width;
 
   ////////////////
@@ -142,6 +151,8 @@ module masku import ara_pkg::*; import rvv_pkg::*; #(
   // There is a mask queue per lane, holding the operands that were not
   // yet used by the corresponding lane.
 
+  vaddr_t addr_d, addr_q;
+
   // Mask queue
   strb_t [MaskQueueDepth-1:0][NrLanes-1:0] mask_queue_d, mask_queue_q;
   logic  [MaskQueueDepth-1:0][NrLanes-1:0] mask_queue_valid_d, mask_queue_valid_q;
@@ -647,6 +658,8 @@ module masku import ara_pkg::*; import rvv_pkg::*; #(
     popcount_d     = popcount_q;
     vfirst_count_d = vfirst_count_q;
 
+    addr_d         = addr_q;
+
     mask_queue_d           = mask_queue_q;
     mask_queue_valid_d     = mask_queue_valid_q;
     mask_queue_write_pnt_d = mask_queue_write_pnt_q;
@@ -732,6 +745,9 @@ module masku import ara_pkg::*; import rvv_pkg::*; #(
         else
           mask_queue_write_pnt_d = mask_queue_write_pnt_q + 1;
 
+        // Increment write-back address
+        addr_d = next_vaddr(addr_q, vinsn_issue.vd);
+
         // Account for the operands that were issued
         read_cnt_d = read_cnt_q - NrLanes * (1 << (int'(EW64) - vinsn_issue.vtype.vsew));
         if (read_cnt_q < NrLanes * (1 << (int'(EW64) - vinsn_issue.vtype.vsew)))
@@ -838,9 +854,8 @@ module masku import ara_pkg::*; import rvv_pkg::*; #(
             result_queue_d[result_queue_write_pnt_q][lane] = '{
               wdata: result_queue_q[result_queue_write_pnt_q][lane].wdata | alu_result[lane],
               be   : (vinsn_issue.op inside {[VMSBF:VID]}) ? '1 : be(element_cnt, vinsn_issue.vtype.vsew),
-              addr : (vinsn_issue.op inside {[VMSBF:VID]}) ? vaddr(vinsn_issue.vd, NrLanes) + ((vinsn_issue.vl - issue_cnt_q) >> (int'(EW64) - vinsn_issue.vtype.vsew)) : vaddr(vinsn_issue.vd, NrLanes) +
-                (((vinsn_issue.vl - issue_cnt_q) / NrLanes / DataWidth)),
-              id : vinsn_issue.id
+              addr : addr_q,
+              id   : vinsn_issue.id
             };
           end
 
@@ -848,6 +863,9 @@ module masku import ara_pkg::*; import rvv_pkg::*; #(
           if (vinsn_issue.op inside {[VMFEQ:VMSGTU], [VMSGT:VMSBC]}) begin
             vrf_pnt_d = vrf_pnt_q + (NrLanes << (int'(EW64) - vinsn_issue.vtype.vsew));
 
+            // Increment write-back address
+            addr_d = next_vaddr(addr_q, vinsn_issue.vd);
+
             // Filled-up a word, or finished execution
             if (vrf_pnt_d == DataWidth*NrLanes || vrf_pnt_d >= issue_cnt_q) begin
               result_queue_valid_d[result_queue_write_pnt_q] = {NrLanes{1'b1}};
@@ -1077,6 +1095,9 @@ module masku import ara_pkg::*; import rvv_pkg::*; #(
         issue_cnt_d = pe_req_i.vl;
         read_cnt_d  = pe_req_i.vl;
 
+        // Initialize the starting address of the next instruction
+        addr_d = vaddr(pe_req_i.vd, NrLanes);
+
         // Trim skipped words
         if (pe_req_i.op == VSLIDEUP) begin
           issue_cnt_d -= vlen_t'(trimmed_stride);
@@ -1131,6 +1152,7 @@ module masku import ara_pkg::*; import rvv_pkg::*; #(
       result_final_gnt_q <= '0;
       popcount_q         <= '0;
       vfirst_count_q     <= '0;
+      addr_q             <= '0;
     end else begin
       vinsn_running_q    <= vinsn_running_d;
       read_cnt_q         <= read_cnt_d;
@@ -1142,6 +1164,7 @@ module masku import ara_pkg::*; import rvv_pkg::*; #(
       result_final_gnt_q <= result_final_gnt_d;
       popcount_q         <= popcount_d;
       vfirst_count_q     <= vfirst_count_d;
+      addr_q             <= addr_d;
     end
   end
 
diff --git a/hardware/src/sldu/sldu.sv b/hardware/src/sldu/sldu.sv
index 9c06c3ac5..7439bed7a 100644
--- a/hardware/src/sldu/sldu.sv
+++ b/hardware/src/sldu/sldu.sv
@@ -8,12 +8,18 @@
 // instructions, which need access to the whole Vector Register File.
 
 module sldu import ara_pkg::*; import rvv_pkg::*; #(
-    parameter  int  unsigned NrLanes = 0,
-    parameter  type          vaddr_t = logic, // Type used to address vector register file elements
+    parameter  int  unsigned NrLanes         = 0,
+    // Address of an element in the lane's VRF
+    localparam int  unsigned MaxVLenBPerLane = VLENB / NrLanes,      // In bytes
+    localparam int  unsigned VRFBSizePerLane = MaxVLenBPerLane * 32, // In bytes
+    localparam int  unsigned VaddrIdxWidth   = $clog2(VRFBSizePerLane),
+    localparam int  unsigned VaddrBankWidth  = $clog2(NrVRFBanksPerLane),
+    localparam int  unsigned VaddrVregWidth  = $clog2(MaxVLenBPerLane),
+    localparam type          vaddr_t         = logic [VaddrIdxWidth-1:0],
     // Dependant parameters. DO NOT CHANGE!
-    localparam int  unsigned DataWidth = $bits(elen_t), // Width of the lane datapath
-    localparam int  unsigned StrbWidth = DataWidth/8,
-    localparam type          strb_t    = logic [StrbWidth-1:0] // Byte-strobe type
+    localparam int  unsigned DataWidth       = $bits(elen_t), // Width of the lane datapath
+    localparam int  unsigned StrbWidth       = DataWidth/8,
+    localparam type          strb_t          = logic [StrbWidth-1:0] // Byte-strobe type
   ) (
     input  logic                   clk_i,
     input  logic                   rst_ni,
@@ -46,6 +52,9 @@ module sldu import ara_pkg::*; import rvv_pkg::*; #(
 
   `include "common_cells/registers.svh"
 
+  // Include address-handling functions
+  `include "../../include/ara_vaddr.svh"
+
   import cf_math_pkg::idx_width;
 
   ////////////////////////////////
@@ -108,6 +117,8 @@ module sldu import ara_pkg::*; import rvv_pkg::*; #(
 
   localparam int unsigned ResultQueueDepth = 2;
 
+  vaddr_t addr_d, addr_q;
+
   // There is a result queue per lane, holding the results that were not
   // yet accepted by the corresponding lane.
   typedef struct packed {
@@ -220,6 +231,7 @@ module sldu import ara_pkg::*; import rvv_pkg::*; #(
     out_pnt_d     = out_pnt_q;
     vrf_pnt_d     = vrf_pnt_q;
     state_d       = state_q;
+    addr_d        = addr_q;
 
     result_queue_d           = result_queue_q;
     result_queue_valid_d     = result_queue_valid_q;
@@ -268,6 +280,9 @@ module sldu import ara_pkg::*; import rvv_pkg::*; #(
               // Start writing at the middle of the destination vector
               vrf_pnt_d = vinsn_issue_q.stride >> $clog2(8*NrLanes);
 
+              // Offset the starting write-back address to match the new VRF pointer
+              addr_d = vaddr_offset(addr_q, vrf_pnt_d, vinsn_issue_q.vd);
+
               // Go to SLIDE_RUN_VSLIDE1UP_FIRST_WORD if this is a vslide1up instruction
               if (vinsn_issue_q.use_scalar_op)
                 state_d = SLIDE_RUN_VSLIDE1UP_FIRST_WORD;
@@ -349,8 +364,7 @@ module sldu import ara_pkg::*; import rvv_pkg::*; #(
           // Initialize id and addr fields of the result queue requests
           for (int lane = 0; lane < NrLanes; lane++) begin
             result_queue_d[result_queue_write_pnt_q][lane].id   = vinsn_issue_q.id;
-            result_queue_d[result_queue_write_pnt_q][lane].addr =
-              vaddr(vinsn_issue_q.vd, NrLanes) + vrf_pnt_q;
+            result_queue_d[result_queue_write_pnt_q][lane].addr = addr_q;
           end
 
           // Bump pointers (reductions always finish in one shot)
@@ -409,8 +423,8 @@ module sldu import ara_pkg::*; import rvv_pkg::*; #(
             if (vinsn_issue_q.op inside {VSLIDEUP, VSLIDEDOWN})
               mask_ready_o = !vinsn_issue_q.vm;
 
-            // Increment VRF address
-            vrf_pnt_d = vrf_pnt_q + 1;
+            // Increment write-back address
+            addr_d = vaddr_offset(addr_q, 1, vinsn_issue_q.vd);
 
             // Send result to the VRF
             result_queue_cnt_d += 1;
@@ -466,6 +480,8 @@ module sldu import ara_pkg::*; import rvv_pkg::*; #(
             // Increment vector instruction queue pointers and counters
             vinsn_queue_d.issue_pnt += 1;
             vinsn_queue_d.issue_cnt -= 1;
+
+            addr_d = vaddr(vinsn_queue_q.vinsn[vinsn_queue_d.issue_pnt].vd, NrLanes);
           end
         end
       end
@@ -500,6 +516,8 @@ module sldu import ara_pkg::*; import rvv_pkg::*; #(
           // Increment vector instruction queue pointers and counters
           vinsn_queue_d.issue_pnt += 1;
           vinsn_queue_d.issue_cnt -= 1;
+
+          addr_d = vaddr(vinsn_queue_q.vinsn[vinsn_queue_d.issue_pnt].vd, NrLanes);
         end
       end
       SLIDE_WAIT_OSUM: begin
@@ -607,6 +625,9 @@ module sldu import ara_pkg::*; import rvv_pkg::*; #(
         // VSLIDE1UP always writes at least 1 element
         if (pe_req_i.op == VSLIDEUP && !pe_req_i.use_scalar_op)
           issue_cnt_d -= vinsn_queue_d.vinsn[vinsn_queue_q.accept_pnt].stride;
+
+        // Initialize the starting address for the next instruction
+        addr_d = vaddr(pe_req_i.vd, NrLanes);
       end
       if (vinsn_queue_d.commit_cnt == '0) begin
         commit_cnt_d = pe_req_i.op inside {VSLIDEUP, VSLIDEDOWN}
@@ -638,6 +659,7 @@ module sldu import ara_pkg::*; import rvv_pkg::*; #(
       pe_resp_o          <= '0;
       result_final_gnt_q <= '0;
       red_stride_cnt_q   <= 1;
+      addr_q             <= '0;
     end else begin
       vinsn_running_q    <= vinsn_running_d;
       issue_cnt_q        <= issue_cnt_d;
@@ -649,6 +671,7 @@ module sldu import ara_pkg::*; import rvv_pkg::*; #(
       pe_resp_o          <= pe_resp;
       result_final_gnt_q <= result_final_gnt_d;
       red_stride_cnt_q   <= red_stride_cnt_d;
+      addr_q             <= addr_d;
     end
   end
 
diff --git a/hardware/src/vlsu/addrgen.sv b/hardware/src/vlsu/addrgen.sv
index def21df8e..2fbe05e55 100644
--- a/hardware/src/vlsu/addrgen.sv
+++ b/hardware/src/vlsu/addrgen.sv
@@ -89,8 +89,8 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #(
   logic             axi_addrgen_queue_empty;
 
   fifo_v3 #(
-    .DEPTH(4                ),
-    .dtype(addrgen_axi_req_t)
+    .DEPTH(VaddrgenInsnQueueDepth),
+    .dtype(addrgen_axi_req_t     )
   ) i_addrgen_req_queue (
     .clk_i     (clk_i                                                    ),
     .rst_ni    (rst_ni                                                   ),
diff --git a/hardware/src/vlsu/vldu.sv b/hardware/src/vlsu/vldu.sv
index 51042ed8e..6f94d9ec1 100644
--- a/hardware/src/vlsu/vldu.sv
+++ b/hardware/src/vlsu/vldu.sv
@@ -8,16 +8,22 @@
 // upon receiving vector memory operations.
 
 module vldu import ara_pkg::*; import rvv_pkg::*; #(
-    parameter  int  unsigned NrLanes = 0,
-    parameter  type          vaddr_t = logic,  // Type used to address vector register file elements
+    parameter  int  unsigned NrLanes         = 0,
+    // Address of an element in the lane's VRF
+    localparam int  unsigned MaxVLenBPerLane = VLENB / NrLanes,      // In bytes
+    localparam int  unsigned VRFBSizePerLane = MaxVLenBPerLane * 32, // In bytes
+    localparam int  unsigned VaddrIdxWidth   = $clog2(VRFBSizePerLane),
+    localparam int  unsigned VaddrBankWidth  = $clog2(NrVRFBanksPerLane),
+    localparam int  unsigned VaddrVregWidth  = $clog2(MaxVLenBPerLane),
+    localparam type          vaddr_t         = logic [VaddrIdxWidth-1:0],
     // AXI Interface parameters
-    parameter  int  unsigned AxiDataWidth = 0,
-    parameter  int  unsigned AxiAddrWidth = 0,
-    parameter  type          axi_r_t      = logic,
+    parameter  int  unsigned AxiDataWidth    = 0,
+    parameter  int  unsigned AxiAddrWidth    = 0,
+    parameter  type          axi_r_t         = logic,
     // Dependant parameters. DO NOT CHANGE!
-    localparam int           DataWidth    = $bits(elen_t),
-    localparam type          strb_t       = logic[DataWidth/8-1:0],
-    localparam type          axi_addr_t   = logic [AxiAddrWidth-1:0]
+    localparam int           DataWidth       = $bits(elen_t),
+    localparam type          strb_t          = logic[DataWidth/8-1:0],
+    localparam type          axi_addr_t      = logic [AxiAddrWidth-1:0]
   ) (
     input  logic                           clk_i,
     input  logic                           rst_ni,
@@ -33,6 +39,10 @@ module vldu import ara_pkg::*; import rvv_pkg::*; #(
     input  logic             [NrVInsn-1:0] pe_vinsn_running_i,
     output logic                           pe_req_ready_o,
     output pe_resp_t                       pe_resp_o,
+    // Hazard handling to main sequencer
+    output vid_t                           commit_id_o,
+    output logic                           commit_id_valid_o,
+    input  logic                           hazard_i,
     // Interface with the address generator
     input  addrgen_axi_req_t               axi_addrgen_req_i,
     input  logic                           axi_addrgen_req_valid_i,
@@ -51,7 +61,11 @@ module vldu import ara_pkg::*; import rvv_pkg::*; #(
     output logic                           mask_ready_o
   );
 
+  // Include address-handling functions
+  `include "../../include/ara_vaddr.svh"
+
   import cf_math_pkg::idx_width;
+
   import axi_pkg::beat_lower_byte;
   import axi_pkg::beat_upper_byte;
   import axi_pkg::BURST_INCR;
@@ -101,6 +115,9 @@ module vldu import ara_pkg::*; import rvv_pkg::*; #(
   logic    vinsn_commit_valid;
   assign vinsn_commit       = vinsn_queue_q.vinsn[vinsn_queue_q.commit_pnt];
   assign vinsn_commit_valid = (vinsn_queue_q.commit_cnt != '0);
+  // To the main sequencer, for hazard checking
+  assign commit_id_valid_o = vinsn_commit_valid;
+  assign commit_id_o       = vinsn_commit.id;
 
   always_ff @(posedge clk_i or negedge rst_ni) begin
     if (!rst_ni) begin
@@ -118,6 +135,8 @@ module vldu import ara_pkg::*; import rvv_pkg::*; #(
 
   localparam int unsigned ResultQueueDepth = 2;
 
+  vaddr_t addr_d, addr_q;
+
   // There is a result queue per lane, holding the results that were not
   // yet accepted by the corresponding lane.
   typedef struct packed {
@@ -197,6 +216,7 @@ module vldu import ara_pkg::*; import rvv_pkg::*; #(
     vinsn_queue_d = vinsn_queue_q;
     issue_cnt_d   = issue_cnt_q;
     commit_cnt_d  = commit_cnt_q;
+    addr_d        = addr_q;
 
     len_d     = len_q;
     r_pnt_d   = r_pnt_q;
@@ -286,9 +306,7 @@ module vldu import ara_pkg::*; import rvv_pkg::*; #(
         // Initialize id and addr fields of the result queue requests
         for (int lane = 0; lane < NrLanes; lane++) begin
           result_queue_d[result_queue_write_pnt_q][lane].id   = vinsn_issue_q.id;
-          result_queue_d[result_queue_write_pnt_q][lane].addr = vaddr(vinsn_issue_q.vd, NrLanes) +
-            (((vinsn_issue_q.vl - (issue_cnt_q >> int'(vinsn_issue_q.vtype.vsew))) / NrLanes) >>
-            (int'(EW64) - int'(vinsn_issue_q.vtype.vsew)));
+          result_queue_d[result_queue_write_pnt_q][lane].addr = addr_q;
         end
       end
 
@@ -304,6 +322,9 @@ module vldu import ara_pkg::*; import rvv_pkg::*; #(
         // Trigger the request signal
         result_queue_valid_d[result_queue_write_pnt_q] = {NrLanes{1'b1}};
 
+        // Increase the address
+        addr_d = next_vaddr(addr_q, vinsn_issue_q.vd);
+
         // Acknowledge the mask operands
         mask_ready_o = !vinsn_issue_q.vm;
 
@@ -342,6 +363,9 @@ module vldu import ara_pkg::*; import rvv_pkg::*; #(
         else
           vinsn_queue_d.issue_pnt += 1;
 
+        // Modify the next instruction's address
+        addr_d = vaddr(vinsn_queue_q.vinsn[vinsn_queue_d.issue_pnt].vd, NrLanes);
+
         // Prepare for the next vector instruction
         if (vinsn_queue_d.issue_cnt != 0)
           issue_cnt_d = vinsn_queue_q.vinsn[vinsn_queue_d.issue_pnt].vl << int'(vinsn_queue_q.vinsn[
@@ -354,7 +378,10 @@ module vldu import ara_pkg::*; import rvv_pkg::*; #(
     //////////////////////////////////
 
     for (int lane = 0; lane < NrLanes; lane++) begin: result_write
-      ldu_result_req_o[lane]   = result_queue_valid_q[result_queue_read_pnt_q][lane];
+      // Raise a write request only once no hazard on vd is pending. Checking hazard_vs1 is
+      // enough, since it also carries the hazard information for vd.
+      ldu_result_req_o[lane]   = result_queue_valid_q[result_queue_read_pnt_q][lane] &&
+                                 !vinsn_commit.hazard_vs1;
       ldu_result_addr_o[lane]  = result_queue_q[result_queue_read_pnt_q][lane].addr;
       ldu_result_id_o[lane]    = result_queue_q[result_queue_read_pnt_q][lane].id;
       ldu_result_wdata_o[lane] = result_queue_q[result_queue_read_pnt_q][lane].wdata;
@@ -415,6 +442,10 @@ module vldu import ara_pkg::*; import rvv_pkg::*; #(
             vinsn_queue_d.commit_pnt].vtype.vsew);
     end
 
+    // Update the vd hazard bits of the committing instruction: they are kept while hazard_i is high and cleared once it is low.
+    // hazard_vs1, hazard_vs2, and hazard_vm all contain the info about hazard_vd, so it is enough to work on one of them (vs1).
+    if (commit_id_valid_o) vinsn_queue_d.vinsn[vinsn_queue_q.commit_pnt].hazard_vs1 &= {NrVInsn{hazard_i}};
+
     //////////////////////////////
     //  Accept new instruction  //
     //////////////////////////////
@@ -425,8 +456,10 @@ module vldu import ara_pkg::*; import rvv_pkg::*; #(
       vinsn_running_d[pe_req_i.id]                  = 1'b1;
 
       // Initialize counters
-      if (vinsn_queue_d.issue_cnt == '0)
+      if (vinsn_queue_d.issue_cnt == '0) begin
         issue_cnt_d = pe_req_i.vl << int'(pe_req_i.vtype.vsew);
+        addr_d      = vaddr(pe_req_i.vd, NrLanes);
+      end
       if (vinsn_queue_d.commit_cnt == '0)
         commit_cnt_d = pe_req_i.vl << int'(pe_req_i.vtype.vsew);
 
@@ -447,6 +480,7 @@ module vldu import ara_pkg::*; import rvv_pkg::*; #(
       vrf_pnt_q          <= '0;
       pe_resp_o          <= '0;
       result_final_gnt_q <= '0;
+      addr_q             <= '0;
     end else begin
       vinsn_running_q    <= vinsn_running_d;
       issue_cnt_q        <= issue_cnt_d;
@@ -456,6 +490,7 @@ module vldu import ara_pkg::*; import rvv_pkg::*; #(
       vrf_pnt_q          <= vrf_pnt_d;
       pe_resp_o          <= pe_resp;
       result_final_gnt_q <= result_final_gnt_d;
+      addr_q             <= addr_d;
     end
   end
 
diff --git a/hardware/src/vlsu/vlsu.sv b/hardware/src/vlsu/vlsu.sv
index aa2e05283..c86b7ee15 100644
--- a/hardware/src/vlsu/vlsu.sv
+++ b/hardware/src/vlsu/vlsu.sv
@@ -44,6 +44,9 @@ module vlsu import ara_pkg::*; import rvv_pkg::*; #(
     output logic                    addrgen_ack_o,
     output logic                    addrgen_error_o,
     output vlen_t                   addrgen_error_vl_o,
+    output vid_t                    commit_id_o,
+    output logic                    commit_id_valid_o,
+    input  logic                    hazard_i,
     // Interface with the lanes
     // Store unit operands
     input  elen_t     [NrLanes-1:0] stu_operand_i,
@@ -155,8 +158,7 @@ module vlsu import ara_pkg::*; import rvv_pkg::*; #(
     .AxiAddrWidth(AxiAddrWidth),
     .AxiDataWidth(AxiDataWidth),
     .axi_r_t     (axi_r_t     ),
-    .NrLanes     (NrLanes     ),
-    .vaddr_t     (vaddr_t     )
+    .NrLanes     (NrLanes     )
   ) i_vldu (
     .clk_i                  (clk_i                     ),
     .rst_ni                 (rst_ni                    ),
@@ -172,6 +174,9 @@ module vlsu import ara_pkg::*; import rvv_pkg::*; #(
     .pe_vinsn_running_i     (pe_vinsn_running_i        ),
     .pe_req_ready_o         (pe_req_ready_o[OffsetLoad]),
     .pe_resp_o              (pe_resp_o[OffsetLoad]     ),
+    .commit_id_o            (commit_id_o               ),
+    .commit_id_valid_o      (commit_id_valid_o         ),
+    .hazard_i               (hazard_i                  ),
     // Interface with the address generator
     .axi_addrgen_req_i      (axi_addrgen_req           ),
     .axi_addrgen_req_valid_i(axi_addrgen_req_valid     ),