Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- `VLXE` and `VSXE` need to wait until the SlideAddrGenA opreq is free before being issued by the lane sequencer to the operand requester stage
- Do not trap instructions with no operands in the main sequencer
- Commit a reduction only after a grant from the VRF
- Decouple `cmdBuffer` and `dataBuffer` depth parameters in the operand queues

### Added

Expand Down Expand Up @@ -104,6 +105,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- Add support for vector mask population count and find first set bit instructions: `vcpop.m`, `vfirst.m`
- Add Spyglass linting script
- Add parametrized support for Fixed-Point math
- Add support for Barber's Pole VRF Layout

### Changed

Expand Down Expand Up @@ -134,6 +136,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- Adapt `fdotproduct` to `dotproduct` structure
- Pre-calculate next-cycle `aligned_start_address` in `addrgen` for timing reasons
- Add `is_reduct` signal to the operand queues, to gate the neutral value filling
- Handle WAW and WAR `vload` hazards in the `VLDU` without stalling the main sequencer
- Reductions are no longer treated as widening instructions with respect to WAW hazards in the operand requesters
- `slide1x` instructions are no longer stalled in the main sequencer; the hazard is handled downstream

## 2.2.0 - 2021-11-02

Expand Down
11 changes: 6 additions & 5 deletions hardware/include/ara_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ package ara_pkg;
localparam int unsigned ValuInsnQueueDepth = 4;
localparam int unsigned VlduInsnQueueDepth = 4;
localparam int unsigned VstuInsnQueueDepth = 4;
localparam int unsigned VaddrgenInsnQueueDepth = 4;
localparam int unsigned SlduInsnQueueDepth = 2;
localparam int unsigned NoneInsnQueueDepth = 1;
// Ara supports MaskuInsnQueueDepth = 1 only.
Expand Down Expand Up @@ -299,6 +300,8 @@ package ara_pkg;
logic wide_fp_imm;
// Resizing of FP conversions
resize_e cvt_resize;
// Widening and vslide1x instructions have different hazard stall policies
logic special_hazard;

// Vector machine metadata
vlen_t vl;
Expand Down Expand Up @@ -396,6 +399,8 @@ package ara_pkg;
logic wide_fp_imm;
// Resizing of FP conversions
resize_e cvt_resize;
// Widening and vslide1x instructions have different hazard stall policies
logic special_hazard;

// Vector machine metadata
vlen_t vl;
Expand Down Expand Up @@ -877,11 +882,6 @@ package ara_pkg;
// Each lane has eight VRF banks
localparam int unsigned NrVRFBanksPerLane = 8;

// Find the starting address of a vector register vid.
//   vid:     index of the vector register
//   NrLanes: number of lanes the register file is split across
// Consecutive vector registers are spaced VLENB / NrLanes / NrVRFBanksPerLane
// addresses apart. The named bank-count constant replaces a bare `8`, so the
// stride stays consistent with the `NrVRFBanksPerLane` declaration.
function automatic logic [63:0] vaddr(logic [4:0] vid, int NrLanes);
  vaddr = vid * (VLENB / NrLanes / NrVRFBanksPerLane);
endfunction: vaddr

// Differentiate between SLDU and ADDRGEN operands from opqueue
typedef enum logic {
ALU_SLDU = 1'b0,
Expand All @@ -898,6 +898,7 @@ package ara_pkg;
logic scale_vl; // Rescale vl taking into account the new and old EEW

resize_e cvt_resize; // Resizing of FP conversions
logic special_hazard; // Widening and vslide1x instructions have different hazard stall policies

logic is_reduct; // Is this a reduction?

Expand Down
80 changes: 80 additions & 0 deletions hardware/include/ara_vaddr.svh
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// Copyright 2021 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
//
// Author: Matteo Perotti <mperotti@iis.ee.ethz.ch>
// Description:
// Ara's functions to calculate VRF addresses. Not in the package
// since the functions depend on `NrLanes`

// All the functions to support a Barber-Pole VRF layout

// Compute the VRF address at which vector register `vid` starts.
//   vid:     index of the vector register
//   NrLanes: number of lanes the register file is split across
// The result is the per-register stride multiplied by `vid`, plus the lower
// VaddrBankWidth bits of `vid`, so that the starting bank changes from one
// register to the next.
function automatic vaddr_t vaddr(logic [4:0] vid, int NrLanes);
  // As long as VLENB / NrLanes >= NrVRFBanksPerLane^2, the two terms below do
  // not overlap: the sum is not an adder, it's only wires.
  return vid[VaddrBankWidth-1:0] + vid * (VLENB / NrLanes / NrVRFBanksPerLane);
endfunction: vaddr

// Return the physical address that follows `vaddr` for a vector whose
// register index is `vid`.
//   vaddr: current address, split as {index in a bank, bank index}
//   vid:   index of the vector register; its lower VaddrBankWidth bits give
//          the bank against which the wrap-around is detected
function automatic vaddr_t next_vaddr(vaddr_t vaddr, logic [4:0] vid);
  // vaddr msbs -> index in a bank, before (q) and after (d) the step
  logic [VaddrIdxWidth-1:VaddrBankWidth] index_q, index_d;
  // vaddr lsbs -> bank index after the step
  logic [VaddrBankWidth-1:0] bank_d;

  index_q = vaddr[VaddrIdxWidth-1:VaddrBankWidth];
  index_d = index_q;

  // Step to the next bank; the counter wraps on its own width
  bank_d = vaddr[VaddrBankWidth-1:0] + 1;

  // Back on the bank selected by vid: wrap around and move to the next index
  if (bank_d == vid[VaddrBankWidth-1:0]) begin
    index_d = index_q + 1;
  end

  // If we change vreg, the start element position is +1 (LMUL > 1).
  // This is important for the consistency of the layout among different
  // LMUL values, or when the inactive element policy is "undisturbed".
  if (index_d[VaddrVregWidth] != index_q[VaddrVregWidth]) begin
    bank_d = bank_d + 1;
  end

  return {index_d, bank_d};
endfunction: next_vaddr

// Add an offset to a VRF address (necessary with vslideup).
//   vaddr: base address, split as {index in a bank, bank index}
//   off:   offset, split the same way and added field by field
//   vid:   index of the vector register; its lower VaddrBankWidth bits give
//          the bank against which the wrap-around is detected
function automatic vaddr_t vaddr_offset(vaddr_t vaddr, vaddr_t off, logic [4:0] vid);
  // vaddr msbs -> index in a bank, before (q) and after (d) the offset
  logic [VaddrIdxWidth-1:VaddrBankWidth] index_q, index_d;
  // vaddr lsbs -> bank index, before (q) and after (d) the offset
  logic [VaddrBankWidth-1:0] bank_q, bank_d;
  // Bank selected by the lower bits of vid
  logic [VaddrBankWidth-1:0] start_bank;
  // Set when the bank addition reaches or passes start_bank
  logic crossed_start;

  start_bank = vid[VaddrBankWidth-1:0];
  index_q    = vaddr[VaddrIdxWidth-1:VaddrBankWidth];
  bank_q     = vaddr[VaddrBankWidth-1:0];

  // Add the offset separately to the two fields
  index_d = index_q + off[VaddrIdxWidth-1:VaddrBankWidth];
  bank_d  = bank_q + off[VaddrBankWidth-1:0];

  // Support vstart != 0: do not assume that bank_q == start_bank.
  // Wrap around if start_bank is met during the bank addition.
  crossed_start = 1'b0;
  if (bank_q > start_bank)
    crossed_start = (bank_d >= start_bank) && (bank_d < bank_q);
  else if (bank_q < start_bank)
    crossed_start = (bank_d >= start_bank) || (bank_d < bank_q);

  if (crossed_start)
    index_d = index_d + 1;

  // If we change vreg, the start element position is +1 for every
  // register passed (LMUL > 1). The max reg id delta is 7 with LMUL == 8.
  bank_d = bank_d + (index_d[VaddrVregWidth +: 3] - index_q[VaddrVregWidth +: 3]);

  return {index_d, bank_d};
endfunction: vaddr_offset
25 changes: 17 additions & 8 deletions hardware/src/ara.sv
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,10 @@ module ara import ara_pkg::*; #(
logic [NrVInsn-1:0][NrVInsn-1:0] global_hazard_table;
// Ready for lane 0 (scalar operand fwd)
logic pe_scalar_resp_ready;
// VLDU Hazard checking
vid_t vldu_commit_id;
logic vldu_commit_id_valid;
logic vldu_hazard;

// Mask unit operands
elen_t [NrLanes-1:0][NrMaskFUnits+2-1:0] masku_operand;
Expand Down Expand Up @@ -178,7 +182,11 @@ module ara import ara_pkg::*; #(
// Interface with the address generator
.addrgen_ack_i (addrgen_ack ),
.addrgen_error_i (addrgen_error ),
.addrgen_error_vl_i (addrgen_error_vl )
.addrgen_error_vl_i (addrgen_error_vl ),
// Interface with the VLDU for hazard handling
.vldu_commit_id_i (vldu_commit_id ),
.vldu_commit_id_valid_i(vldu_commit_id_valid ),
.vldu_hazard_o (vldu_hazard )
);

// Scalar move support
Expand Down Expand Up @@ -234,9 +242,9 @@ module ara import ara_pkg::*; #(

for (genvar lane = 0; lane < NrLanes; lane++) begin: gen_lanes
lane #(
.NrLanes (NrLanes ),
.FPUSupport (FPUSupport ),
.FixPtSupport(FixPtSupport)
.NrLanes (NrLanes ),
.FPUSupport (FPUSupport ),
.FixPtSupport (FixPtSupport )
) i_lane (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
Expand Down Expand Up @@ -344,6 +352,9 @@ module ara import ara_pkg::*; #(
.addrgen_ack_o (addrgen_ack ),
.addrgen_error_o (addrgen_error ),
.addrgen_error_vl_o (addrgen_error_vl ),
.commit_id_o (vldu_commit_id ),
.commit_id_valid_o (vldu_commit_id_valid ),
.hazard_i (vldu_hazard ),
// Interface with the Mask unit
.mask_i (mask ),
.mask_valid_i (mask_valid ),
Expand Down Expand Up @@ -377,8 +388,7 @@ module ara import ara_pkg::*; #(
logic sldu_mask_ready;

sldu #(
.NrLanes(NrLanes),
.vaddr_t(vaddr_t)
.NrLanes(NrLanes)
) i_sldu (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
Expand Down Expand Up @@ -413,8 +423,7 @@ module ara import ara_pkg::*; #(
/////////////////

masku #(
.NrLanes(NrLanes),
.vaddr_t(vaddr_t)
.NrLanes(NrLanes)
) i_masku (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
Expand Down
Loading