Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion finn-rtllib/layernorm/accuf.sv
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ module accuf #(

input logic [31:0] a,
input logic avld,
input logic alst,
input logic alst, // complete sum
output logic [31:0] s,
output logic svld
);
Expand Down
2 changes: 1 addition & 1 deletion finn-rtllib/layernorm/binopf.sv
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ module binopf #(
logic [1:0] ovf;
logic [1:0] unf;
always_ff @(posedge clk) begin
automatic logic [1:0] msk = { HAVE_MUL && Vld[2], HAVE_ADD && rvld };
automatic logic [1:0] msk = { HAVE_MUL && Vld[1+HAVE_MUL], HAVE_ADD && rvld };
assert(!(inv & msk)) else $warning("%m generated invalid output.");
assert(!(ovf & msk)) else $warning("%m generated an overflow.");
assert(!(unf & msk)) else $warning("%m generated an underflow.");
Expand Down
29 changes: 10 additions & 19 deletions finn-rtllib/layernorm/layernorm.sv
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,6 @@ module layernorm #(
$error("%m: SIMD(%0d) must divide N(%0d).", SIMD, N);
$finish;
end
if(NN <= 12) begin
$error("%m: N/SIMD must be larger than 12 for rsqrt throughput.");
$finish;
end
end

typedef logic [31:0] fp32;
Expand Down Expand Up @@ -142,20 +138,12 @@ module layernorm #(
// Balancing edge delays in trees with incomplete leaf level
typedef bit edge_delays_t[2*SIMD-1];
function edge_delays_t INIT_EDGE_DELAYS();
// Builds the per-node delay flags for a tree with SIMD leaves whose
// 2*SIMD-1 nodes are indexed so that node i has the children 2*i+1 and
// 2*i+2. Takes no arguments; the result depends only on SIMD.
// Returns an edge_delays_t that is 0 everywhere except on flagged nodes.

// Level count of the tree; compared against $clog2(i+2) below to detect
// the first node index that lies on the last level.
localparam int unsigned LEVELS = 1+$clog2(SIMD);
// Use binary encoding of number of short leaves, `sig`, to infer
// most common parent for retiming on each level.
// FULL_FANIN is the smallest power of two that is not below SIMD.
localparam int unsigned FULL_FANIN = 2**$clog2(SIMD);
// Start with no delay flagged on any node.
automatic edge_delays_t d = '{ default: 0 };
// Put delay onto leaves that are not on last level
// Leaf indices start at SIMD-1; the scan stops at the first index for
// which $clog2(i+2) equals LEVELS.
for(int unsigned i = SIMD-1; i < 2*SIMD-1; i++) begin
if($clog2(i+2) == LEVELS) break;
d[i] = 1;
end
// Move delay shared between children to their parent
// Scans i from SIMD-1 down to 1. Only d[2*i+1] is tested; when it is set,
// both child flags are cleared and the flag of node i is set instead.
for(int unsigned i = SIMD-1; i > 0; i--) begin
if(d[2*i+1]) begin
d[2*i+1] = 0;
d[2*i+2] = 0;
d[i] = 1;
end
// `sig` starts at FULL_FANIN - SIMD and `i` at FULL_FANIN - 1; both are
// halved after every iteration and the loop ends once `sig` reaches zero.
// Each iteration writes the lowest bit of `sig` into d[i-sig].
for(int unsigned sig = FULL_FANIN - SIMD, i = FULL_FANIN - 1; sig; i >>= 1, sig >>= 1) begin
d[i-sig] = sig[0];
end
return d;
endfunction : INIT_EDGE_DELAYS
Expand Down Expand Up @@ -210,7 +198,10 @@ module layernorm #(
end
1: /* Var: inverse square root */ begin
uwire vrdy;
rsqrtf #(.FORCE_BEHAVIORAL(FORCE_BEHAVIORAL)) vari_rsqurt (
rsqrtf #(
.SUSTAINABLE_INTERVAL(NN),
.FORCE_BEHAVIORAL(FORCE_BEHAVIORAL)
) vari_rsqurt (
.clk, .rst,
.x(total.dat), .xvld(total.vld), .xrdy(vrdy),
.r(norm .dat), .rvld(norm .vld)
Expand Down Expand Up @@ -259,7 +250,7 @@ module layernorm #(
else Credit <= Credit + (issue == settle? 0 : settle? 1 : -1);
end

logic signed [$clog2(NN-1):0] Cnt = 0; // [-NN,] -NN+1, ..., -1, 0
logic signed [$clog2(NN):0] Cnt = 0; // [-NN,] -NN+1, ..., -1, 0
assign norm0_rdy = !Cnt[$left(Cnt)];
assign issue = have_cap && (norm0.vld || Cnt[$left(Cnt)]);
uwire bload = norm0.vld && norm0_rdy;
Expand Down
Loading
Loading