Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,11 @@ Sync finn-dev:

.setup_venv_from_whl: &setup_venv_from_whl
# Move everything to working directory (e.g., RAMdisk)
- cp -dfR .. $PATH_WORKDIR
- cp -dfR . $PATH_WORKDIR
- cd $PATH_WORKDIR
# Create fresh virtual environment and install finn-plus from .whl (artifact)
- python3 -m venv finn-plus-venv
- finn-plus-venv/bin/pip install ./finn-plus/dist/*.whl
- finn-plus-venv/bin/pip install dist/*.whl

Build:
id_tokens:
Expand Down Expand Up @@ -171,8 +171,8 @@ FINN Test Suite 2022.2:
- $JOB_MONITORING_DIR/monitor.sh $JOB_MONITORING_DIR/$CI_PIPELINE_ID/$HOSTNAME.log &
# Launch FINN via test command, includes preparation of (cached) dependencies
- |
source ./finn-plus-venv/bin/activate
finn test --variant $TEST_SUITE --dependency-path ./finn-plus/deps --build-path $FINN_BUILD_DIR --num-workers 1 --num-test-workers $PYTEST_PARALLEL
source finn-plus-venv/bin/activate
finn test --variant $TEST_SUITE --dependency-path ./deps --build-path $FINN_BUILD_DIR --num-workers 1 --num-test-workers $PYTEST_PARALLEL
artifacts:
name: "test_reports"
when: always
Expand Down
307 changes: 307 additions & 0 deletions custom_hls/instrumentation.template.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,307 @@
/******************************************************************************
* Copyright (c) 2023, Xilinx, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************
* @brief Instrumentation wrapper module for FINN IP characterization.
* @author Thomas B. Preusser <thomas.preusser@amd.com>
* @details
* Instrumentation wrapper intercepting the feature map input to and
* the feature map output from a FINN IP to measure processing latency and
* initiation interval in terms of clock cycles. The most recent readings
* are exposed via AXI-Lite.
* This wrapper can run the FINN IP detached from an external data source
* and sink by feeding LFSR-generated data and sinking the output without
* backpressure.
* This module is currently not integrated with the FINN compiler. It must
* be instantiated and integrated with the rest of the system in a manual
* process.
*
* @param PENDING maximum number of feature maps in the FINN dataflow pipeline
* @param ILEN number of input transactions per IFM
* @param OLEN number of output transactions per OFM
* @param KO number of subwords within output payload vector
* @param TI type of input payload vector
* @param TO type of output payload vector
*******************************************************************************/

#include <hls_stream.h>
#include <ap_int.h>
#include <ap_axi_sdata.h>
#include <algorithm>

// Module Configuration
// The @NAME@ tokens below are template placeholders and are not valid C++ on
// their own; presumably they are substituted with concrete values and types
// when this template is instantiated (TODO confirm against the generator).
// These globals are forwarded to instrument<>() by instrumentation_wrapper().
constexpr unsigned PENDING = @PENDING@; // Max. feature maps in flight (depth of the timestamp FIFO)
constexpr unsigned ILEN = @ILEN@; // Input transaction words per input feature map (IFM)
constexpr unsigned OLEN = @OLEN@; // Output transaction words per output feature map (OFM)
constexpr unsigned KO = @KO@; // Subwords within each OFM transaction word (checksum granularity)
using TI = @TI@; // IFM transaction word type
using TO = @TO@; // OFM transaction word type

//---------------------------------------------------------------------------
// Utility Functions
/**
 * Ceiling of the base-2 logarithm: the smallest n with 2^n >= x.
 * Yields 0 for x < 2.
 * The recursive halving rounds up as x/2 + x%2 rather than (x+1)/2 so that
 * the increment cannot wrap around to zero for x == UINT_MAX.
 */
static constexpr unsigned clog2(unsigned x) {
	return x < 2 ? 0 : 1 + clog2(x/2 + x%2);
}

/**
 * Same as clog2() but never less than 1, e.g. for sizing a counter that
 * must be at least one bit wide even if it only ever holds the value 0.
 */
static constexpr unsigned clog2nz(unsigned x) {
	return std::max(1u, clog2(x));
}

/**
 * Transfers one element between two streams of the same element type:
 * a single read() from `src` is paired with a single write() to `dst`.
 */
template<typename T>
static void move(
	hls::stream<T> &src,
	hls::stream<T> &dst
) {
#pragma HLS pipeline II=1 style=flp
	T const word = src.read();
	dst.write(word);
}

/**
 * Transfers one element from a stream of `hls::axis<T, 0, 0, 0>` records
 * to a stream of plain `T`, forwarding only the record's `data` member.
 */
template<typename T>
static void move(
	hls::stream<hls::axis<T, 0, 0, 0>> &src,
	hls::stream<T> &dst
) {
#pragma HLS pipeline II=1 style=flp
	hls::axis<T, 0, 0, 0> const beat = src.read();
	dst.write(beat.data);
}

/**
 * Type trait naming the payload carried by a stream element type.
 * In general, an element type is its own payload.
 */
template<typename T>
struct Payload {
	using type = T;
};

/**
 * For `hls::axis<T, 0, 0, 0>` records, the payload is the wrapped type `T`.
 */
template<typename T>
struct Payload<hls::axis<T, 0, 0, 0>> {
	using type = T;
};

/**
* Computes a checksum over a forwarded stream assumed to carry frames of
* N words further subdivided into K subwords.
* - Subword slicing can be customized typically by using a lambda.
* The provided DefaultSubwordSlicer assumes an `ap_(u)int`-like word
* type with a member `width` and a range-based slicing operator. It
* further assumes a little-endian arrangement of subwords within words
* for the canonical subword stream order.
* - Subwords wider than 23 bits are folded using bitwise XOR across
* slices of 23 bits starting from the LSB.
* - The folded subword values are weighted according to their position
* in the stream relative to the start of frame by a periodic weight
* sequence 1, 2, 3, ...
* - The weighted folded subword values are reduced to a checksum by an
* accumulation modulo 2^24.
* - A checksum is emitted for each completed frame. It is the concatenation
* of an 8-bit (modulo 256) frame counter and the 24-bit frame checksum.
*/
/**
 * Functor extracting subword `j` from a word `x` that is divided into K
 * equally wide subwords, with subword 0 occupying the least significant bits.
 * `T` must provide a static `width` member and a `(hi, lo)` range operator.
 */
template<typename T, unsigned K>
class DefaultSubwordSlicer {
	static_assert(T::width%K == 0, "Word size must be subword multiple.");

	// Width of a single subword in bits
	static constexpr unsigned SUBWORD_BITS = T::width/K;

public:
	ap_uint<SUBWORD_BITS> operator()(T const &x, unsigned const j) const {
#pragma HLS inline
		unsigned const lo = j*SUBWORD_BITS;
		unsigned const hi = lo + SUBWORD_BITS - 1;
		return x(hi, lo);
	}
};

//---------------------------------------------------------------------------
// Instrumentation Core
/**
 * Instrumentation core performing one step of input generation, output
 * tracking and measurement bookkeeping per invocation. All persistent state
 * is kept in function-local statics; the function is pipelined with II=1.
 *
 * Input side: unless held, feature maps of ILEN words of LFSR-generated data
 * are written to `finnix`. The timestamp counter value is queued when the
 * first word of a feature map is written.
 *
 * Output side: words are drained from `finnox` without backpressure and
 * accumulated into a per-frame checksum. On the last of OLEN words, the
 * queued start timestamp is popped to derive latency and interval.
 *
 * @tparam PENDING  depth of the timestamp queue (max. feature maps in flight)
 * @tparam ILEN     number of input words per input feature map
 * @tparam OLEN     number of output words per output feature map
 * @tparam KO       number of subwords within each output word
 * @tparam TI       input word type, must expose a static `width`
 * @tparam TO       output word type, sliced by DefaultSubwordSlicer<TO, KO>
 *
 * @param finnix       stream feeding generated input words
 * @param finnox       stream delivering output words
 * @param cfg          [0] - 0:hold (start no new feature map), 1:generate;
 *                     [31:16] - LFSR seed
 * @param status       [0] - sticky timestamp queue overflow;
 *                     [1] - sticky timestamp queue underflow
 * @param latency      cycles from first input word to last output word of
 *                     the most recently completed feature map
 * @param interval     cycles between the two most recent completions
 * @param checksum     [31:24] frame counter (mod 256) prior to its increment,
 *                     [23:0] checksum of the most recently completed frame
 * @param min_latency  smallest latency observed so far (all ones initially)
 */
template<
	unsigned PENDING,
	unsigned ILEN,
	unsigned OLEN,
	unsigned KO,
	typename TI,
	typename TO
>
void instrument(
	hls::stream<TI> &finnix,
	hls::stream<TO> &finnox,
	ap_uint<32>  cfg,          // [0] - 0:hold, 1:lfsr; [31:16] - LFSR seed
	ap_uint<32> &status,       // [0] - timestamp overflow; [1] - timestamp underflow
	ap_uint<32> &latency,
	ap_uint<32> &interval,
	ap_uint<32> &checksum,
	ap_uint<32> &min_latency
) {
#pragma HLS pipeline II=1 style=flp

	// Timestamp Management State
	using clock_t = ap_uint<32>;
	static clock_t cnt_clk = 0;
#pragma HLS reset variable=cnt_clk
	// NOTE(review): unlike the remaining state, this queue is not declared
	// static. Left untouched; confirm that its contents persist across
	// invocations as intended in both C simulation and synthesis.
	hls::stream<clock_t> timestamps;
#pragma HLS stream variable=timestamps depth=PENDING
	static bool timestamp_ovf = false;
	static bool timestamp_unf = false;
#pragma HLS reset variable=timestamp_ovf
#pragma HLS reset variable=timestamp_unf

	// Input Feed & Generation
	// The LFSR is composed of 16-bit lanes covering the input word width.
	constexpr unsigned LFSR_WIDTH = (TI::width+15)/16 * 16;
	static ap_uint<clog2nz(ILEN)> icnt = 0;
	static ap_uint<LFSR_WIDTH> lfsr;
#pragma HLS reset variable=icnt
#pragma HLS reset variable=lfsr off
	if(!finnix.full()) {

		bool const first = icnt == 0;
		bool wr;
		if(first) {
			// Start of new feature map: only begin if enabled by cfg[0],
			// (re-)seed each lane with the seed XOR a lane-specific constant
			wr = cfg[0];
			for(unsigned i = 0; i < LFSR_WIDTH; i += 16) {
#pragma HLS unroll
				lfsr(15+i, i) = cfg(31, 16) ^ (i>>4)*33331;
			}
		}
		else {
			// Advance LFSR: a feature map once started is always completed
			wr = true;
			for(unsigned i = 0; i < LFSR_WIDTH; i += 16) {
#pragma HLS unroll
				lfsr(15+i, i) = (lfsr(15+i, i) >> 1) ^ ap_uint<16>(lfsr[i]? 0 : 0x8805);
			}
		}

		if(wr) {
			finnix.write_nb(lfsr);
			// Record the start time of the feature map; flag a full queue
			if(first) timestamp_ovf |= !timestamps.write_nb(cnt_clk);
			icnt = icnt == ILEN-1? decltype(icnt)(0) : decltype(icnt)(icnt + 1);
		}
	}

	// Output Tracking
	static ap_uint<clog2nz(OLEN)> ocnt = 0;
#pragma HLS reset variable=ocnt
	static clock_t ts1 = 0; // last output timestamp
	static clock_t last_latency = 0;
	static clock_t last_interval = 0;
	static clock_t cur_min_latency = ~0;
#pragma HLS reset variable=ts1
#pragma HLS reset variable=last_latency
#pragma HLS reset variable=last_interval
#pragma HLS reset variable=cur_min_latency

	static ap_uint<8> pkts = 0;
#pragma HLS reset variable=pkts
	static ap_uint< 2> coeff[3];
	static ap_uint<24> psum;
	static ap_uint<32> last_checksum = 0;
#pragma HLS reset variable=coeff off
#pragma HLS reset variable=psum off
#pragma HLS reset variable=last_checksum

	TO oval;
	if(finnox.read_nb(oval)) {
		// Start of new output feature map: restart weights 1, 2, 3 and sum
		if(ocnt == 0) {
			for(unsigned i = 0; i < 3; i++) coeff[i] = i+1;
			psum = 0;
		}

		// Update checksum
		for(unsigned j = 0; j < KO; j++) {
#pragma HLS unroll
			auto const v0 = DefaultSubwordSlicer<TO, KO>()(oval, j);
			// Number of 23-bit slices needed to cover one subword
			constexpr unsigned W = 1 + (decltype(v0)::width-1)/23;
			// Zero-extend the subword to W whole slices. The buffer must be
			// sized by W (not KO) so that a wide subword is neither truncated
			// nor sliced beyond the buffer when KO < W.
			ap_uint<W*23> v = v0;
			ap_uint<  23> w = 0;
			for(unsigned k = 0; k < W; k++) w ^= v(23*k+22, 23*k);
			// Add coeff*w composed from 2*w and w selected by the weight bits
			psum += (coeff[j%3][1]? (w, ap_uint<1>(0)) : ap_uint<24>(0)) + (coeff[j%3][0]? w : ap_uint<23>(0));
		}

		// Re-align coefficients: advance by KO within the cycle 1, 2, 3
		for(unsigned j = 0; j < 3; j++) {
#pragma HLS unroll
			ap_uint<3> const cc = coeff[j] + ap_uint<3>(KO%3);
			coeff[j] = cc(1, 0) + cc[2];
		}

		// Track frame position
		if(ocnt != OLEN-1) ocnt++;
		else {
			clock_t ts0;
			// Without a queued start timestamp, keep the previous readings
			if(!timestamps.read_nb(ts0)) timestamp_unf = true;
			else {
				last_latency = cnt_clk - ts0; // completion - start
				last_interval = cnt_clk - ts1; // completion - previous completion
				cur_min_latency = std::min(cur_min_latency, last_latency);
				ts1 = cnt_clk; // mark completion ^
			}
			ocnt = 0;

			// Concatenation of frame counter (post-incremented) and checksum
			last_checksum = (pkts++, psum);
		}
	}

	// Advance Timestamp Counter
	cnt_clk++;

	// Copy Status Outputs
	status = timestamp_ovf | (timestamp_unf << 1);
	latency = last_latency;
	interval = last_interval;
	checksum = last_checksum;
	min_latency = cur_min_latency;

} // instrument()

/**
 * Top-level function of the instrumentation wrapper.
 * Instantiates instrument<>() with the module configuration constants and
 * decouples it from both stream ports by a small FIFO on either side.
 *
 * @param finnix       stream port written with the generated input words
 * @param finnox       stream port from which the output words are read
 * @param cfg          configuration register, forwarded to instrument<>()
 * @param status       status flags reported by instrument<>()
 * @param latency      latency reading reported by instrument<>()
 * @param interval     interval reading reported by instrument<>()
 * @param checksum     checksum reading reported by instrument<>()
 * @param min_latency  minimum latency reading reported by instrument<>()
 */
void instrumentation_wrapper(
hls::stream<TI> &finnix,
hls::stream<TO> &finnox,
ap_uint<32> cfg,
ap_uint<32> &status,
ap_uint<32> &latency,
ap_uint<32> &interval,
ap_uint<32> &checksum,
ap_uint<32> &min_latency
) {
// Both data ports are stream interfaces
#pragma HLS interface axis port=finnix
#pragma HLS interface axis port=finnox
// Configuration and all readings share the single register bundle `ctrl`
#pragma HLS interface s_axilite bundle=ctrl port=cfg
#pragma HLS interface s_axilite bundle=ctrl port=status
#pragma HLS interface s_axilite bundle=ctrl port=latency
#pragma HLS interface s_axilite bundle=ctrl port=interval
#pragma HLS interface s_axilite bundle=ctrl port=checksum
#pragma HLS interface s_axilite bundle=ctrl port=min_latency
#pragma HLS interface ap_ctrl_none port=return

// The three calls below form a dataflow region connected by the
// depth-2 streams finnix0 and finnox0.
#pragma HLS dataflow disable_start_propagation
static hls::stream<TI> finnix0;
// Payload<TO>::type is TO itself or, for an hls::axis<T, 0, 0, 0>, its data type T
static hls::stream<Payload<TO>::type> finnox0;
#pragma HLS stream variable=finnix0 depth=2
#pragma HLS stream variable=finnox0 depth=2

// Output stream port -> FIFO (forwards only the data member for hls::axis words)
move(finnox, finnox0);

// Main: TI and TO of instrument<>() are deduced from the FIFO element types
instrument<PENDING, ILEN, OLEN, KO>(finnix0, finnox0, cfg, status, latency, interval, checksum, min_latency);

// FIFO -> input stream port
move(finnix0, finnix);

} // instrumentation_wrapper
Loading