From 7c04eb6e628cd21820bcef02ff624edfa3702b22 Mon Sep 17 00:00:00 2001 From: Felix Jentzsch Date: Fri, 14 Feb 2025 16:31:29 +0000 Subject: [PATCH 01/17] Integrate instrumentation into ZynqBuild --- custom_hls/instrumentation.template.cpp | 307 ++++++++++++++++++ custom_hls/instrumentation_sim.template.tcl | 67 ++++ custom_hls/instrumentation_tb.template.sv | 172 ++++++++++ src/finn/builder/build_dataflow_config.py | 4 + src/finn/builder/build_dataflow_steps.py | 22 ++ .../transformation/fpgadataflow/floorplan.py | 8 +- .../fpgadataflow/instrumentation.py | 203 ++++++++++++ .../fpgadataflow/make_zynq_proj.py | 88 ++++- 8 files changed, 860 insertions(+), 11 deletions(-) create mode 100644 custom_hls/instrumentation.template.cpp create mode 100644 custom_hls/instrumentation_sim.template.tcl create mode 100644 custom_hls/instrumentation_tb.template.sv create mode 100644 src/finn/transformation/fpgadataflow/instrumentation.py diff --git a/custom_hls/instrumentation.template.cpp b/custom_hls/instrumentation.template.cpp new file mode 100644 index 0000000000..bf15d77a87 --- /dev/null +++ b/custom_hls/instrumentation.template.cpp @@ -0,0 +1,307 @@ +/****************************************************************************** + * Copyright (c) 2023, Xilinx, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ******************************************************************************* + * @brief Instrumentation wrapper module for FINN IP characterization. + * @author Thomas B. Preusser + * @details + * Instrumentation wrapper intercepting the feature map input to and + * the feature map output from a FINN IP to measure processing latency and + * initiation interval in terms of clock cycles. The most recent readings + * are exposed via AXI-Lite. + * This wrapper can run the FINN IP detached from an external data source + * and sink by feeding LFSR-generated data and sinking the output without + * backpressure. + * This module is currently not integrated with the FINN compiler. It must + * be instantiated and integrated with the rest of the system in a manual + * process. 
+ * + * @param PENDING maximum number of feature maps in the FINN dataflow pipeline + * @param ILEN number of input transactions per IFM + * @param OLEN number of output transactions per OFM + * @param KO number of subwords within output payload vector + * @param TI type of input payload vector + * @param TO type of output payload vector + *******************************************************************************/ + + #include + #include + #include + #include + + // Module Configuration + constexpr unsigned PENDING = @PENDING@; // Max. feature maps in flight + constexpr unsigned ILEN = @ILEN@; // Input words per IFM + constexpr unsigned OLEN = @OLEN@; // Output words per OFM + constexpr unsigned KO = @KO@; // Subwords within OFM transaction word + using TI = @TI@; // IFM transaction word + using TO = @TO@; // OFM transaction word + + //--------------------------------------------------------------------------- + // Utility Functions + static constexpr unsigned clog2 (unsigned x) { return x<2? 0 : 1+clog2((x+1)/2); } + static constexpr unsigned clog2nz(unsigned x) { return std::max(1u, clog2(x)); } + + template + static void move( + hls::stream &src, + hls::stream &dst + ) { + #pragma HLS pipeline II=1 style=flp + dst.write(src.read()); + } + + template + static void move( + hls::stream> &src, + hls::stream &dst + ) { + #pragma HLS pipeline II=1 style=flp + dst.write(src.read().data); + } + + template + class Payload { + public: + using type = T; + }; + template + class Payload> { + public: + using type = T; + }; + + /** + * Computes a checksum over a forwarded stream assumed to carry frames of + * N words further subdivided into K subwords. + * - Subword slicing can be customized typically by using a lambda. + * The provided DefaultSubwordSlicer assumes an `ap_(u)int`-like word + * type with a member `width` and a range-based slicing operator. 
It + * further assumes a little-endian arrangement of subwords within words + * for the canonical subword stream order. + * - Subwords wider than 23 bits are folded using bitwise XOR across + * slices of 23 bits starting from the LSB. + * - The folded subword values are weighted according to their position + * in the stream relative to the start of frame by a periodic weight + * sequence 1, 2, 3, ... + * - The weighted folded subword values are reduced to a checksum by an + * accumulation modulo 2^24. + * - A checksum is emitted for each completed frame. It is the concatenation + * of an 8-bit (modulo 256) frame counter and the 24-bit frame checksum. + */ + template + class DefaultSubwordSlicer { + static_assert(T::width%K == 0, "Word size must be subword multiple."); + static constexpr unsigned W = T::width/K; + public: + ap_uint operator()(T const &x, unsigned const j) const { + #pragma HLS inline + return x((j+1)*W-1, j*W); + } + }; + + //--------------------------------------------------------------------------- + // Instrumentation Core + template< + unsigned PENDING, + unsigned ILEN, + unsigned OLEN, + unsigned KO, + typename TI, + typename TO + > + void instrument( + hls::stream &finnix, + hls::stream &finnox, + ap_uint<32> cfg, // [0] - 0:hold, 1:lfsr; [31:16] - LFSR seed + ap_uint<32> &status, // [0] - timestamp overflow; [1] - timestamp underflow + ap_uint<32> &latency, + ap_uint<32> &interval, + ap_uint<32> &checksum, + ap_uint<32> &min_latency + ) { + #pragma HLS pipeline II=1 style=flp + + // Timestamp Management State + using clock_t = ap_uint<32>; + static clock_t cnt_clk = 0; + #pragma HLS reset variable=cnt_clk + hls::stream timestamps; + #pragma HLS stream variable=timestamps depth=PENDING + static bool timestamp_ovf = false; + static bool timestamp_unf = false; + #pragma HLS reset variable=timestamp_ovf + #pragma HLS reset variable=timestamp_unf + + // Input Feed & Generation + constexpr unsigned LFSR_WIDTH = (TI::width+15)/16 * 16; + static 
ap_uint icnt = 0; + static ap_uint lfsr; + #pragma HLS reset variable=icnt + #pragma HLS reset variable=lfsr off + if(!finnix.full()) { + + bool const first = icnt == 0; + bool wr; + if(first) { + // Start of new feature map + wr = cfg[0]; + for(unsigned i = 0; i < LFSR_WIDTH; i += 16) { + #pragma HLS unroll + lfsr(15+i, i) = cfg(31, 16) ^ (i>>4)*33331; + } + } + else { + // Advance LFSR + wr = true; + for(unsigned i = 0; i < LFSR_WIDTH; i += 16) { + #pragma HLS unroll + lfsr(15+i, i) = (lfsr(15+i, i) >> 1) ^ ap_uint<16>(lfsr[i]? 0 : 0x8805); + } + } + + if(wr) { + finnix.write_nb(lfsr); + if(first) timestamp_ovf |= !timestamps.write_nb(cnt_clk); + icnt = icnt == ILEN-1? decltype(icnt)(0) : decltype(icnt)(icnt + 1); + } + } + + // Output Tracking + static ap_uint ocnt = 0; + #pragma HLS reset variable=ocnt + static clock_t ts1 = 0; // last output timestamp + static clock_t last_latency = 0; + static clock_t last_interval = 0; + static clock_t cur_min_latency = ~0; + #pragma HLS reset variable=ts1 + #pragma HLS reset variable=last_latency + #pragma HLS reset variable=last_interval + #pragma HLS reset variable=cur_min_latency + + static ap_uint<8> pkts = 0; + #pragma HLS reset variable=pkts + static ap_uint< 2> coeff[3]; + static ap_uint<24> psum; + static ap_uint<32> last_checksum = 0; + #pragma HLS reset variable=coeff off + #pragma HLS reset variable=psum off + #pragma HLS reset variable=last_checksum + + TO oval; + if(finnox.read_nb(oval)) { + // Start of new output feature map + if(ocnt == 0) { + for(unsigned i = 0; i < 3; i++) coeff[i] = i+1; + psum = 0; + } + + // Update checksum + for(unsigned j = 0; j < KO; j++) { + #pragma HLS unroll + auto const v0 = DefaultSubwordSlicer()(oval, j); + constexpr unsigned W = 1 + (decltype(v0)::width-1)/23; + ap_uint v = v0; + ap_uint< 23> w = 0; + for(unsigned k = 0; k < W; k++) w ^= v(23*k+22, 23*k); + psum += (coeff[j%3][1]? (w, ap_uint<1>(0)) : ap_uint<24>(0)) + (coeff[j%3][0]? 
w : ap_uint<23>(0)); + } + + // Re-align coefficients + for(unsigned j = 0; j < 3; j++) { + #pragma HLS unroll + ap_uint<3> const cc = coeff[j] + ap_uint<3>(KO%3); + coeff[j] = cc(1, 0) + cc[2]; + } + + // Track frame position + if(ocnt != OLEN-1) ocnt++; + else { + clock_t ts0; + if(!timestamps.read_nb(ts0)) timestamp_unf = true; + else { + last_latency = cnt_clk - ts0; // completion - start + last_interval = cnt_clk - ts1; // completion - previous completion + cur_min_latency = std::min(cur_min_latency, last_latency); + ts1 = cnt_clk; // mark completion ^ + } + ocnt = 0; + + last_checksum = (pkts++, psum); + } + } + + // Advance Timestamp Counter + cnt_clk++; + + // Copy Status Outputs + status = timestamp_ovf | (timestamp_unf << 1); + latency = last_latency; + interval = last_interval; + checksum = last_checksum; + min_latency = cur_min_latency; + + } // instrument() + + void instrumentation_wrapper( + hls::stream &finnix, + hls::stream &finnox, + ap_uint<32> cfg, + ap_uint<32> &status, + ap_uint<32> &latency, + ap_uint<32> &interval, + ap_uint<32> &checksum, + ap_uint<32> &min_latency + ) { + #pragma HLS interface axis port=finnix + #pragma HLS interface axis port=finnox + #pragma HLS interface s_axilite bundle=ctrl port=cfg + #pragma HLS interface s_axilite bundle=ctrl port=status + #pragma HLS interface s_axilite bundle=ctrl port=latency + #pragma HLS interface s_axilite bundle=ctrl port=interval + #pragma HLS interface s_axilite bundle=ctrl port=checksum + #pragma HLS interface s_axilite bundle=ctrl port=min_latency + #pragma HLS interface ap_ctrl_none port=return + + #pragma HLS dataflow disable_start_propagation + static hls::stream finnix0; + static hls::stream::type> finnox0; + #pragma HLS stream variable=finnix0 depth=2 + #pragma HLS stream variable=finnox0 depth=2 + + // AXI-Stream -> FIFO + move(finnox, finnox0); + + // Main + instrument(finnix0, finnox0, cfg, status, latency, interval, checksum, min_latency); + + // FIFO -> AXI-Stream + move(finnix0, 
finnix); + + } // instrumentation_wrapper diff --git a/custom_hls/instrumentation_sim.template.tcl b/custom_hls/instrumentation_sim.template.tcl new file mode 100644 index 0000000000..4875d799e2 --- /dev/null +++ b/custom_hls/instrumentation_sim.template.tcl @@ -0,0 +1,67 @@ +# Copyright (c) 2023 Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of AMD nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +set fpga_part @FPGA_PART@ +#set output_root ".." 
+# path to IP folder for instrumentation wrapper, change as needed +#set instrwrp_ip_dir "$output_root/instrumentation_wrapper/project_instrwrap/sol1/impl/ip" +# path to IP folder for FINN IP, change as needed +#set finn_ip_dir "$output_root/stitched_ip/ip" + +create_project -force instr_sim_proj instr_sim_proj/ -part $fpga_part +create_bd_design "dut" +update_compile_order -fileset sources_1 +#set_property ip_repo_paths [list $instrwrp_ip_dir] [current_project] +set_property ip_repo_paths [concat [get_property ip_repo_paths [current_project]] @IP_DIRS_STR@] [current_project] +update_ip_catalog + + +create_bd_cell -type ip -vlnv xilinx_finn:finn:finn_design:1.0 finn_design_0 +create_bd_cell -type ip -vlnv xilinx.com:hls:instrumentation_wrapper:1.0 instrumentation_wrap_0 +connect_bd_intf_net [get_bd_intf_pins instrumentation_wrap_0/finnix] [get_bd_intf_pins finn_design_0/s_axis_0] +connect_bd_intf_net [get_bd_intf_pins finn_design_0/m_axis_0] [get_bd_intf_pins instrumentation_wrap_0/finnox] +make_bd_intf_pins_external [get_bd_intf_pins instrumentation_wrap_0/s_axi_ctrl] +make_bd_pins_external [get_bd_pins instrumentation_wrap_0/ap_clk] +make_bd_pins_external [get_bd_pins instrumentation_wrap_0/ap_rst_n] +connect_bd_net [get_bd_ports ap_clk_0] [get_bd_pins finn_design_0/ap_clk] +connect_bd_net [get_bd_ports ap_rst_n_0] [get_bd_pins finn_design_0/ap_rst_n] + +save_bd_design + +update_compile_order -fileset sources_1 +make_wrapper -files [get_files instr_sim_proj/instr_sim_proj.srcs/sources_1/bd/dut/dut.bd] -top +add_files -norecurse instr_sim_proj/instr_sim_proj.gen/sources_1/bd/dut/hdl/dut_wrapper.v + +set_property SOURCE_SET sources_1 [get_filesets sim_1] +add_files -fileset sim_1 ./instrwrap_testbench.sv +update_compile_order -fileset sim_1 + +set_property synth_checkpoint_mode None [get_files instr_sim_proj/instr_sim_proj.srcs/sources_1/bd/dut/dut.bd] +generate_target Simulation [get_files instr_sim_proj/instr_sim_proj.srcs/sources_1/bd/dut/dut.bd] 
+launch_simulation -simset sim_1 -mode behavioral +run all diff --git a/custom_hls/instrumentation_tb.template.sv b/custom_hls/instrumentation_tb.template.sv new file mode 100644 index 0000000000..933104c623 --- /dev/null +++ b/custom_hls/instrumentation_tb.template.sv @@ -0,0 +1,172 @@ +// Copyright (c) 2023 Advanced Micro Devices, Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, this +// list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of AMD nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + +module tb #( + // sampling period (in cycles) for reading instrumentation wrapper registers + // TODO: make configurable or adjust automatically? + int unsigned INSTR_READ_PERIOD = 10000, + // 16-bit LFSR seed for generating fixed random data + int unsigned LFSR_SEED = 1 +)(); + + +// Clock & Reset +logic ap_clk = 0; +always #5ns ap_clk = !ap_clk; +logic ap_rst_n = 0; +uwire ap_rst = !ap_rst_n; + +// wires for instrumentation wrapper AXI lite interface +logic [31:0] axilite_ctrl_araddr = 'x; +uwire axilite_ctrl_arready; +logic axilite_ctrl_arvalid = 0; +logic [31:0] axilite_ctrl_awaddr = 'x; +uwire axilite_ctrl_awready; +logic axilite_ctrl_awvalid = 0; +uwire axilite_ctrl_bready = 1; +uwire [1:0]axilite_ctrl_bresp; +uwire axilite_ctrl_bvalid; +uwire [31:0]axilite_ctrl_rdata; +logic axilite_ctrl_rready = 1; +uwire [1:0]axilite_ctrl_rresp; +uwire axilite_ctrl_rvalid; +logic [31:0] axilite_ctrl_wdata = 'x; +uwire axilite_ctrl_wready; +uwire [3:0]axilite_ctrl_wstrb = 4'b1111; +logic axilite_ctrl_wvalid = 0; + + + + +dut_wrapper dut_wrapper_inst ( + .ap_clk_0(ap_clk), .ap_rst_n_0(ap_rst_n), + .s_axi_ctrl_0_araddr(axilite_ctrl_araddr), + .s_axi_ctrl_0_arready(axilite_ctrl_arready), + .s_axi_ctrl_0_arvalid(axilite_ctrl_arvalid), + .s_axi_ctrl_0_awaddr(axilite_ctrl_awaddr), + .s_axi_ctrl_0_awready(axilite_ctrl_awready), + .s_axi_ctrl_0_awvalid(axilite_ctrl_awvalid), + .s_axi_ctrl_0_bready(axilite_ctrl_bready), + .s_axi_ctrl_0_bresp(axilite_ctrl_bresp), + .s_axi_ctrl_0_bvalid(axilite_ctrl_bvalid), + .s_axi_ctrl_0_rdata(axilite_ctrl_rdata), + .s_axi_ctrl_0_rready(axilite_ctrl_rready), + .s_axi_ctrl_0_rresp(axilite_ctrl_rresp), + .s_axi_ctrl_0_rvalid(axilite_ctrl_rvalid), + .s_axi_ctrl_0_wdata(axilite_ctrl_wdata), + .s_axi_ctrl_0_wready(axilite_ctrl_wready), + .s_axi_ctrl_0_wstrb(axilite_ctrl_wstrb), + .s_axi_ctrl_0_wvalid(axilite_ctrl_wvalid) +); + +//--------------------------------------------------------------------------- + +initial begin + $timeformat(-9, 2, " 
ns"); + // perform reset + repeat(100) @(posedge ap_clk); + ap_rst_n <= 1; + $display("Reset complete"); + repeat(100) @(posedge ap_clk); + // instrumentation wrapper configuration: + // set up LFSR seed + start data generation + output sink + axilite_ctrl_awaddr <= 'h10; + axilite_ctrl_awvalid <= 1; + axilite_ctrl_wdata <= (LFSR_SEED << 16) | 'b11; + axilite_ctrl_wvalid <= 1; + repeat(8) begin + @(posedge ap_clk); + if(axilite_ctrl_wready && axilite_ctrl_awready) break; + end + axilite_ctrl_wvalid <= 0; + axilite_ctrl_awvalid <= 0; + axilite_ctrl_awaddr <= 'x; + axilite_ctrl_wdata <= 'x; + while(1) begin + axilite_ctrl_araddr <= 'h18; + axilite_ctrl_arvalid <= 1; + repeat(8) begin + @(posedge ap_clk); + if(axilite_ctrl_rvalid) begin + $display("[t=%0t] STATUS_I = %0d", $time, axilite_ctrl_rdata); + break; + end + end + axilite_ctrl_araddr <= 'h20; + axilite_ctrl_arvalid <= 1; + repeat(8) begin + @(posedge ap_clk); + if(axilite_ctrl_rvalid) begin + $display("[t=%0t] STATUS_O = %0d", $time, axilite_ctrl_rdata); + break; + end + end + axilite_ctrl_araddr <= 'h28; + axilite_ctrl_arvalid <= 1; + repeat(8) begin + @(posedge ap_clk); + if(axilite_ctrl_rvalid) begin + $display("[t=%0t] LATENCY = %0d", $time, axilite_ctrl_rdata); + break; + end + end + axilite_ctrl_araddr <= 'h38; + axilite_ctrl_arvalid <= 1; + repeat(8) begin + @(posedge ap_clk); + if(axilite_ctrl_rvalid) begin + $display("[t=%0t] INTERVAL = %0d", $time, axilite_ctrl_rdata); + break; + end + end + axilite_ctrl_araddr <= 'h48; + axilite_ctrl_arvalid <= 1; + repeat(8) begin + @(posedge ap_clk); + if(axilite_ctrl_rvalid) begin + $display("[t=%0t] CHECKSUM = %8x", $time, axilite_ctrl_rdata); + if(axilite_ctrl_rdata) begin + $display("Nonzero checksum detected, stopping simulation"); + $finish; + // TODO: simulate for configurable number of frames, like this: + // if(axilite_ctrl_rdata[31:24] == 47) begin + // $display("Frame number 48 detected, stopping simulation"); + // $finish; + // end + end + break; + 
end + end + axilite_ctrl_arvalid <= 0; + repeat(INSTR_READ_PERIOD) @(posedge ap_clk); + end +end + + +endmodule : tb diff --git a/src/finn/builder/build_dataflow_config.py b/src/finn/builder/build_dataflow_config.py index d6437a2e5c..08545ebc14 100644 --- a/src/finn/builder/build_dataflow_config.py +++ b/src/finn/builder/build_dataflow_config.py @@ -314,6 +314,10 @@ class DataflowBuildConfig: #: debug signals in the generated hardware) enable_hw_debug: Optional[bool] = False + #: Whether the accelerator will be simulated and synthesized with an + #: instrumentation wrapper attached to accurately measure performance. + enable_instrumentation: Optional[bool] = False + #: Whether pdb postmortem debuggig will be launched when the build fails enable_build_pdb_debug: Optional[bool] = True diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py index 5163b2dbdb..a4481ed778 100644 --- a/src/finn/builder/build_dataflow_steps.py +++ b/src/finn/builder/build_dataflow_steps.py @@ -89,6 +89,7 @@ from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP from finn.transformation.fpgadataflow.insert_dwc import InsertDWC from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO +from finn.transformation.fpgadataflow.insert_tlastmarker import InsertTLastMarker from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild from finn.transformation.fpgadataflow.minimize_accumulator_width import ( @@ -644,6 +645,26 @@ def step_create_stitched_ip(model: ModelWrapper, cfg: DataflowBuildConfig): """Create stitched IP for a graph after all HLS IP blocks have been generated. 
Depends on the DataflowOutputType.STITCHED_IP output product.""" + # introduce tLAST marker, required for instrumentation + if cfg.enable_instrumentation: + model = model.transform( + InsertTLastMarker( + # only insert marker on output (input TLAST is ignored for these use-cases anyway) + both=False, + # use ap_axiu instead of qdma_axis + external=False, + # static number of iterations (based on what the compiler/folding sets up) + dynamic=False, + ) + ) + # give a proper name to the inserted node, important for codegen + # TODO: deal with multi-I/O accelerators? + model.graph.node[-1].name = "TLastMarker_0" + # re-run codegen and HLS IP gen, will affect only the new TLastMarker layer assuming + # all other IPs have been generated already + model = model.transform(PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period())) + model = model.transform(HLSSynthIP()) + if DataflowOutputType.STITCHED_IP in cfg.generate_outputs: stitched_ip_dir = cfg.output_dir + "/stitched_ip" model = model.transform( @@ -806,6 +827,7 @@ def step_synthesize_bitfile(model: ModelWrapper, cfg: DataflowBuildConfig): cfg.board, cfg.synth_clk_period_ns, cfg.enable_hw_debug, + cfg.enable_instrumentation, partition_model_dir=partition_model_dir, ) ) diff --git a/src/finn/transformation/fpgadataflow/floorplan.py b/src/finn/transformation/fpgadataflow/floorplan.py index b24145afcb..7d93ff88fc 100644 --- a/src/finn/transformation/fpgadataflow/floorplan.py +++ b/src/finn/transformation/fpgadataflow/floorplan.py @@ -99,9 +99,13 @@ def apply(self, model): # if we have SLR assignment already. 
use that if node_slr != -1: continue + # if available, use the SLR of the preceding node srcnode = model.find_producer(node.input[0]) - node_slr = getCustomOp(srcnode).get_nodeattr("slr") - node_inst.set_nodeattr("slr", node_slr) + if srcnode is not None: + node_slr = getCustomOp(srcnode).get_nodeattr("slr") + node_inst.set_nodeattr("slr", node_slr) + else: + node_inst.set_nodeattr("slr", default_slr) if unassigned_nodes > 0: warnings.warn( diff --git a/src/finn/transformation/fpgadataflow/instrumentation.py b/src/finn/transformation/fpgadataflow/instrumentation.py new file mode 100644 index 0000000000..7f37c5ed14 --- /dev/null +++ b/src/finn/transformation/fpgadataflow/instrumentation.py @@ -0,0 +1,203 @@ +import numpy as np +import os +import subprocess +from qonnx.custom_op.registry import getCustomOp +from qonnx.transformation.base import Transformation + +from finn.custom_op.fpgadataflow.templates import ipgentcl_template +from finn.util.basic import make_build_dir +from finn.util.hls import CallHLS + + +# TODO: duplicate function from make_zynq_proj.py +def collect_ip_dirs(model, ipstitch_path): + # collect list of all IP dirs + ip_dirs = [] + need_memstreamer = False + for node in model.graph.node: + node_inst = getCustomOp(node) + ip_dir_value = node_inst.get_nodeattr("ip_path") + assert os.path.isdir( + ip_dir_value + ), """The directory that should + contain the generated ip blocks doesn't exist.""" + ip_dirs += [ip_dir_value] + if node.op_type.startswith("MVAU") or node.op_type == "Thresholding_hls": + if node_inst.get_nodeattr("mem_mode") == "internal_decoupled": + need_memstreamer = True + ip_dirs += [ipstitch_path + "/ip"] + if need_memstreamer: + # add RTL streamer IP + ip_dirs.append("$::env(FINN_ROOT)/finn-rtllib/memstream") + return ip_dirs + + +class GenerateInstrumentationIP(Transformation): + def __init__( + self, + fpga_part, + clk_period_ns, + format="ip", # "ip" for Vivado (Zynq) or "xo" for Vitis (Alveo/Versal) + ): + super().__init__() + 
self.fpga_part = fpga_part + self.clk_period_ns = clk_period_ns + self.format = format + + def apply(self, model): + # Create directory for code-gen and HLS of instrumentation IP + wrapper_output_dir = make_build_dir(prefix="code_gen_ipgen_Instrumentation_") + model.set_metadata_prop("instrumentation_ipgen", wrapper_output_dir) + + # conservative max for pending feature maps: number of layers + pending = len(model.graph.node) + # query the parallelism-dependent folded input shape from the + # node consuming the graph input + inp_name = model.graph.input[0].name + inp_node = getCustomOp(model.find_consumer(inp_name)) + inp_shape_folded = list(inp_node.get_folded_input_shape()) + inp_stream_width = inp_node.get_instream_width_padded() + # number of beats per input is given by product of folded input + # shape except the last dim (which is the stream width) + ilen = np.prod(inp_shape_folded[:-1]) + ti = "ap_uint<%d>" % inp_stream_width + # perform the same for the output + out_name = model.graph.output[0].name + out_node = getCustomOp(model.find_producer(out_name)) + out_shape_folded = list(out_node.get_folded_output_shape()) + out_stream_width = out_node.get_outstream_width_padded() + olen = np.prod(out_shape_folded[:-1]) + to = "ap_uint<%d>" % out_stream_width + ko = out_shape_folded[-1] + # fill out instrumentation wrapper template + with open( + os.path.join(os.environ["FINN_ROOT"], "custom_hls", "instrumentation.template.cpp"), "r" + ) as f: + instrwrp_cpp = f.read() + instrwrp_cpp = instrwrp_cpp.replace("@PENDING@", str(pending)) + instrwrp_cpp = instrwrp_cpp.replace("@ILEN@", str(ilen)) + instrwrp_cpp = instrwrp_cpp.replace("@OLEN@", str(olen)) + instrwrp_cpp = instrwrp_cpp.replace("@TI@", str(ti)) + instrwrp_cpp = instrwrp_cpp.replace("@TO@", str(to)) + instrwrp_cpp = instrwrp_cpp.replace("@KO@", str(ko)) + with open(wrapper_output_dir + "/top_instrumentation_wrapper.cpp", "w") as f: + f.write(instrwrp_cpp) + # fill out HLS synthesis tcl template + prjname = 
"project_instrwrap" + ipgentcl = ipgentcl_template + ipgentcl = ipgentcl.replace("$PROJECTNAME$", prjname) + ipgentcl = ipgentcl.replace("$HWSRCDIR$", wrapper_output_dir) + ipgentcl = ipgentcl.replace("$TOPFXN$", "instrumentation_wrapper") + ipgentcl = ipgentcl.replace("$FPGAPART$", self.fpga_part) + ipgentcl = ipgentcl.replace("$CLKPERIOD$", str(self.clk_period_ns)) + ipgentcl = ipgentcl.replace("$DEFAULT_DIRECTIVES$", "") + if self.format == "xo": + # use Vitis RTL kernel (.xo) output instead of IP-XACT + ipgentcl = ipgentcl.replace("$EXTRA_DIRECTIVES$", "config_export -format xo") + ipgentcl = ipgentcl.replace( + "export_design -format ip_catalog", "export_design -format xo" + ) + else: + ipgentcl = ipgentcl.replace("$EXTRA_DIRECTIVES$", "") + with open(wrapper_output_dir + "/hls_syn.tcl", "w") as f: + f.write(ipgentcl) + # build bash script to launch HLS synth and call it + code_gen_dir = wrapper_output_dir + builder = CallHLS() + builder.append_tcl(code_gen_dir + "/hls_syn.tcl") + builder.set_ipgen_path(code_gen_dir + "/{}".format(prjname)) + builder.build(code_gen_dir) + ipgen_path = builder.ipgen_path + assert os.path.isdir(ipgen_path), "HLS IPGen failed: %s not found" % (ipgen_path) + ip_path = ipgen_path + "/sol1/impl/ip" + assert os.path.isdir(ip_path), "HLS IPGen failed: %s not found. 
Check log under %s" % ( + ip_path, + code_gen_dir, + ) + if self.format == "xo": + assert False, "Not implemented" + # TODO: export for use in VitisBuild or VersalBuild + # xo_dir = self.output_dir + "/xo" + # xo_dir = str(os.path.abspath(xo_dir)) + # os.makedirs(xo_dir, exist_ok=True) + # xo_path = code_gen_dir + "/{}/sol1/impl/export.xo".format(prjname) + # xo_instr_path = xo_dir + "/instrumentation_wrapper.xo" + # shutil.copy(xo_path, xo_instr_path) + else: + # shutil.move(ip_path, self.output_dir) + pass + + return (model, False) + + +class PrepareInstrumentationSim(Transformation): + def __init__(self, fpga_part): + super().__init__() + self.fpga_part = fpga_part + + def apply(self, model): + # Create directory for simulation of instrumentation IP + FINN IP + sim_output_dir = make_build_dir(prefix="sim_Instrumentation_") + model.set_metadata_prop("instrumentation_sim", sim_output_dir) + + # check if instrumentation IP was generated + instr_ip_dir = model.get_metadata_prop("instrumentation_ipgen") + if instr_ip_dir is None or (not os.path.isdir(instr_ip_dir)): + raise Exception( + "Instrumentation IP not generated, run GenerateInstrumentationIP first." 
+ ) + + # TODO: Support simulation with AXI-lite control interfaces (e.g., for dynamic pipelines) + # fill in testbench template + with open( + os.path.join(os.environ["FINN_ROOT"], "custom_hls", "instrumentation_tb.template.sv"), + "r", + ) as f: + testbench_sv = f.read() + with open(sim_output_dir + "/instrwrap_testbench.sv", "w") as f: + f.write(testbench_sv) + # fill in testbench project creator template + with open( + os.path.join(os.environ["FINN_ROOT"], "custom_hls", "instrumentation_sim.template.tcl"), + "r", + ) as f: + testbench_tcl = f.read() + + # collect ip repo paths for finn accelerator sub cores so Vivado can find them + ipstitch_path = model.get_metadata_prop("vivado_stitch_proj") + ip_dirs = ["list"] + ip_dirs += collect_ip_dirs(model, ipstitch_path) + ip_dirs += [instr_ip_dir] + ip_dirs_str = "[%s]" % (" ".join(ip_dirs)) + testbench_tcl = testbench_tcl.replace("@FPGA_PART@", self.fpga_part) + testbench_tcl = testbench_tcl.replace("@IP_DIRS_STR@", ip_dirs_str) + with open(sim_output_dir + "/make_instrwrap_sim_proj.tcl", "w") as f: + f.write(testbench_tcl) + + return (model, False) + + +class RunInstrumentationSim(Transformation): + def __init__(self): + super().__init__() + + def apply(self, model): + sim_output_dir = model.get_metadata_prop("instrumentation_sim") + if sim_output_dir is None or (not os.path.isdir(sim_output_dir)): + raise Exception( + "Instrumentation sim not prepared, run PrepareInstrumentationSim first." 
+ ) + + # Prepare bash script + bash_script = os.getcwd() + "/report_power.sh" + with open(bash_script, "w") as script: + script.write("#!/bin/bash\n") + script.write("cd %s\n" % (sim_output_dir)) + script.write("vivado -mode batch -source make_instrwrap_sim_proj.tcl\n") + + # Run script + print("Running Vivado simulation of instrumentation wrapper") + sub_proc = subprocess.Popen(["bash", bash_script]) + sub_proc.communicate() + + return (model, False) diff --git a/src/finn/transformation/fpgadataflow/make_zynq_proj.py b/src/finn/transformation/fpgadataflow/make_zynq_proj.py index 63ce2d3cbf..8192c09bae 100644 --- a/src/finn/transformation/fpgadataflow/make_zynq_proj.py +++ b/src/finn/transformation/fpgadataflow/make_zynq_proj.py @@ -45,6 +45,7 @@ from finn.transformation.fpgadataflow.insert_dwc import InsertDWC from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO from finn.transformation.fpgadataflow.insert_iodma import InsertIODMA +from finn.transformation.fpgadataflow.instrumentation import GenerateInstrumentationIP from finn.transformation.fpgadataflow.prepare_ip import PrepareIP from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers from finn.util.basic import make_build_dir, pynq_native_port_width, pynq_part_map @@ -102,6 +103,42 @@ def apply(self, model): axilite_idx = 0 global_clk_ns = 0 instance_names = {} + + # instantiate instrumentation IP if it was generated + instr_ip_dir = model.get_metadata_prop("instrumentation_ipgen") + if instr_ip_dir is not None and os.path.isdir(instr_ip_dir): + use_instrumentation = True + # update IP repository + config.append( + "set_property ip_repo_paths " + "[concat [get_property ip_repo_paths [current_project]] [list %s]] " + "[current_project]" % instr_ip_dir + ) + config.append("update_ip_catalog -rebuild -scan_changes") + # create instance + config.append( + "create_bd_cell -type ip -vlnv %s %s" + % ("xilinx.com:hls:instrumentation_wrapper:1.0", "instrumentation_wrap_0") + ) + # 
connect clock % reset + config.append( + "connect_bd_net [get_bd_pins instrumentation_wrap_0/ap_clk] " + "[get_bd_pins smartconnect_0/aclk]" + ) + config.append( + "connect_bd_net [get_bd_pins instrumentation_wrap_0/ap_rst_n] " + "[get_bd_pins smartconnect_0/aresetn]" + ) + # connect AXI-lite control interface + config.append( + "connect_bd_intf_net [get_bd_intf_pins instrumentation_wrap_0/s_axi_ctrl] " + "[get_bd_intf_pins axi_interconnect_0/M%02d_AXI]" % (axilite_idx) + ) + config.append("assign_axi_addr_proc instrumentation_wrap_0/s_axi_ctrl") + axilite_idx += 1 + else: + use_instrumentation = False + for node in model.graph.node: assert node.op_type == "StreamingDataflowPartition", "Invalid link graph" sdp_node = getCustomOp(node) @@ -150,7 +187,8 @@ def apply(self, model): # define kernel instances # name kernels connected to graph inputs as idmaxx # name kernels connected to graph outputs as odmaxx - if (producer is None) or (consumer == []): + # do not expect IDMA/ODMA when instrumentation is enabled + if not use_instrumentation and ((producer is None) or (consumer == [])): # TODO not a good way of checking for external inp&out # should look at the list of top-level in/out instead if producer is None: @@ -228,6 +266,26 @@ def apply(self, model): ) ) + # connect first/last dataflow partition to instrumentation wrapper + if use_instrumentation: + if producer is None: + config.append( + "connect_bd_intf_net [get_bd_intf_pins %s/s_axis_0] " + "[get_bd_intf_pins instrumentation_wrap_0/finnix]" + % (instance_names[node.name]) + ) + if consumer == []: + config.append( + "connect_bd_intf_net [get_bd_intf_pins %s/m_axis_0] " + "[get_bd_intf_pins instrumentation_wrap_0/finnox]" + % (instance_names[node.name]) + ) + + # TODO: WORKAROUND, do not instantiate smartconnect when not needed! 
+ if use_instrumentation: + config.append("delete_bd_objs [get_bd_cells smartconnect_0]") + aximm_idx = 1 + # create a temporary folder for the project vivado_pynq_proj_dir = make_build_dir(prefix="vivado_zynq_proj_") model.set_metadata_prop("vivado_pynq_proj", vivado_pynq_proj_dir) @@ -305,6 +363,7 @@ def __init__( platform, period_ns, enable_debug=False, + enable_instrumentation=False, partition_model_dir=None, ): super().__init__() @@ -313,19 +372,27 @@ def __init__( self.period_ns = period_ns self.platform = platform self.enable_debug = enable_debug + self.enable_instrumentation = enable_instrumentation self.partition_model_dir = partition_model_dir def apply(self, model): # first infer layouts model = model.transform(InferDataLayouts()) # prepare at global level, then break up into kernels - prep_transforms = [ - InsertIODMA(self.axi_port_width), - InsertDWC(), - SpecializeLayers(self.fpga_part), - Floorplan(), - CreateDataflowPartition(partition_model_dir=self.partition_model_dir), - ] + if self.enable_instrumentation: + prep_transforms = [ + GenerateInstrumentationIP(self.fpga_part, self.period_ns), + Floorplan(), + CreateDataflowPartition(partition_model_dir=self.partition_model_dir), + ] + else: + prep_transforms = [ + InsertIODMA(self.axi_port_width), + InsertDWC(), + SpecializeLayers(self.fpga_part), + Floorplan(), + CreateDataflowPartition(partition_model_dir=self.partition_model_dir), + ] for trn in prep_transforms: model = model.transform(trn) model = model.transform(GiveUniqueNodeNames()) @@ -337,7 +404,10 @@ def apply(self, model): sdp_node = getCustomOp(sdp_node) dataflow_model_filename = sdp_node.get_nodeattr("model") kernel_model = ModelWrapper(dataflow_model_filename) - kernel_model = kernel_model.transform(InsertFIFO()) + # InsertFIFO at this stage interferes with tLastMarker + # TODO: is this really needed here at all? 
+ if not self.enable_instrumentation: + kernel_model = kernel_model.transform(InsertFIFO()) kernel_model = kernel_model.transform(SpecializeLayers(self.fpga_part)) kernel_model = kernel_model.transform(GiveUniqueNodeNames(prefix)) kernel_model.save(dataflow_model_filename) From 419e18f65d67e3b8f498a9f4620123f1170582bf Mon Sep 17 00:00:00 2001 From: Felix Jentzsch Date: Wed, 19 Feb 2025 16:10:48 +0000 Subject: [PATCH 02/17] Nest AXI interconnects if required --- .../fpgadataflow/make_zynq_proj.py | 94 +++++++++++++++++-- 1 file changed, 87 insertions(+), 7 deletions(-) diff --git a/src/finn/transformation/fpgadataflow/make_zynq_proj.py b/src/finn/transformation/fpgadataflow/make_zynq_proj.py index 8192c09bae..5e86a58b6e 100644 --- a/src/finn/transformation/fpgadataflow/make_zynq_proj.py +++ b/src/finn/transformation/fpgadataflow/make_zynq_proj.py @@ -27,6 +27,7 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+import math import os import subprocess from qonnx.core.modelwrapper import ModelWrapper @@ -100,6 +101,9 @@ def apply(self, model): idma_idx = 0 odma_idx = 0 aximm_idx = 0 + nested_interconnect_count = 0 + master_axilite_idx = 0 + axilite_interconnect_idx = 0 axilite_idx = 0 global_clk_ns = 0 instance_names = {} @@ -132,13 +136,62 @@ def apply(self, model): # connect AXI-lite control interface config.append( "connect_bd_intf_net [get_bd_intf_pins instrumentation_wrap_0/s_axi_ctrl] " - "[get_bd_intf_pins axi_interconnect_0/M%02d_AXI]" % (axilite_idx) + "[get_bd_intf_pins axi_interconnect_0/M%02d_AXI]" % (master_axilite_idx) ) config.append("assign_axi_addr_proc instrumentation_wrap_0/s_axi_ctrl") - axilite_idx += 1 + master_axilite_idx += 1 else: use_instrumentation = False + # instantiate nested AXI interconnects if required + # only the nested interconnects and all interfaces connected before this line + # will be connected to the original (master) interconnect + total_axilite_count = 0 + for node in model.graph.node: + sdp_node = getCustomOp(node) + dataflow_model_filename = sdp_node.get_nodeattr("model") + kernel_model = ModelWrapper(dataflow_model_filename) + ifnames = eval(kernel_model.get_metadata_prop("vivado_stitch_ifnames")) + total_axilite_count += len(ifnames["axilite"]) + if total_axilite_count > (64 - master_axilite_idx): + nested_interconnect_count = math.ceil(total_axilite_count / 64.0) + for i in range(1, nested_interconnect_count + 1): + # create instance + config.append( + "create_bd_cell -type ip -vlnv $interconnect_vlnv axi_interconnect_%d" % (i) + ) + # configure instance + config.append( + "set_property -dict [list CONFIG.NUM_MI %d] [get_bd_cells axi_interconnect_%d]" + % (max(64, total_axilite_count), i) + ) + # connect to master interconnect + config.append( + "connect_bd_intf_net [get_bd_intf_pins axi_interconnect_0/M%02d_AXI] -boundary_type upper [get_bd_intf_pins axi_interconnect_%d/S00_AXI]" + % (master_axilite_idx, i) + ) + # connect 
clocks TODO: suppport zynq_7000 + config.append( + "apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/pl_clk0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}} [get_bd_pins axi_interconnect_%d/ACLK]" + % (i) + ) + config.append( + "apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/pl_clk0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}} [get_bd_pins axi_interconnect_%d/S00_ACLK]" + % (i) + ) + # connect reset + config.append( + "connect_bd_net [get_bd_pins axi_interconnect_%d/ARESETN] [get_bd_pins axi_interconnect_0/ARESETN]" + % (i) + ) + master_axilite_idx += 1 + total_axilite_count = min(0, total_axilite_count - 64) + + assert total_axilite_count == 0, "Not all AXI-lite interfaces connected!" + + # start populating the first nested interconnect + axilite_interconnect_idx = 1 + for node in model.graph.node: assert node.op_type == "StreamingDataflowPartition", "Invalid link graph" sdp_node = getCustomOp(node) @@ -211,8 +264,13 @@ def apply(self, model): assert axilite_intf_name is not None config.append( "connect_bd_intf_net [get_bd_intf_pins %s/%s] " - "[get_bd_intf_pins axi_interconnect_0/M%02d_AXI]" - % (instance_names[node.name], axilite_intf_name, axilite_idx) + "[get_bd_intf_pins axi_interconnect_%d/M%02d_AXI]" + % ( + instance_names[node.name], + axilite_intf_name, + axilite_interconnect_idx, + axilite_idx, + ) ) # assign_bd_address with appropriate range/offset config.append( @@ -221,6 +279,11 @@ def apply(self, model): aximm_idx += 1 axilite_idx += 1 + if axilite_idx == 64: + axilite_interconnect_idx += 1 + axilite_idx = 0 + if axilite_interconnect_idx == 0: + master_axilite_idx += 1 else: instance_names[node.name] = node.name config.append( @@ -230,8 +293,13 @@ def apply(self, model): for axilite_intf_name in ifnames["axilite"]: config.append( "connect_bd_intf_net [get_bd_intf_pins %s/%s] " - "[get_bd_intf_pins axi_interconnect_0/M%02d_AXI]" - % (instance_names[node.name], axilite_intf_name, axilite_idx) + 
"[get_bd_intf_pins axi_interconnect_%d/M%02d_AXI]" + % ( + instance_names[node.name], + axilite_intf_name, + axilite_interconnect_idx, + axilite_idx, + ) ) # assign_bd_address with appropriate range/offset config.append( @@ -239,6 +307,11 @@ def apply(self, model): % (instance_names[node.name], axilite_intf_name) ) axilite_idx += 1 + if axilite_idx == 64: + axilite_interconnect_idx += 1 + axilite_idx = 0 + if axilite_interconnect_idx == 0: + master_axilite_idx += 1 sdp_node.set_nodeattr("instance_name", instance_names[node.name]) config.append( @@ -286,6 +359,13 @@ def apply(self, model): config.append("delete_bd_objs [get_bd_cells smartconnect_0]") aximm_idx = 1 + # finalize nested interconnect clock TODO: support zynq_7000 + for i in range(1, nested_interconnect_count + 1): + config.append( + "apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/pl_clk0} } [get_bd_pins axi_interconnect_%d/M*_ACLK]" + % (i) + ) + # create a temporary folder for the project vivado_pynq_proj_dir = make_build_dir(prefix="vivado_zynq_proj_") model.set_metadata_prop("vivado_pynq_proj", vivado_pynq_proj_dir) @@ -300,7 +380,7 @@ def apply(self, model): templates.custom_zynq_shell_template % ( fclk_mhz, - axilite_idx, + master_axilite_idx, aximm_idx, self.platform, pynq_part_map[self.platform], From 5628ab2a1a2505ad4014626e885ddc11c8e59238 Mon Sep 17 00:00:00 2001 From: Felix Jentzsch Date: Wed, 19 Feb 2025 16:25:07 +0000 Subject: [PATCH 03/17] Fix AXI interconnect connection --- src/finn/transformation/fpgadataflow/make_zynq_proj.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/finn/transformation/fpgadataflow/make_zynq_proj.py b/src/finn/transformation/fpgadataflow/make_zynq_proj.py index 5e86a58b6e..8c990a8b3d 100644 --- a/src/finn/transformation/fpgadataflow/make_zynq_proj.py +++ b/src/finn/transformation/fpgadataflow/make_zynq_proj.py @@ -191,6 +191,8 @@ def apply(self, model): # start populating the first nested interconnect axilite_interconnect_idx 
= 1 + else: + axilite_idx = master_axilite_idx for node in model.graph.node: assert node.op_type == "StreamingDataflowPartition", "Invalid link graph" From 0c57d1b373527337f80ede1714a739cb83771bad Mon Sep 17 00:00:00 2001 From: Felix Jentzsch Date: Wed, 19 Feb 2025 22:19:16 +0000 Subject: [PATCH 04/17] Make floorplan partitioning of AXI-lite interfaces more consistent --- .../transformation/fpgadataflow/floorplan.py | 39 ++++++++++++------- .../fpgadataflow/make_zynq_proj.py | 4 +- 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/src/finn/transformation/fpgadataflow/floorplan.py b/src/finn/transformation/fpgadataflow/floorplan.py index 7d93ff88fc..0b806ff44a 100644 --- a/src/finn/transformation/fpgadataflow/floorplan.py +++ b/src/finn/transformation/fpgadataflow/floorplan.py @@ -134,25 +134,27 @@ def apply(self, model): ) non_dma_nodes = list(filter(lambda x: x not in dyn_tlastmarker_nodes, non_dma_nodes)) + # assign every DMA node to its own partition for node in dma_nodes: node_inst = getCustomOp(node) node_inst.set_nodeattr("partition_id", partition_cnt) partition_cnt += 1 + # assign every dynamic tLastMarker node to its own partition for node in dyn_tlastmarker_nodes: node_inst = getCustomOp(node) node_inst.set_nodeattr("partition_id", partition_cnt) partition_cnt += 1 + # handle remaining nodes for node in non_dma_nodes: pre_node = model.find_producer(node.input[0]) node_inst = getCustomOp(node) if pre_node not in non_dma_nodes: - # input node + # input node -> start new partition node_inst.set_nodeattr("partition_id", partition_cnt) partition_cnt += 1 continue - elif not ( node.op_type.startswith("MVAU") and node_inst.get_nodeattr("mem_mode") is not None @@ -160,25 +162,36 @@ def apply(self, model): ): pre_nodes = model.find_direct_predecessors(node) else: + # exception for external weight MVAU: only consider primary input + # TODO: (why) is this necessary? should we consider such exceptions for other cases? 
pre_nodes = [pre_node] + axilite_intf_name = node_inst.get_verilog_top_module_intf_names()["axilite"] + if len(axilite_intf_name) != 0: + # This node has an AXI-Lite interface -> start new partition + node_inst.set_nodeattr("partition_id", partition_cnt) + partition_cnt += 1 + continue + + # examine all predecessor nodes to determine partition id for this node node_slr = node_inst.get_nodeattr("slr") + slr_mismatch_count = 0 for pre_node in pre_nodes: pre_inst = getCustomOp(pre_node) pre_slr = pre_inst.get_nodeattr("slr") if node_slr == pre_slr: - axilite_intf_name = pre_inst.get_verilog_top_module_intf_names()["axilite"] - if len(axilite_intf_name) != 0: - node_inst.set_nodeattr("partition_id", partition_cnt) - partition_cnt += 1 - else: - partition_id = pre_inst.get_nodeattr("partition_id") - node_inst.set_nodeattr("partition_id", partition_id) - + # Default case -> assign to same partition as predecessor + partition_id = pre_inst.get_nodeattr("partition_id") + node_inst.set_nodeattr("partition_id", partition_id) + break else: - # no matching, new partition - node_inst.set_nodeattr("partition_id", partition_cnt) - partition_cnt += 1 + # SLR mismatch with predecessor, can't assign same partition + slr_mismatch_count += 1 + + if slr_mismatch_count == len(pre_nodes): + # SLR mismatch with ALL predecessors -> start new partition + node_inst.set_nodeattr("partition_id", partition_cnt) + partition_cnt += 1 # save the updated floorplan floorplan = model.analysis(floorplan_params) diff --git a/src/finn/transformation/fpgadataflow/make_zynq_proj.py b/src/finn/transformation/fpgadataflow/make_zynq_proj.py index 8c990a8b3d..4d2ee3d50e 100644 --- a/src/finn/transformation/fpgadataflow/make_zynq_proj.py +++ b/src/finn/transformation/fpgadataflow/make_zynq_proj.py @@ -163,7 +163,7 @@ def apply(self, model): # configure instance config.append( "set_property -dict [list CONFIG.NUM_MI %d] [get_bd_cells axi_interconnect_%d]" - % (max(64, total_axilite_count), i) + % (min(64, 
total_axilite_count), i) ) # connect to master interconnect config.append( @@ -185,7 +185,7 @@ def apply(self, model): % (i) ) master_axilite_idx += 1 - total_axilite_count = min(0, total_axilite_count - 64) + total_axilite_count = max(0, total_axilite_count - 64) assert total_axilite_count == 0, "Not all AXI-lite interfaces connected!" From 684459c76189c22b9aa004a7c0028ee1c77a5a0d Mon Sep 17 00:00:00 2001 From: Felix Jentzsch Date: Wed, 19 Feb 2025 22:56:06 +0000 Subject: [PATCH 05/17] Add GPIO IP for reset --- .../transformation/fpgadataflow/make_zynq_proj.py | 14 +++++++++++--- src/finn/transformation/fpgadataflow/templates.py | 11 +++++++++++ 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/src/finn/transformation/fpgadataflow/make_zynq_proj.py b/src/finn/transformation/fpgadataflow/make_zynq_proj.py index 4d2ee3d50e..456441bca8 100644 --- a/src/finn/transformation/fpgadataflow/make_zynq_proj.py +++ b/src/finn/transformation/fpgadataflow/make_zynq_proj.py @@ -94,6 +94,7 @@ def __init__(self, platform, enable_debug=False): super().__init__() self.platform = platform self.enable_debug = 1 if enable_debug else 0 + self.enable_gpio_reset = 0 def apply(self, model): # create a config file and empty list of xo files @@ -112,6 +113,12 @@ def apply(self, model): instr_ip_dir = model.get_metadata_prop("instrumentation_ipgen") if instr_ip_dir is not None and os.path.isdir(instr_ip_dir): use_instrumentation = True + + # instantiate GPIO IP to trigger reset + self.enable_gpio_reset = 1 + # in the template this will connect to first port of interconnect_0 + master_axilite_idx += 1 + # update IP repository config.append( "set_property ip_repo_paths " @@ -170,7 +177,7 @@ def apply(self, model): "connect_bd_intf_net [get_bd_intf_pins axi_interconnect_0/M%02d_AXI] -boundary_type upper [get_bd_intf_pins axi_interconnect_%d/S00_AXI]" % (master_axilite_idx, i) ) - # connect clocks TODO: suppport zynq_7000 + # connect clocks/reset TODO: suppport zynq_7000 
config.append( "apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/pl_clk0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}} [get_bd_pins axi_interconnect_%d/ACLK]" % (i) @@ -179,7 +186,7 @@ def apply(self, model): "apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/pl_clk0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}} [get_bd_pins axi_interconnect_%d/S00_ACLK]" % (i) ) - # connect reset + # connect reset TODO: probably unneeded config.append( "connect_bd_net [get_bd_pins axi_interconnect_%d/ARESETN] [get_bd_pins axi_interconnect_0/ARESETN]" % (i) @@ -361,7 +368,7 @@ def apply(self, model): config.append("delete_bd_objs [get_bd_cells smartconnect_0]") aximm_idx = 1 - # finalize nested interconnect clock TODO: support zynq_7000 + # finalize nested interconnect clock/reset TODO: support zynq_7000 for i in range(1, nested_interconnect_count + 1): config.append( "apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/pl_clk0} } [get_bd_pins axi_interconnect_%d/M*_ACLK]" @@ -388,6 +395,7 @@ def apply(self, model): pynq_part_map[self.platform], config, self.enable_debug, + self.enable_gpio_reset, ) ) diff --git a/src/finn/transformation/fpgadataflow/templates.py b/src/finn/transformation/fpgadataflow/templates.py index ccf4e7a943..0f6ba7c3c4 100644 --- a/src/finn/transformation/fpgadataflow/templates.py +++ b/src/finn/transformation/fpgadataflow/templates.py @@ -218,6 +218,17 @@ ] } +# set up GPIO to trigger reset +if {%d == 1} { + create_bd_cell -type ip -vlnv xilinx.com:ip:axi_gpio:2.0 axi_gpio_0 + set_property -dict [list CONFIG.C_ALL_OUTPUTS {1} CONFIG.C_DOUT_DEFAULT {0x00000001} CONFIG.C_GPIO_WIDTH {1}] [get_bd_cells axi_gpio_0] + connect_bd_intf_net [get_bd_intf_pins axi_gpio_0/S_AXI] -boundary_type upper [get_bd_intf_pins axi_interconnect_0/M00_AXI] + assign_axi_addr_proc axi_gpio_0/S_AXI + connect_bd_net [get_bd_pins axi_gpio_0/s_axi_aclk] [get_bd_pins axi_interconnect_0/ACLK] + connect_bd_net 
[get_bd_pins axi_gpio_0/s_axi_aresetn] [get_bd_pins axi_interconnect_0/ARESETN] + connect_bd_net [get_bd_pins axi_gpio_0/gpio_io_o] [get_bd_pins rst_zynq_ps_*/aux_reset_in] +} + #finalize clock and reset connections for interconnects if {$ZYNQ_TYPE == "zynq_us+"} { apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/pl_clk0} } [get_bd_pins axi_interconnect_0/M*_ACLK] From 8d454886c16f7495106d4ec477c54f5ba99bcb3d Mon Sep 17 00:00:00 2001 From: Felix Jentzsch Date: Thu, 20 Feb 2025 07:55:52 +0000 Subject: [PATCH 06/17] Remove unneeded connect_bd_net --- src/finn/transformation/fpgadataflow/make_zynq_proj.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/finn/transformation/fpgadataflow/make_zynq_proj.py b/src/finn/transformation/fpgadataflow/make_zynq_proj.py index 456441bca8..d462dc9d6b 100644 --- a/src/finn/transformation/fpgadataflow/make_zynq_proj.py +++ b/src/finn/transformation/fpgadataflow/make_zynq_proj.py @@ -186,11 +186,6 @@ def apply(self, model): "apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/pl_clk0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}} [get_bd_pins axi_interconnect_%d/S00_ACLK]" % (i) ) - # connect reset TODO: probably unneeded - config.append( - "connect_bd_net [get_bd_pins axi_interconnect_%d/ARESETN] [get_bd_pins axi_interconnect_0/ARESETN]" - % (i) - ) master_axilite_idx += 1 total_axilite_count = max(0, total_axilite_count - 64) From 960a7f46a48519d4d63183a4de234bd0b12857bf Mon Sep 17 00:00:00 2001 From: Felix Jentzsch Date: Thu, 20 Feb 2025 18:01:02 +0000 Subject: [PATCH 07/17] Fix redundant bd_automation --- src/finn/transformation/fpgadataflow/make_zynq_proj.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/finn/transformation/fpgadataflow/make_zynq_proj.py b/src/finn/transformation/fpgadataflow/make_zynq_proj.py index d462dc9d6b..846d95a11b 100644 --- a/src/finn/transformation/fpgadataflow/make_zynq_proj.py +++ 
b/src/finn/transformation/fpgadataflow/make_zynq_proj.py @@ -182,10 +182,6 @@ def apply(self, model): "apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/pl_clk0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}} [get_bd_pins axi_interconnect_%d/ACLK]" % (i) ) - config.append( - "apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/pl_clk0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}} [get_bd_pins axi_interconnect_%d/S00_ACLK]" - % (i) - ) master_axilite_idx += 1 total_axilite_count = max(0, total_axilite_count - 64) From 76ef35d988611261142395633eb2eeb28886f9c8 Mon Sep 17 00:00:00 2001 From: Felix Jentzsch Date: Fri, 21 Feb 2025 11:12:12 +0000 Subject: [PATCH 08/17] Remove tcl.collectionResultDisplayLimit --- src/finn/transformation/fpgadataflow/templates.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/finn/transformation/fpgadataflow/templates.py b/src/finn/transformation/fpgadataflow/templates.py index 0f6ba7c3c4..d9040d83f2 100644 --- a/src/finn/transformation/fpgadataflow/templates.py +++ b/src/finn/transformation/fpgadataflow/templates.py @@ -100,6 +100,10 @@ set FPGA_PART %s create_project finn_zynq_link ./ -part $FPGA_PART +# Prevent limitation on number of elements for string representations of Vivado collections of objects +# Otherwise we might run into the default limit of 500 if we have many IP_REPO_PATHS +set_param tcl.collectionResultDisplayLimit 0 + # set board part repo paths to find PYNQ-Z1/Z2 set paths_prop [get_property BOARD_PART_REPO_PATHS [current_project]] set paths_param [get_param board.repoPaths] From 3598501532ede834cf894439bab9793dc49a853f Mon Sep 17 00:00:00 2001 From: Felix Jentzsch Date: Thu, 27 Feb 2025 17:49:47 +0000 Subject: [PATCH 09/17] Add PYNQ driver for ZYNQ platforms --- src/finn/builder/build_dataflow_steps.py | 10 +- .../driver/driver_instrumentation.py | 143 ++++++++++++++++++ .../fpgadataflow/make_pynq_driver.py | 33 +++- 3 files changed, 183 insertions(+), 3 
deletions(-) create mode 100644 src/finn/qnn-data/templates/driver/driver_instrumentation.py diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py index a4481ed778..96f3bd7c63 100644 --- a/src/finn/builder/build_dataflow_steps.py +++ b/src/finn/builder/build_dataflow_steps.py @@ -90,7 +90,10 @@ from finn.transformation.fpgadataflow.insert_dwc import InsertDWC from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO from finn.transformation.fpgadataflow.insert_tlastmarker import InsertTLastMarker -from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver +from finn.transformation.fpgadataflow.make_pynq_driver import ( + MakePYNQDriverIODMA, + MakePYNQDriverInstrumentation, +) from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild from finn.transformation.fpgadataflow.minimize_accumulator_width import ( MinimizeAccumulatorWidth, @@ -782,7 +785,10 @@ def step_make_pynq_driver(model: ModelWrapper, cfg: DataflowBuildConfig): if DataflowOutputType.PYNQ_DRIVER in cfg.generate_outputs: driver_dir = cfg.output_dir + "/driver" - model = model.transform(MakePYNQDriver(cfg._resolve_driver_platform())) + if cfg.enable_instrumentation: + model = model.transform(MakePYNQDriverInstrumentation(cfg._resolve_driver_platform(), cfg.synth_clk_period_ns)) + else: + model = model.transform(MakePYNQDriverIODMA(cfg._resolve_driver_platform())) shutil.copytree(model.get_metadata_prop("pynq_driver_dir"), driver_dir, dirs_exist_ok=True) print("PYNQ Python driver written into " + driver_dir) return model diff --git a/src/finn/qnn-data/templates/driver/driver_instrumentation.py b/src/finn/qnn-data/templates/driver/driver_instrumentation.py new file mode 100644 index 0000000000..fea9446bf5 --- /dev/null +++ b/src/finn/qnn-data/templates/driver/driver_instrumentation.py @@ -0,0 +1,143 @@ +import time +import json +import argparse +import matplotlib as mpl +import matplotlib.pyplot as plt +from IPython.display 
import clear_output +import numpy as np +from pynq import Overlay +from pynq.ps import Clocks +from pynq.pl_server.device import Device + +### Instrumentation wrapper register map ### +#ap_uint<32> cfg, // [0] - 0:hold, 1:lfsr; [31:16] - LFSR seed +#ap_uint<32> &status, // [0] - timestamp overflow; [1] - timestamp underflow +#ap_uint<32> &latency, +#ap_uint<32> &interval, +#ap_uint<32> &checksum, +#ap_uint<32> &min_latency + +class FINNInstrumentationOverlay(Overlay): + def __init__( + self, + bitfile_name, + platform = "zynq", + fclk_mhz = 100.0, + device = None, + download = True, + seed = 1, + ): + super().__init__(bitfile_name, download=download, device=device) + + self.platform = platform + self.fclk_mhz = fclk_mhz + self.seed = seed + + # configure clock (for ZYNQ platforms) + if self.platform == "zynq": + if self.fclk_mhz > 0: + Clocks.fclk0_mhz = self.fclk_mhz + self.fclk_mhz_actual = Clocks.fclk0_mhz + + def instrumentation_read(self, name): + return self.instrumentation_wrap_0.read(offset=self.ip_dict["instrumentation_wrap_0"]["registers"][name]["address_offset"]) + + def instrumentation_write(self, name, value): + return self.instrumentation_wrap_0.write(offset=self.ip_dict["instrumentation_wrap_0"]["registers"][name]["address_offset"], value=value) + + def reset_accelerator(self): + self.axi_gpio_0.write(offset=self.ip_dict["axi_gpio_0"]["registers"]["GPIO_DATA"]["address_offset"], value=0) + + def start_accelerator(self): + lfsr_seed = (self.seed << 16) & 0xffff0000 # upper 16 bits + self.instrumentation_write("cfg", lfsr_seed + 1) # start operation + + def observe_instrumentation(self, debug_print=True): + status_reg = self.instrumentation_read("status") + chksum_reg = self.instrumentation_read("checksum") + min_latency = self.instrumentation_read("min_latency") + latency = self.instrumentation_read("latency") + interval = self.instrumentation_read("interval") + + frame = (chksum_reg >> 24) & 0x000000ff + checksum = chksum_reg & 0x00ffffff + 
overflow_err = (status_reg & 0x00000001) != 0 + underflow_err = (status_reg & 0x00000002) != 0 + + if debug_print: + print("---INSTRUMENTATION_REPORT---") + if overflow_err or underflow_err: + print("Status ERROR") + print("Overflow error: %s" % overflow_err) + print("Underflow error: %s" % underflow_err) + else: + print("Status OK") + print("Frame number (8-bit): %d" % frame) + print("Checksum: 0x%06x" % checksum) + print("Min Latency (cycles): %d" % min_latency) + print("Latency (cycles): %d" % latency) + print("Interval (cycles): %d" % interval) + print("----------------------------") + + return (overflow_err, underflow_err, frame, checksum, min_latency, latency, interval) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Profile performance of FINN-generated accelerator using instrumentation wrapper') + parser.add_argument('--runtime', help='Runtime in seconds', type=int, default=10) + parser.add_argument('--frequency', help='FPGA clock frequency in MHz', type=float, default=100.0) + parser.add_argument('--seed', help='LFSR seed for input data generation', type=int, default=1) + parser.add_argument('--device', help='FPGA device to be used', type=int, default=0) + parser.add_argument('--bitfile', help='Name of bitfile', default="finn-accel.bit") + parser.add_argument('--reportfile', help='Name of output .json report file', type=str, default="measured_performance.json") + parser.add_argument('--settingsfile', help='Name of optional input .json settings file', type=str, default="") + # parse arguments + args = parser.parse_args() + runtime = args.runtime + frequency = args.frequency + seed = args.seed + bitfile = args.bitfile + reportfile = args.reportfile + settingsfile = args.settingsfile + devID = args.device + device = Device.devices[devID] + + # overwrite frequency if specified in settings file + if settingsfile != "": + with open(settingsfile, "r") as f: + settings = json.load(f) + if "fclk_mhz" in settings: + frequency = 
settings["fclk_mhz"] + + # instantiate FINN accelerator driver and pass batchsize and bitfile + print("Programming FPGA..") + accel = FINNInstrumentationOverlay(bitfile_name = bitfile, device = device, fclk_mhz = frequency, seed = seed) + + # start accelerator + print("Running accelerator..") + accel.start_accelerator() + + # let it run for specified runtime + time.sleep(runtime) + + # read measurement from instrumentation + (overflow_err, underflow_err, frame, checksum, min_latency, latency, interval) = accel.observe_instrumentation() + + # write report to file + report = { + "error": overflow_err or underflow_err or interval == 0, + "checksum": checksum, + "min_latency_cycles": min_latency, + "latency_cycles": latency, + "interval_cycles": interval, + "frequency_mhz": round(accel.fclk_mhz_actual), + "min_latency_ms": round(min_latency * (1 / (accel.fclk_mhz_actual * 1e6)) * 1e3, 6), + "latency_ms": round(latency * (1 / (accel.fclk_mhz_actual * 1e6)) * 1e3, 6), + "throughput_fps": round(1 / (interval * (1 / (accel.fclk_mhz_actual * 1e6)))), + "min_pipeline_depth": round(min_latency / interval, 2), + "pipeline_depth" : round(latency / interval, 2), + } + with open(reportfile, "w") as f: + json.dump(report, f, indent=2) + + print("Done.") diff --git a/src/finn/transformation/fpgadataflow/make_pynq_driver.py b/src/finn/transformation/fpgadataflow/make_pynq_driver.py index ea9bd2aa26..b935f5eea0 100644 --- a/src/finn/transformation/fpgadataflow/make_pynq_driver.py +++ b/src/finn/transformation/fpgadataflow/make_pynq_driver.py @@ -28,6 +28,7 @@ import numpy as np import os +import json import qonnx import shutil import warnings @@ -62,7 +63,7 @@ def to_external_tensor(init, w_dtype): return ext_weight -class MakePYNQDriver(Transformation): +class MakePYNQDriverIODMA(Transformation): """Create PYNQ Python code to correctly interface the generated accelerator, including data packing/unpacking. 
Should be called after conversion to HLS layers, folding and the creation of @@ -302,4 +303,34 @@ def apply(self, model): else: continue + +class MakePYNQDriverInstrumentation(Transformation): + def __init__(self, platform, clk_period_ns): + super().__init__() + self.platform = platform + self.clk_period_ns = clk_period_ns + + def apply(self, model): + # TODO: support runtime-writable and external weights + # TODO: support Alveo and Versal platforms + + # create a temporary folder for the generated driver + pynq_driver_dir = make_build_dir(prefix="pynq_driver_") + model.set_metadata_prop("pynq_driver_dir", pynq_driver_dir) + + # create (copy) the static instrumentation driver + driver_template = ( + os.environ["FINN_ROOT"] + "/src/finn/qnn-data/templates/driver/driver_instrumentation.py" + ) + driver_py = pynq_driver_dir + "/driver.py" + shutil.copy(driver_template, driver_py) + + # write default settings to driver config file + settings = { + "fclk_mhz": (1.0 / self.clk_period_ns) * 1e3, + } + settingsfile = pynq_driver_dir + "/settings.json" + with open(settingsfile, "w") as f: + json.dump(settings, f, indent=2) + return (model, False) From 0c812bc54fbc4a5df24141a48e1cf646a0c008e2 Mon Sep 17 00:00:00 2001 From: Felix Jentzsch Date: Thu, 6 Mar 2025 10:20:24 +0000 Subject: [PATCH 10/17] Nested interconnects for Zynq-7000, fixes --- .../driver/driver_instrumentation.py | 101 +++++++++++------- .../fpgadataflow/make_pynq_driver.py | 7 +- .../fpgadataflow/make_zynq_proj.py | 16 +-- .../transformation/fpgadataflow/templates.py | 2 + 4 files changed, 80 insertions(+), 46 deletions(-) diff --git a/src/finn/qnn-data/templates/driver/driver_instrumentation.py b/src/finn/qnn-data/templates/driver/driver_instrumentation.py index fea9446bf5..90a0ed5b89 100644 --- a/src/finn/qnn-data/templates/driver/driver_instrumentation.py +++ b/src/finn/qnn-data/templates/driver/driver_instrumentation.py @@ -1,31 +1,28 @@ -import time -import json import argparse -import matplotlib as mpl 
-import matplotlib.pyplot as plt -from IPython.display import clear_output -import numpy as np +import json +import time from pynq import Overlay -from pynq.ps import Clocks from pynq.pl_server.device import Device +from pynq.ps import Clocks + +# Instrumentation wrapper register map # +# ap_uint<32> cfg, // [0] - 0:hold, 1:lfsr; [31:16] - LFSR seed +# ap_uint<32> &status, // [0] - timestamp overflow; [1] - timestamp underflow +# ap_uint<32> &latency, +# ap_uint<32> &interval, +# ap_uint<32> &checksum, +# ap_uint<32> &min_latency -### Instrumentation wrapper register map ### -#ap_uint<32> cfg, // [0] - 0:hold, 1:lfsr; [31:16] - LFSR seed -#ap_uint<32> &status, // [0] - timestamp overflow; [1] - timestamp underflow -#ap_uint<32> &latency, -#ap_uint<32> &interval, -#ap_uint<32> &checksum, -#ap_uint<32> &min_latency class FINNInstrumentationOverlay(Overlay): def __init__( self, bitfile_name, - platform = "zynq", - fclk_mhz = 100.0, - device = None, - download = True, - seed = 1, + platform="zynq", + fclk_mhz=100.0, + device=None, + download=True, + seed=1, ): super().__init__(bitfile_name, download=download, device=device) @@ -40,27 +37,34 @@ def __init__( self.fclk_mhz_actual = Clocks.fclk0_mhz def instrumentation_read(self, name): - return self.instrumentation_wrap_0.read(offset=self.ip_dict["instrumentation_wrap_0"]["registers"][name]["address_offset"]) + return self.instrumentation_wrap_0.read( + offset=self.ip_dict["instrumentation_wrap_0"]["registers"][name]["address_offset"] + ) def instrumentation_write(self, name, value): - return self.instrumentation_wrap_0.write(offset=self.ip_dict["instrumentation_wrap_0"]["registers"][name]["address_offset"], value=value) + return self.instrumentation_wrap_0.write( + offset=self.ip_dict["instrumentation_wrap_0"]["registers"][name]["address_offset"], + value=value, + ) def reset_accelerator(self): - self.axi_gpio_0.write(offset=self.ip_dict["axi_gpio_0"]["registers"]["GPIO_DATA"]["address_offset"], value=0) + 
self.axi_gpio_0.write( + offset=self.ip_dict["axi_gpio_0"]["registers"]["GPIO_DATA"]["address_offset"], value=0 + ) def start_accelerator(self): - lfsr_seed = (self.seed << 16) & 0xffff0000 # upper 16 bits - self.instrumentation_write("cfg", lfsr_seed + 1) # start operation + lfsr_seed = (self.seed << 16) & 0xFFFF0000 # upper 16 bits + self.instrumentation_write("cfg", lfsr_seed + 1) # start operation def observe_instrumentation(self, debug_print=True): status_reg = self.instrumentation_read("status") chksum_reg = self.instrumentation_read("checksum") min_latency = self.instrumentation_read("min_latency") latency = self.instrumentation_read("latency") - interval = self.instrumentation_read("interval") + interval = self.instrumentation_read("interval") - frame = (chksum_reg >> 24) & 0x000000ff - checksum = chksum_reg & 0x00ffffff + frame = (chksum_reg >> 24) & 0x000000FF + checksum = chksum_reg & 0x00FFFFFF overflow_err = (status_reg & 0x00000001) != 0 underflow_err = (status_reg & 0x00000002) != 0 @@ -83,14 +87,25 @@ def observe_instrumentation(self, debug_print=True): if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Profile performance of FINN-generated accelerator using instrumentation wrapper') - parser.add_argument('--runtime', help='Runtime in seconds', type=int, default=10) - parser.add_argument('--frequency', help='FPGA clock frequency in MHz', type=float, default=100.0) - parser.add_argument('--seed', help='LFSR seed for input data generation', type=int, default=1) - parser.add_argument('--device', help='FPGA device to be used', type=int, default=0) - parser.add_argument('--bitfile', help='Name of bitfile', default="finn-accel.bit") - parser.add_argument('--reportfile', help='Name of output .json report file', type=str, default="measured_performance.json") - parser.add_argument('--settingsfile', help='Name of optional input .json settings file', type=str, default="") + parser = argparse.ArgumentParser( + description="Profile 
FINN-generated accelerator using instrumentation wrapper" + ) + parser.add_argument("--runtime", help="Runtime in seconds", type=int, default=10) + parser.add_argument( + "--frequency", help="FPGA clock frequency in MHz", type=float, default=100.0 + ) + parser.add_argument("--seed", help="LFSR seed for input data generation", type=int, default=1) + parser.add_argument("--device", help="FPGA device to be used", type=int, default=0) + parser.add_argument("--bitfile", help="Name of bitfile", default="finn-accel.bit") + parser.add_argument( + "--reportfile", + help="Name of output .json report file", + type=str, + default="measured_performance.json", + ) + parser.add_argument( + "--settingsfile", help="Name of optional input .json settings file", type=str, default="" + ) # parse arguments args = parser.parse_args() runtime = args.runtime @@ -111,7 +126,9 @@ def observe_instrumentation(self, debug_print=True): # instantiate FINN accelerator driver and pass batchsize and bitfile print("Programming FPGA..") - accel = FINNInstrumentationOverlay(bitfile_name = bitfile, device = device, fclk_mhz = frequency, seed = seed) + accel = FINNInstrumentationOverlay( + bitfile_name=bitfile, device=device, fclk_mhz=frequency, seed=seed + ) # start accelerator print("Running accelerator..") @@ -121,7 +138,15 @@ def observe_instrumentation(self, debug_print=True): time.sleep(runtime) # read measurement from instrumentation - (overflow_err, underflow_err, frame, checksum, min_latency, latency, interval) = accel.observe_instrumentation() + ( + overflow_err, + underflow_err, + frame, + checksum, + min_latency, + latency, + interval, + ) = accel.observe_instrumentation() # write report to file report = { @@ -135,7 +160,7 @@ def observe_instrumentation(self, debug_print=True): "latency_ms": round(latency * (1 / (accel.fclk_mhz_actual * 1e6)) * 1e3, 6), "throughput_fps": round(1 / (interval * (1 / (accel.fclk_mhz_actual * 1e6)))), "min_pipeline_depth": round(min_latency / interval, 2), - 
"pipeline_depth" : round(latency / interval, 2), + "pipeline_depth": round(latency / interval, 2), } with open(reportfile, "w") as f: json.dump(report, f, indent=2) diff --git a/src/finn/transformation/fpgadataflow/make_pynq_driver.py b/src/finn/transformation/fpgadataflow/make_pynq_driver.py index b935f5eea0..c26fa845ed 100644 --- a/src/finn/transformation/fpgadataflow/make_pynq_driver.py +++ b/src/finn/transformation/fpgadataflow/make_pynq_driver.py @@ -26,9 +26,9 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import json import numpy as np import os -import json import qonnx import shutil import warnings @@ -303,6 +303,8 @@ def apply(self, model): else: continue + return (model, False) + class MakePYNQDriverInstrumentation(Transformation): def __init__(self, platform, clk_period_ns): @@ -320,7 +322,8 @@ def apply(self, model): # create (copy) the static instrumentation driver driver_template = ( - os.environ["FINN_ROOT"] + "/src/finn/qnn-data/templates/driver/driver_instrumentation.py" + os.environ["FINN_ROOT"] + + "/src/finn/qnn-data/templates/driver/driver_instrumentation.py" ) driver_py = pynq_driver_dir + "/driver.py" shutil.copy(driver_template, driver_py) diff --git a/src/finn/transformation/fpgadataflow/make_zynq_proj.py b/src/finn/transformation/fpgadataflow/make_zynq_proj.py index 846d95a11b..98372b700f 100644 --- a/src/finn/transformation/fpgadataflow/make_zynq_proj.py +++ b/src/finn/transformation/fpgadataflow/make_zynq_proj.py @@ -174,13 +174,16 @@ def apply(self, model): ) # connect to master interconnect config.append( - "connect_bd_intf_net [get_bd_intf_pins axi_interconnect_0/M%02d_AXI] -boundary_type upper [get_bd_intf_pins axi_interconnect_%d/S00_AXI]" + "connect_bd_intf_net [get_bd_intf_pins axi_interconnect_0/M%02d_AXI] " + "-boundary_type upper [get_bd_intf_pins axi_interconnect_%d/S00_AXI]" % (master_axilite_idx, i) ) - # connect 
clocks/reset TODO: suppport zynq_7000 + # connect clocks/reset config.append( - "apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/pl_clk0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}} [get_bd_pins axi_interconnect_%d/ACLK]" - % (i) + "apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config " + "{ Clk {/zynq_ps/$zynq_ps_clkname} Freq {} " + "Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}} " + "[get_bd_pins axi_interconnect_%d/ACLK]" % (i) ) master_axilite_idx += 1 total_axilite_count = max(0, total_axilite_count - 64) @@ -359,10 +362,11 @@ def apply(self, model): config.append("delete_bd_objs [get_bd_cells smartconnect_0]") aximm_idx = 1 - # finalize nested interconnect clock/reset TODO: support zynq_7000 + # finalize nested interconnect clock/reset for i in range(1, nested_interconnect_count + 1): config.append( - "apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/pl_clk0} } [get_bd_pins axi_interconnect_%d/M*_ACLK]" + "apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config " + "{ Clk {/zynq_ps/$zynq_ps_clkname} } [get_bd_pins axi_interconnect_%d/M*_ACLK]" % (i) ) diff --git a/src/finn/transformation/fpgadataflow/templates.py b/src/finn/transformation/fpgadataflow/templates.py index d9040d83f2..6cde5cfa66 100644 --- a/src/finn/transformation/fpgadataflow/templates.py +++ b/src/finn/transformation/fpgadataflow/templates.py @@ -146,6 +146,7 @@ create_bd_design "top" if {$ZYNQ_TYPE == "zynq_us+"} { set zynq_ps_vlnv [get_property VLNV [get_ipdefs "xilinx.com:ip:zynq_ultra_ps_e:*"]] + set zynq_ps_clkname "pl_clk0" create_bd_cell -type ip -vlnv $zynq_ps_vlnv zynq_ps apply_bd_automation -rule xilinx.com:bd_rule:zynq_ultra_ps_e -config {apply_board_preset "1" } [get_bd_cells zynq_ps] #activate one slave port, deactivate the second master port @@ -156,6 +157,7 @@ set_property -dict [list CONFIG.PSU__CRL_APB__PL0_REF_CTRL__FREQMHZ [expr int($FREQ_MHZ)]] [get_bd_cells zynq_ps] } elseif {$ZYNQ_TYPE == "zynq_7000"} { set 
zynq_ps_vlnv [get_property VLNV [get_ipdefs "xilinx.com:ip:processing_system7:*"]] + set zynq_ps_clkname "FCLK_CLK0" create_bd_cell -type ip -vlnv $zynq_ps_vlnv zynq_ps apply_bd_automation -rule xilinx.com:bd_rule:processing_system7 -config {make_external "FIXED_IO, DDR" apply_board_preset "1" Master "Disable" Slave "Disable" } [get_bd_cells zynq_ps] set_property -dict [list CONFIG.PCW_USE_S_AXI_HP0 {1}] [get_bd_cells zynq_ps] From 230ac92471342c0a28e91168fc3b57895b0c8651 Mon Sep 17 00:00:00 2001 From: Felix Jentzsch Date: Fri, 7 Mar 2025 11:52:05 +0000 Subject: [PATCH 11/17] Fix clkname variable expansion --- src/finn/transformation/fpgadataflow/make_zynq_proj.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/finn/transformation/fpgadataflow/make_zynq_proj.py b/src/finn/transformation/fpgadataflow/make_zynq_proj.py index 98372b700f..c6449468cf 100644 --- a/src/finn/transformation/fpgadataflow/make_zynq_proj.py +++ b/src/finn/transformation/fpgadataflow/make_zynq_proj.py @@ -181,9 +181,7 @@ def apply(self, model): # connect clocks/reset config.append( "apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config " - "{ Clk {/zynq_ps/$zynq_ps_clkname} Freq {} " - "Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}} " - "[get_bd_pins axi_interconnect_%d/ACLK]" % (i) + '"Clk /zynq_ps/$zynq_ps_clkname" [get_bd_pins axi_interconnect_%d/ACLK]' % (i) ) master_axilite_idx += 1 total_axilite_count = max(0, total_axilite_count - 64) @@ -366,8 +364,7 @@ def apply(self, model): for i in range(1, nested_interconnect_count + 1): config.append( "apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config " - "{ Clk {/zynq_ps/$zynq_ps_clkname} } [get_bd_pins axi_interconnect_%d/M*_ACLK]" - % (i) + '"Clk /zynq_ps/$zynq_ps_clkname" [get_bd_pins axi_interconnect_%d/M*_ACLK]' % (i) ) # create a temporary folder for the project From b0fb5f258c984f1f30aea20cefbed1f01b5a27e1 Mon Sep 17 00:00:00 2001 From: Felix Jentzsch Date: Sun, 9 Mar 2025 10:29:39 +0000 Subject: [PATCH 
12/17] [Driver] Reset PYNQ cache before loading Overlay --- src/finn/qnn-data/templates/driver/driver_instrumentation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/finn/qnn-data/templates/driver/driver_instrumentation.py b/src/finn/qnn-data/templates/driver/driver_instrumentation.py index 90a0ed5b89..aa5225eab6 100644 --- a/src/finn/qnn-data/templates/driver/driver_instrumentation.py +++ b/src/finn/qnn-data/templates/driver/driver_instrumentation.py @@ -1,7 +1,7 @@ import argparse import json import time -from pynq import Overlay +from pynq import PL, Overlay from pynq.pl_server.device import Device from pynq.ps import Clocks @@ -126,6 +126,7 @@ def observe_instrumentation(self, debug_print=True): # instantiate FINN accelerator driver and pass batchsize and bitfile print("Programming FPGA..") + PL.reset() # reset PYNQ cache accel = FINNInstrumentationOverlay( bitfile_name=bitfile, device=device, fclk_mhz=frequency, seed=seed ) From 7a3f928dc83ea8b98fe4464d6b8a9217a8d879b4 Mon Sep 17 00:00:00 2001 From: Felix Jentzsch Date: Tue, 20 May 2025 17:50:02 +0200 Subject: [PATCH 13/17] Adapt to FINN_ROOT refactoring --- src/finn/transformation/fpgadataflow/instrumentation.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/finn/transformation/fpgadataflow/instrumentation.py b/src/finn/transformation/fpgadataflow/instrumentation.py index 7f37c5ed14..a22d770307 100644 --- a/src/finn/transformation/fpgadataflow/instrumentation.py +++ b/src/finn/transformation/fpgadataflow/instrumentation.py @@ -28,7 +28,7 @@ def collect_ip_dirs(model, ipstitch_path): ip_dirs += [ipstitch_path + "/ip"] if need_memstreamer: # add RTL streamer IP - ip_dirs.append("$::env(FINN_ROOT)/finn-rtllib/memstream") + ip_dirs.append("$::env(FINN_RTLLIB)/memstream") return ip_dirs @@ -71,7 +71,7 @@ def apply(self, model): ko = out_shape_folded[-1] # fill out instrumentation wrapper template with open( - os.path.join(os.environ["FINN_ROOT"], 
"custom_hls", "instrumentation.template.cpp"), "r" + os.path.join(os.environ["FINN_CUSTOM_HLS"], "instrumentation.template.cpp"), "r" ) as f: instrwrp_cpp = f.read() instrwrp_cpp = instrwrp_cpp.replace("@PENDING@", str(pending)) @@ -150,7 +150,7 @@ def apply(self, model): # TODO: Support simulation with AXI-lite control interfaces (e.g., for dynamic pipelines) # fill in testbench template with open( - os.path.join(os.environ["FINN_ROOT"], "custom_hls", "instrumentation_tb.template.sv"), + os.path.join(os.environ["FINN_CUSTOM_HLS"], "instrumentation_tb.template.sv"), "r", ) as f: testbench_sv = f.read() @@ -158,7 +158,7 @@ def apply(self, model): f.write(testbench_sv) # fill in testbench project creator template with open( - os.path.join(os.environ["FINN_ROOT"], "custom_hls", "instrumentation_sim.template.tcl"), + os.path.join(os.environ["FINN_CUSTOM_HLS"], "instrumentation_sim.template.tcl"), "r", ) as f: testbench_tcl = f.read() From ccebbdca2b6eb88dffded9b1e794ce9912b7af89 Mon Sep 17 00:00:00 2001 From: Felix Jentzsch Date: Tue, 20 May 2025 17:59:48 +0200 Subject: [PATCH 14/17] Fix use of deprecated FINN_ROOT --- src/finn/transformation/fpgadataflow/make_driver.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/finn/transformation/fpgadataflow/make_driver.py b/src/finn/transformation/fpgadataflow/make_driver.py index b17cb9c8e8..1cea95f9c5 100644 --- a/src/finn/transformation/fpgadataflow/make_driver.py +++ b/src/finn/transformation/fpgadataflow/make_driver.py @@ -477,8 +477,7 @@ def apply(self, model): # create (copy) the static instrumentation driver driver_template = ( - os.environ["FINN_ROOT"] - + "/src/finn/qnn-data/templates/driver/driver_instrumentation.py" + os.environ["FINN_QNN_DATA"] + "/templates/driver/driver_instrumentation.py" ) driver_py = pynq_driver_dir + "/driver.py" shutil.copy(driver_template, driver_py) From cc0be94bb0ae15e8721ad6c9c5a525602ae9de81 Mon Sep 17 00:00:00 2001 From: Felix Jentzsch Date: Wed, 21 May 2025 
16:16:05 +0200 Subject: [PATCH 15/17] [CI] Adapt to recent runner version change --- .gitlab-ci.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index ebdad54bee..a2f9527976 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -93,11 +93,11 @@ Sync finn-dev: .setup_venv_from_whl: &setup_venv_from_whl # Move everything to working directory (e.g., RAMdisk) - - cp -dfR .. $PATH_WORKDIR + - cp -dfR . $PATH_WORKDIR - cd $PATH_WORKDIR # Create fresh virtual environment and install finn-plus from .whl (artifact) - python3 -m venv finn-plus-venv - - finn-plus-venv/bin/pip install ./finn-plus/dist/*.whl + - finn-plus-venv/bin/pip install dist/*.whl Build: id_tokens: @@ -171,8 +171,8 @@ FINN Test Suite 2022.2: - $JOB_MONITORING_DIR/monitor.sh $JOB_MONITORING_DIR/$CI_PIPELINE_ID/$HOSTNAME.log & # Launch FINN via test command, includes preparation of (cached) dependencies - | - source ./finn-plus-venv/bin/activate - finn test --variant $TEST_SUITE --dependency-path ./finn-plus/deps --build-path $FINN_BUILD_DIR --num-workers 1 --num-test-workers $PYTEST_PARALLEL + source finn-plus-venv/bin/activate + finn test --variant $TEST_SUITE --dependency-path ./deps --build-path $FINN_BUILD_DIR --num-workers 1 --num-test-workers $PYTEST_PARALLEL artifacts: name: "test_reports" when: always From a942390d20d27e8d2c9a1ea70e95bea523b91442 Mon Sep 17 00:00:00 2001 From: Felix Jentzsch Date: Wed, 21 May 2025 21:09:59 +0200 Subject: [PATCH 16/17] Refactor remaining MakePYNQDriver calls --- notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb | 4 ++-- notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb | 4 ++-- src/finn/qnn-data/templates/driver/driver_base.py | 2 +- tests/end2end/test_end2end_bnn_pynq.py | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb b/notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb index 
2b01f24557..014a13db27 100644 --- a/notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb +++ b/notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb @@ -456,8 +456,8 @@ "metadata": {}, "outputs": [], "source": [ - "from finn.transformation.fpgadataflow.make_driver import MakePYNQDriver\n", - "model = model.transform(MakePYNQDriver(\"zynq-iodma\"))" + "from finn.transformation.fpgadataflow.make_driver import MakePYNQDriverIODMA\n", + "model = model.transform(MakePYNQDriverIODMA(\"zynq-iodma\"))" ] }, { diff --git a/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb b/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb index b0510b0fdb..de6de23d3f 100644 --- a/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb +++ b/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb @@ -751,8 +751,8 @@ "metadata": {}, "outputs": [], "source": [ - "from finn.transformation.fpgadataflow.make_driver import MakePYNQDriver\n", - "model = model.transform(MakePYNQDriver(\"zynq-iodma\"))" + "from finn.transformation.fpgadataflow.make_driver import MakePYNQDriverIODMA\n", + "model = model.transform(MakePYNQDriverIODMA(\"zynq-iodma\"))" ] }, { diff --git a/src/finn/qnn-data/templates/driver/driver_base.py b/src/finn/qnn-data/templates/driver/driver_base.py index a6ff29d608..af55ee13df 100644 --- a/src/finn/qnn-data/templates/driver/driver_base.py +++ b/src/finn/qnn-data/templates/driver/driver_base.py @@ -38,7 +38,7 @@ # Driver base class for FINN-generated dataflow accelerators. # The particulars of the generated accelerator are specified via the -# io_shape_dict (generated by the MakePYNQDriver transformation). +# io_shape_dict (generated by the MakePYNQDriverIODMA transformation). 
class FINNExampleOverlay(Overlay): diff --git a/tests/end2end/test_end2end_bnn_pynq.py b/tests/end2end/test_end2end_bnn_pynq.py index 9a2da7a45e..9d40b3ba93 100644 --- a/tests/end2end/test_end2end_bnn_pynq.py +++ b/tests/end2end/test_end2end_bnn_pynq.py @@ -73,7 +73,7 @@ from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP from finn.transformation.fpgadataflow.insert_dwc import InsertDWC -from finn.transformation.fpgadataflow.make_driver import MakePYNQDriver +from finn.transformation.fpgadataflow.make_driver import MakePYNQDriverIODMA from finn.transformation.fpgadataflow.minimize_accumulator_width import MinimizeAccumulatorWidth from finn.transformation.fpgadataflow.minimize_weight_bit_width import MinimizeWeightBitWidth from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim @@ -812,7 +812,7 @@ def test_make_pynq_driver(self, topology, wbits, abits, board): prev_chkpt_name = get_checkpoint_name(board, topology, wbits, abits, "build") model = load_test_checkpoint_or_skip(prev_chkpt_name) board_to_driver_platform = "alveo" if build_data["kind"] == "alveo" else "zynq-iodma" - model = model.transform(MakePYNQDriver(board_to_driver_platform)) + model = model.transform(MakePYNQDriverIODMA(board_to_driver_platform)) model.save(get_checkpoint_name(board, topology, wbits, abits, "driver")) def test_deploy(self, topology, wbits, abits, board): From 5531476e923a4136638bb51a6688415c4a8d8e57 Mon Sep 17 00:00:00 2001 From: Felix Jentzsch Date: Thu, 22 May 2025 15:16:20 +0200 Subject: [PATCH 17/17] Fix codegen after deps path refactoring --- src/finn/transformation/fpgadataflow/instrumentation.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/finn/transformation/fpgadataflow/instrumentation.py b/src/finn/transformation/fpgadataflow/instrumentation.py index a22d770307..f2b3b21f6d 100644 --- a/src/finn/transformation/fpgadataflow/instrumentation.py +++ 
b/src/finn/transformation/fpgadataflow/instrumentation.py @@ -6,6 +6,7 @@ from finn.custom_op.fpgadataflow.templates import ipgentcl_template from finn.util.basic import make_build_dir +from finn.util.deps import get_deps_path from finn.util.hls import CallHLS @@ -87,6 +88,8 @@ def apply(self, model): ipgentcl = ipgentcl_template ipgentcl = ipgentcl.replace("$PROJECTNAME$", prjname) ipgentcl = ipgentcl.replace("$HWSRCDIR$", wrapper_output_dir) + ipgentcl = ipgentcl.replace("$FINNHLSLIB$", str(get_deps_path() / "finn-hlslib")) + ipgentcl = ipgentcl.replace("$ATTENTIONHLSLIB$", str(get_deps_path() / "attention-hlslib")) ipgentcl = ipgentcl.replace("$TOPFXN$", "instrumentation_wrapper") ipgentcl = ipgentcl.replace("$FPGAPART$", self.fpga_part) ipgentcl = ipgentcl.replace("$CLKPERIOD$", str(self.clk_period_ns))