eki-project · fpjentzsch · May 22, 2025 · Feb 14, 2025 · Feb 14, 2025 · Feb 19, 2025
diff --git a/custom_hls/virtual_fifo.hpp b/custom_hls/virtual_fifo.hpp
@@ -0,0 +1,81 @@
+#ifndef VIRTUAL_FIFO_HPP
+#define VIRTUAL_FIFO_HPP
+
+#include <ap_int.h>
+#include <hls_stream.h>
+#include <ap_axi_sdata.h>
+
+// Utility Functions, taken from instrumentation wrapper
+// Stream-to-stream copy helper: each invocation takes one element from
+// `src` and writes it unchanged to `dst`.
+template<typename  T>
+static void move(hls::stream<T> &src, hls::stream<T> &dst) {
+#pragma HLS pipeline II=1 style=flp
+	T const  elem = src.read();
+	dst.write(elem);
+}
+
+// AXI-Stream unpacking variant: each invocation takes one hls::axis<T, 0, 0, 0>
+// element from `src` and writes only its `data` member to `dst`.
+template<typename  T>
+static void move(hls::stream<hls::axis<T, 0, 0, 0>> &src, hls::stream<T> &dst) {
+#pragma HLS pipeline II=1 style=flp
+	hls::axis<T, 0, 0, 0> const  beat = src.read();
+	dst.write(beat.data);
+}
+
+// Payload<X>::type maps an element type X to its payload type; by default that is X itself.
+template<typename  T>
+struct Payload {
+	using  type = T;
+};
+// Specialization: for hls::axis<T, 0, 0, 0> the payload type is the wrapped T.
+template<typename  T>
+struct Payload<hls::axis<T, 0, 0, 0>> {
+	using  type = T;
+};
+
+// Virtual FIFO: tracks a FIFO fill level in static counters without storing data. Elements
+// read from `in` are discarded; each element leaving is emitted on `out` as a dummy 0.
+//   mode, depth   - mode == 0: always try to read; otherwise read only while occupancy < depth
+//   occupancy     - out: fill level after this invocation
+//   max_occupancy - out: highest fill level recorded so far
+template<unsigned int Width>
+void VirtualFIFO(hls::stream<ap_uint<Width> > &in, hls::stream<ap_uint<Width> > &out,
+                ap_uint<32> mode, ap_uint<32> depth,
+                ap_uint<32> &occupancy, ap_uint<32> &max_occupancy)
+{
+    #pragma HLS pipeline II=1 style=flp
+
+    static ap_uint<32> c_occupancy = 0;
+    static ap_uint<32> c_max_occupancy = 0;
+    #pragma HLS reset variable=c_occupancy
+    #pragma HLS reset variable=c_max_occupancy
+
+    ap_uint<Width> inElem;
+
+    // Both decisions use the fill level from before this invocation. `<` instead of `!=`: the
+    // level can already exceed depth (after mode 0 or a lowered depth) and must still block.
+    bool read = mode == 0 || c_occupancy < depth;
+    bool write = c_occupancy != 0;
+
+    // INPUT: count the element, its data is disregarded
+    if(read && in.read_nb(inElem))
+    {
+        c_occupancy++;
+        // the maximum is sampled before the output side decrements below
+        c_max_occupancy = (c_occupancy > c_max_occupancy) ? c_occupancy : c_max_occupancy;
+    }
+
+    // OUTPUT: emit dummy data for one counted element
+    if(write && out.write_nb(0))
+    {
+        c_occupancy--;
+    }
+
+    // Update output status registers
+    occupancy = c_occupancy;
+    max_occupancy = c_max_occupancy;
+}
+
+#endif
diff --git a/src/finn/builder/build_dataflow_config.py b/src/finn/builder/build_dataflow_config.py
@@ -283,6 +283,9 @@ class DataflowBuildConfig(DataClassJSONMixin, DataClassYAMLMixin):
     #: for each FIFO.
     auto_fifo_depths: Optional[bool] = True
 
+    #: Enables experimental live FIFO sizing; overrides all other FIFO-related behavior.
+    live_fifo_sizing: Optional[bool] = False
+
     #: Whether FIFO nodes with depth larger than 32768 will be split.
     #: Allow to configure very large FIFOs in the folding_config_file.
     split_large_fifos: Optional[bool] = False

diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py
@@ -43,6 +43,7 @@
     GiveUniqueNodeNames,
     RemoveStaticGraphInputs,
     RemoveUnusedTensors,
+    SortGraph,
 )
 from qonnx.transformation.infer_data_layouts import InferDataLayouts
 from qonnx.transformation.infer_datatypes import InferDataTypes
@@ -529,6 +530,81 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig):
     `GiveUniqueNodeNames`.
     """
 
+    hw_attrs = [
+        "PE",
+        "SIMD",
+        "parallel_window",
+        "ram_style",
+        "depth",
+        "impl_style",
+        "resType",
+        "mem_mode",
+        "runtime_writeable_weights",
+        "inFIFODepths",
+        "outFIFODepths",
+        "depth_trigger_uram",
+        "depth_trigger_bram",
+    ]
+
+    # Experimental live FIFO-sizing, overwrites all other FIFO-related behavior
+    if cfg.live_fifo_sizing:
+        # Create all DWCs and FIFOs normally
+        model = model.transform(InsertDWC())
+        model = model.transform(
+            InsertFIFO(vivado_ram_style=cfg.large_fifo_mem_style, create_shallow_fifos=True)
+        )
+
+        # Clean up model
+        model = model.transform(SortGraph())
+        model = model.transform(GiveUniqueNodeNames())
+        model = model.transform(GiveReadableTensorNames())
+
+        # save original folding config before potentially modifying it
+        cfg_path = cfg.output_dir + "/report/folding_config_before_lfs.json"
+        extract_model_config_to_json(model, cfg_path, hw_attrs)
+        model.set_metadata_prop("folding_config_before_lfs", cfg_path)
+
+        # Disable runtime-writable weights, external weights, and dynamic mode,
+        # as we don't support additional AXI-lite interfaces next to the FIFOs
+        for node in model.graph.node:
+            if node.domain.startswith("finn.custom_op.fpgadataflow"):
+                node_inst = getCustomOp(node)
+                try:
+                    if node_inst.get_nodeattr("runtime_writeable_weights") == 1:
+                        node_inst.set_nodeattr("runtime_writeable_weights", 0)
+                        if node_inst.get_nodeattr("ram_style") == "ultra":
+                            node_inst.set_nodeattr("ram_style", "block")
+                except AttributeError:
+                    pass
+                try:
+                    if node_inst.get_nodeattr("mem_mode") == "external":
+                        node_inst.set_nodeattr("mem_mode", "internal_decoupled")
+                except AttributeError:
+                    pass
+                try:
+                    if node_inst.get_nodeattr("dynamic_mode") == 1:
+                        node_inst.set_nodeattr("dynamic_mode", 0)
+                except AttributeError:
+                    pass
+
+        # Specialize FIFOs to HLS back-end instead of default RTL back-end
+        for node in model.get_nodes_by_op_type("StreamingFIFO"):
+            node_inst = getCustomOp(node)
+            node_inst.set_nodeattr("preferred_impl_style", "hls")
+        model = model.transform(SpecializeLayers(cfg._resolve_fpga_part()))
+
+        # Fix impl_style attribute
+        for node in model.get_nodes_by_op_type("StreamingFIFO_hls"):
+            node_inst = getCustomOp(node)
+            node_inst.set_nodeattr("impl_style", "virtual")
+
+        # Clean up model
+        model = model.transform(SortGraph())
+        model = model.transform(GiveUniqueNodeNames())
+        model = model.transform(GiveReadableTensorNames())
+
+        return model
+
     if cfg.auto_fifo_depths:
         if cfg.auto_fifo_strategy == "characterize":
             model = model.transform(InsertDWC())
@@ -587,21 +663,6 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig):
             model = model.transform(ApplyConfig(cfg.folding_config_file))
 
     # extract the final configuration and save it as json
-    hw_attrs = [
-        "PE",
-        "SIMD",
-        "parallel_window",
-        "ram_style",
-        "depth",
-        "impl_style",
-        "resType",
-        "mem_mode",
-        "runtime_writeable_weights",
-        "inFIFODepths",
-        "outFIFODepths",
-        "depth_trigger_uram",
-        "depth_trigger_bram",
-    ]
     extract_model_config_to_json(model, cfg.output_dir + "/final_hw_config.json", hw_attrs)
 
     # perform FIFO splitting and shallow FIFO removal only after the final config
@@ -611,6 +672,23 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig):
         model = model.transform(SplitLargeFIFOs())
     model = model.transform(RemoveShallowFIFOs())
 
+    # generate a dedicated report about final FIFO sizes
+    fifo_info = {}
+    fifo_info["fifo_depths"] = {}
+    fifo_info["fifo_sizes"] = {}
+    total_fifo_size = 0
+    for node in model.get_nodes_by_op_type("StreamingFIFO_rtl"):
+        node_inst = getCustomOp(node)
+        fifo_info["fifo_depths"][node.name] = node_inst.get_nodeattr("depth")
+        fifo_info["fifo_sizes"][
+            node.name
+        ] = node_inst.get_instream_width() * node_inst.get_nodeattr("depth")
+        total_fifo_size += fifo_info["fifo_sizes"][node.name]
+    fifo_info["total_fifo_size_kB"] = int(total_fifo_size / 8.0 / 1000.0)
+
+    with open(cfg.output_dir + "/report/fifo_sizing.json", "w") as f:
+        json.dump(fifo_info, f, indent=2)
+
     # after FIFOs are ready to go, call PrepareIP and HLSSynthIP again
     # this will only run for the new nodes (e.g. FIFOs and DWCs)
     model = model.transform(PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period()))
@@ -745,7 +823,7 @@ def step_make_driver(model: ModelWrapper, cfg: DataflowBuildConfig):
         if cfg.enable_instrumentation:
             model = model.transform(
                 MakePYNQDriverInstrumentation(
-                    cfg._resolve_driver_platform(), cfg.synth_clk_period_ns
+                    cfg._resolve_driver_platform(), cfg.synth_clk_period_ns, cfg.live_fifo_sizing
                 )
             )
         else:

diff --git a/src/finn/custom_op/fpgadataflow/hls/__init__.py b/src/finn/custom_op/fpgadataflow/hls/__init__.py
@@ -93,6 +93,7 @@ def register_custom_op(cls):
     StreamingDataWidthConverter_hls,
 )
 from finn.custom_op.fpgadataflow.hls.streamingeltwise_hls import StreamingEltwise_hls
+from finn.custom_op.fpgadataflow.hls.streamingfifo_hls import StreamingFIFO_hls
 from finn.custom_op.fpgadataflow.hls.streamingmaxpool_hls import StreamingMaxPool_hls
 from finn.custom_op.fpgadataflow.hls.thresholding_hls import Thresholding_hls
 from finn.custom_op.fpgadataflow.hls.tlastmarker_hls import TLastMarker_hls
@@ -119,6 +120,7 @@ def register_custom_op(cls):
 custom_op["StreamingEltwise_hls"] = StreamingEltwise_hls
 custom_op["StreamingDataWidthConverter_hls"] = StreamingDataWidthConverter_hls
 custom_op["StreamingMaxPool_hls"] = StreamingMaxPool_hls
+custom_op["StreamingFIFO_hls"] = StreamingFIFO_hls
 custom_op["Thresholding_hls"] = Thresholding_hls
 custom_op["TLastMarker_hls"] = TLastMarker_hls
 custom_op["UpsampleNearestNeighbour_hls"] = UpsampleNearestNeighbour_hls