diff --git a/src/finn/analysis/fpgadataflow/dataflow_performance.py b/src/finn/analysis/fpgadataflow/dataflow_performance.py index a4bf40760e..819782184d 100644 --- a/src/finn/analysis/fpgadataflow/dataflow_performance.py +++ b/src/finn/analysis/fpgadataflow/dataflow_performance.py @@ -29,6 +29,7 @@ from qonnx.custom_op.registry import getCustomOp +from finn.util.basic import decompress_string_to_numpy from finn.util.fpgadataflow import is_hls_node, is_rtl_node @@ -76,3 +77,84 @@ def dataflow_performance(model): "max_cycles": int(max_cycles), "max_cycles_node_name": max_node_name, } + + +def max_period(model): + """Extract maximum period among all nodes in the graph + + Preconditions: + - model consists of HLS/RTL nodes + - model has cycle estimates annotated (see AnnotateCycles transformation) + - nodes have unique names (see GiveUniqueNodeNames) + - model has been characteristically derived and contains specific chr periods + + Returns: + - max_cycles : number of cycles for slowest node + - max_cycles_node_name : name of slowest node + - critical_path_cycles : pessimistic expected latency from input to output + """ + max_cycles = 0 + + for node in model.graph.node: + if node is not None and node.op_type not in [ + "AddStreams_hls", + "DuplicateStreams_hls", + "StreamingFIFO_hls", + "StreamingFIFO_rtl", + ]: + if is_hls_node(node) or is_rtl_node(node): + inst = getCustomOp(node) + node_cycles_in = ( + len(decompress_string_to_numpy(inst.get_nodeattr("io_chrc_in"))[0]) // 2 + ) + node_cycles_out = ( + len(decompress_string_to_numpy(inst.get_nodeattr("io_chrc_out"))[0]) // 2 + ) + node_cycles = max(node_cycles_in, node_cycles_out) + + if node_cycles > max_cycles: + max_cycles = node_cycles + + return { + "max_cycles": int(max_cycles), + } + + +def max_remaining_period(model, node): + """Extract maximum period among all nodes in the graph + + Preconditions: + - model consists of HLS/RTL nodes + - model has cycle estimates annotated (see AnnotateCycles transformation) 
+ - nodes have unique names (see GiveUniqueNodeNames) + - model has been characteristically derived and contains specific chr periods + + Returns: + - max_cycles : number of cycles for slowest node + - max_cycles_node_name : name of slowest node + - critical_path_cycles : pessimistic expected latency from input to output + """ + max_cycles = 0 + node_index = list(model.graph.node).index(node) + for node in model.graph.node[node_index:]: + if node is not None and node.op_type not in [ + "AddStreams_hls", + "DuplicateStreams_hls", + "StreamingFIFO_hls", + "StreamingFIFO_rtl", + ]: + if is_hls_node(node) or is_rtl_node(node): + inst = getCustomOp(node) + node_cycles = int(inst.get_nodeattr("io_chrc_period")) + node_cycles_in = ( + len(decompress_string_to_numpy(inst.get_nodeattr("io_chrc_in"))[0]) // 2 + ) + node_cycles_out = ( + len(decompress_string_to_numpy(inst.get_nodeattr("io_chrc_out"))[0]) // 2 + ) + node_cycles = max(node_cycles_in, node_cycles_out) + if node_cycles > max_cycles: + max_cycles = node_cycles + return { + "max_cycles": int(max_cycles), + } diff --git a/src/finn/builder/build_dataflow_config.py b/src/finn/builder/build_dataflow_config.py index a08fc3a04c..d0c42c8e44 100644 --- a/src/finn/builder/build_dataflow_config.py +++ b/src/finn/builder/build_dataflow_config.py @@ -40,11 +40,30 @@ class AutoFIFOSizingMethod(str, Enum): "Select the type of automatic FIFO sizing strategy." - - CHARACTERIZE = "characterize" + ANALYTIC = "analytical" LARGEFIFO_RTLSIM = "largefifo_rtlsim" +class TAVGenerationMethod(str, Enum): + "Select the strategy for constructing token access vectors of an operator." 
+ RTLSIM = "rtlsim" + TREE_MODEL = "tree_model" + + +class TAVUtilizationMethod(str, Enum): + """Select the strategy for utilizing token access vectors of an operator + for buffer sizing.""" + + # worst-case ratio of data rates between a consumer and producer + CONSERVATIVE_RELAXATION = "conservative_relaxation" + + # average-case ratio of data rates between a consumer and producer + AGGRESSIVE_RELAXATION = "aggressive_relaxation" + + # no relaxation, use the token access vectors as-is + NO_RELAXATION = "no_relaxation" + + class ShellFlowType(str, Enum): """For builds that produce a bitfile, select the shell flow that will integrate the FINN-generated accelerator.""" @@ -278,6 +297,31 @@ class DataflowBuildConfig: #: setting the FIFO sizes. auto_fifo_strategy: Optional[AutoFIFOSizingMethod] = AutoFIFOSizingMethod.LARGEFIFO_RTLSIM + #: Which strategy will be used for token access vector generation for FIFO sizing. + #: RTLSIM will result in performing RTLSIM for each node + #: to deduce the token access vectors empirically + #: TREE_MODEL will use the tree mode of an operator if available, avoiding the generation + #: of IP cores. + tav_generation_strategy: Optional[TAVGenerationMethod] = TAVGenerationMethod.RTLSIM + + #: Which strategy will be used for token access vector generation for FIFO sizing. + #: RTLSIM will result in performing RTLSIM for each node + #: to deduce the token access vectors empirically + #: TREE_MODEL will use the tree mode of an operator if available, avoiding the generation + #: of IP cores. + tav_utilization_strategy: Optional[ + TAVUtilizationMethod + ] = TAVUtilizationMethod.CONSERVATIVE_RELAXATION + + #: When True, skips the resynthesis steps after fifo sizing. This makes it + #: possible to run the step for rapid fifo size analysis during + #: automatic folding optimizations or as a first approximation. 
+ skip_resynth_during_fifo_sizing: Optional[bool] = False + + #: Avoid using C++ rtlsim for auto FIFO sizing and rtlsim throughput test + #: if set to True, always using Python instead + force_python_rtlsim: Optional[bool] = False + #: Memory resource type for large FIFOs #: Only relevant when `auto_fifo_depths = True` large_fifo_mem_style: Optional[LargeFIFOMemStyle] = LargeFIFOMemStyle.AUTO diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py index e81d7d09f7..afa35f0a4b 100644 --- a/src/finn/builder/build_dataflow_steps.py +++ b/src/finn/builder/build_dataflow_steps.py @@ -53,7 +53,10 @@ import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw import finn.transformation.streamline.absorb as absorb -from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance +from finn.analysis.fpgadataflow.dataflow_performance import ( + dataflow_performance, + max_period, +) from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer from finn.analysis.fpgadataflow.hls_synth_res_estimation import hls_synth_res_estimation from finn.analysis.fpgadataflow.op_and_param_counts import ( @@ -80,8 +83,13 @@ ) from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP from finn.transformation.fpgadataflow.derive_characteristic import ( - DeriveCharacteristic, + DelayCharacteristicFunctions, DeriveFIFOSizes, + DeriveTokenAccessVectors, + HandleBranches, + JustInTimeSynthesize, + LocalStretchCharacteristicFunctions, + ProducerDelayCharacteristicFunctions, ) from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP from finn.transformation.fpgadataflow.insert_dwc import InsertDWC @@ -102,6 +110,7 @@ ) from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode from finn.transformation.fpgadataflow.set_fifo_depths import ( + CapConvolutionFIFODepths, InsertAndSetFIFODepths, RemoveShallowFIFOs, SplitLargeFIFOs, @@ -573,19 +582,79 @@ def 
step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig): """ if cfg.auto_fifo_depths: - if cfg.auto_fifo_strategy == "characterize": - model = model.transform(InsertDWC()) - model = model.transform(SpecializeLayers(cfg._resolve_fpga_part())) - model = model.transform(GiveUniqueNodeNames()) + model = model.transform(InsertDWC()) + model = model.transform(SpecializeLayers(cfg._resolve_fpga_part())) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(AnnotateCycles()) + + if cfg.auto_fifo_strategy == "analytical": + if cfg.tav_generation_strategy == "tree_model": + # if we have tree models, only rtlsim nodes for which we dont + only_jit_nodes_without_tree = True + else: + # rtlsim everything by force if not using trees + only_jit_nodes_without_tree = False model = model.transform( - PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period()) + JustInTimeSynthesize( + cfg._resolve_fpga_part(), + cfg._resolve_hls_clk_period(), + only_jit_nodes_without_tree, + ) + ) + period = int(model.analysis(dataflow_performance)["max_cycles"]) + model = model.transform( + DeriveTokenAccessVectors( + model, + period, + cfg.tav_generation_strategy, + cfg._resolve_fpga_part(), + cfg._resolve_hls_clk_period(), + ) + ) + + period = int(model.analysis(dataflow_performance)["max_cycles"]) + model = model.transform( + LocalStretchCharacteristicFunctions( + 1, + period, + nodes_to_ignore=[], + ) ) - model = model.transform(HLSSynthIP()) - model = model.transform(PrepareRTLSim(behav=True)) - model = model.transform(AnnotateCycles()) - period = model.analysis(dataflow_performance)["max_cycles"] + 10 - model = model.transform(DeriveCharacteristic(period)) - model = model.transform(DeriveFIFOSizes()) + + period = int(model.analysis(dataflow_performance)["max_cycles"]) + + model = model.transform(HandleBranches(model, period)) + + period = int(model.analysis(dataflow_performance)["max_cycles"]) + model = model.transform( + DelayCharacteristicFunctions( + 
1, + period, + nodes_to_ignore=[], + ) + ) + + period = int(model.analysis(dataflow_performance)["max_cycles"]) + + model = model.transform( + ProducerDelayCharacteristicFunctions( + 1, + period, + nodes_to_ignore=[], + ) + ) + + period = int(model.analysis(max_period)["max_cycles"]) + + model = model.transform( + DeriveFIFOSizes( + period=period, + nodes_to_ignore=[], + global_offset_correction=True, + tav_utilization_strategy=cfg.tav_utilization_strategy, + ) + ) + model = model.transform( InsertFIFO( vivado_ram_style=cfg.large_fifo_mem_style, @@ -593,9 +662,13 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig): create_shallow_fifos=True, ) ) + model = model.transform(SpecializeLayers(cfg._resolve_fpga_part())) model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) + if cfg.default_swg_exception: + model = model.transform(CapConvolutionFIFODepths(max_qsrl_depth=256)) + elif cfg.auto_fifo_strategy == "largefifo_rtlsim": if cfg.fifosim_save_waveform: report_dir = cfg.output_dir + "/report" @@ -665,8 +738,10 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig): # after FIFOs are ready to go, call PrepareIP and HLSSynthIP again # this will only run for the new nodes (e.g. 
FIFOs and DWCs) - model = model.transform(PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period())) - model = model.transform(HLSSynthIP()) + if not cfg.skip_resynth_during_fifo_sizing: + model = model.transform(PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period())) + model = model.transform(HLSSynthIP()) + return model diff --git a/src/finn/custom_op/fpgadataflow/addstreams.py b/src/finn/custom_op/fpgadataflow/addstreams.py index c11fb3db3e..a049ea8dcc 100644 --- a/src/finn/custom_op/fpgadataflow/addstreams.py +++ b/src/finn/custom_op/fpgadataflow/addstreams.py @@ -32,6 +32,7 @@ from qonnx.core.datatype import DataType from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp +from finn.util.basic import Characteristic_Node class AddStreams(HWCustomOp): @@ -149,7 +150,17 @@ def execute_node(self, context, graph): result = inp0_values + inp1_values context[node.output[0]] = np.asarray(result, dtype=np.float32).reshape(oshape) - def derive_characteristic_fxns(self, period): + def prepare_tree_model(self): + dim = np.prod(self.get_folded_output_shape()[1:-1]) + + read_write = Characteristic_Node("passing addstreams layer", [(dim, [1, 1])], True) + addstreams_top = Characteristic_Node("compute addstreams", [(1, read_write)], False) + + return addstreams_top # top level phase of this node + + def derive_token_access_vectors( + self, model, period, strategy, fpga_part, clk_period, op_type, override_dict=None + ): n_inps = np.prod(self.get_folded_input_shape()[:-1]) io_dict = { "inputs": { @@ -158,4 +169,7 @@ def derive_characteristic_fxns(self, period): }, "outputs": {"out0": []}, } - super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict) + + super().derive_token_access_vectors( + model, period, strategy, fpga_part, clk_period, op_type, override_dict=io_dict + ) diff --git a/src/finn/custom_op/fpgadataflow/channelwise_op.py b/src/finn/custom_op/fpgadataflow/channelwise_op.py index abb1adc1fb..083dac17ce 100644 --- 
a/src/finn/custom_op/fpgadataflow/channelwise_op.py +++ b/src/finn/custom_op/fpgadataflow/channelwise_op.py @@ -34,6 +34,7 @@ from qonnx.util.basic import qonnx_make_model from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp +from finn.util.basic import Characteristic_Node # ONNX i/o tensor shape assumptions for channelwise ops: # input 0 is the input tensor, shape (..., NumChannels) @@ -240,3 +241,13 @@ def execute_node(self, context, graph): sess = rt.InferenceSession(model_func.SerializeToString()) result = sess.run(None, idict) context[node.output[0]] = np.asarray(result, dtype=np.float32).reshape(oshape) + + def get_tree_model(self): + # key parameters + + dim = np.prod(self.get_folded_output_shape()[1:-1]) + + pass_channelwise = Characteristic_Node("passing channelwise layer", [(dim, [1, 1])], True) + channelwise_top = Characteristic_Node("compute pool", [(1, pass_channelwise)], False) + + return channelwise_top # top level phase of this node diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py index 8c1a36232f..7504ca6e4e 100644 --- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py +++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py @@ -26,6 +26,7 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+import numpy as np import warnings from onnx import TensorProto, helper from qonnx.core.datatype import DataType @@ -35,6 +36,7 @@ from qonnx.util.basic import qonnx_make_model from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp +from finn.util.basic import Characteristic_Node # ONNX i/o tensor shape assumptions for ConvolutionInputGenerator: # input 0 is the input tensor, shape NHWC = (1, IFMDim, IFMDim, IFMChannels) @@ -259,3 +261,606 @@ def execute_node(self, context, graph): # this automatically updates the execution context inst = getCustomOp(im2col_node) inst.execute_node(context, model_im2col.graph) + + def get_tree_model_uniform_distribution_based(self): + def distribute_outputs_uniform( + out_total, in_total, stride_y=1, stride_x=1, feature_map_x=1, kernel_x=1, kernel_y=1 + ): + if in_total == 0: + return [out_total] + + # if kernel_y > 1: + # stride_y = stride_y - (kernel_y-1) // 2 + # if kernel_x > 1: + # stride_x = stride_x - (kernel_x-1) // 2 + + spacing_y = max(feature_map_x * (stride_y - 1), 1) + spacing_x = max((stride_x - 1 + (kernel_x - 1) // 2), 1) + + weights = [] + for i in range(in_total): + weight = 1 + if stride_y > 1: + if i % spacing_y == 0: + weight += spacing_y + if stride_x > 1: + if i % spacing_x == 0: + weight += spacing_x + weights.append(weight) + + # Normalize weights to match out_total + total_weight = sum(weights) + raw_counts = [w * out_total / total_weight for w in weights] + + # Round to nearest integers + int_counts = [int(round(x)) for x in raw_counts] + + # Adjust rounding error + diff = sum(int_counts) - out_total + if diff != 0: + adjustments = sorted( + enumerate(raw_counts), key=lambda x: x[1] - int_counts[x[0]], reverse=(diff > 0) + ) + for i, _ in adjustments: + if diff == 0: + break + int_counts[i] -= int(diff / abs(diff)) + diff -= int(diff / abs(diff)) + + return int_counts + + IMPL_STYLE = "rtl" if "_rtl" in (self.__class__.__name__) else "hls" + assert IMPL_STYLE in ["rtl", "hls"], "Implementation 
style must be 'rtl' or 'hls'" + + # Extract node attributes + ifm_dim_y, ifm_dim_x = self.get_nodeattr("IFMDim") + ifm_ch = self.get_nodeattr("IFMChannels") + simd = self.get_nodeattr("SIMD") + k_h, k_w = self.get_nodeattr("ConvKernelDim") + stride_y, stride_x = self.get_nodeattr("Stride") + dilation_y, dilation_x = self.get_nodeattr("Dilation") + is1d = self.get_nodeattr("is1D") + parallel_window = self.get_nodeattr("parallel_window") + # numReps = 1 + + assert ifm_ch % simd == 0 + factor = ifm_ch // simd + ofm_dim_y = compute_conv_output_dim(ifm_dim_y, k_h, stride_y, 0, dilation_y) + ofm_dim_x = compute_conv_output_dim(ifm_dim_x, k_w, stride_x, 0, dilation_x) + total_outputs = ofm_dim_y * ofm_dim_x + total_inputs = ifm_dim_y * ifm_dim_x + if parallel_window: + k_h = 1 + k_w = 1 + # if not is1d: + # # 2D convolution + # output_tokens = total_outputs * (k_h * k_w) + # else: + # # 1D convolution + # output_tokens = total_outputs * (k_h) + + # key parameters + # IFMDim_x = self.get_nodeattr("IFMDim")[0] + # OFMDim_x = self.get_nodeattr("OFMDim")[0] + ConvKernelDim_x = self.get_nodeattr("ConvKernelDim")[0] + # Stride_x = self.get_nodeattr("Stride")[0] + + # OFMDim_y = self.get_nodeattr("OFMDim")[1] + ConvKernelDim_y = self.get_nodeattr("ConvKernelDim")[1] + # Stride_y = self.get_nodeattr("Stride")[1] + + # SIMD = self.get_nodeattr("SIMD") + + # IFMChannels = self.get_nodeattr("IFMChannels") + + DEPTHWISE = self.get_nodeattr("depthwise") + is1d = self.get_nodeattr("is1D") + + # SF = IFMChannels // SIMD + # OUTPUT_SIZE = OFMDim_x * ConvKernelDim_x * SF + # INPUT_SIZE = IFMDim_x * SF + # WINDOW_SIZE = ConvKernelDim_x * SF + # if DEPTHWISE: + # BUFFER_SIZE = ConvKernelDim_x * SF + # READ_CYCLES = SF * (ConvKernelDim_x - 1) - (ConvKernelDim_x - 1) + # FINISH = IFMDim_x - ConvKernelDim_x - 2 + # else: + # BUFFER_SIZE = (ConvKernelDim_x - 1) * SF + # READ_CYCLES = 0 + # FINISH = 0 + + assert ifm_ch % simd == 0 + factor = ifm_ch // simd + + # OCNT_INITIAL = BUFFER_SIZE + 
(Stride_x - 1) + + # DEFAULT_FIFO_DEPTH = 2 + + ofm_dim_y = compute_conv_output_dim(ifm_dim_y, k_h, stride_y, 0, dilation_y) + ofm_dim_x = compute_conv_output_dim(ifm_dim_x, k_w, stride_x, 0, dilation_x) + + if DEPTHWISE: + ofm_dim_y = ofm_dim_y * ConvKernelDim_y + ofm_dim_x = ofm_dim_x * ConvKernelDim_x + + if DEPTHWISE: + flip_factor = factor + else: + flip_factor = 1 + + total_outputs = ofm_dim_y * ofm_dim_x * flip_factor + total_inputs = ifm_dim_y * ifm_dim_x * flip_factor + if parallel_window: + k_h = 1 + k_w = 1 + # if not is1d: + # # 2D convolution + # output_tokens = total_outputs * (k_h * k_w) + # else: + # # 1D convolution + # output_tokens = total_outputs * (k_h) + + ch_write = Characteristic_Node("Output Write", [(factor // flip_factor, [0, 1])], True) + ch_read = Characteristic_Node("Streamed Read", [(factor // flip_factor, [1, 0])], True) + ch_both = Characteristic_Node("Streamed Read", [(factor // flip_factor, [1, 1])], True) + + out_total = np.prod(self.get_folded_output_shape()[:-1]) // factor * flip_factor + in_total = np.prod(self.get_folded_input_shape()[:-1]) // factor * flip_factor + + # Calculate startup and steady reads + if not is1d: + startup_reads = (k_h - 1) * ifm_dim_x + k_w # - (ifm_dim_x-k_w) + # startup_writes = ofm_dim_x - (ofm_dim_x-k_w) // (stride_x * stride_y)# * + # factor # we can only write the middle in this section!!! 
+ if not DEPTHWISE: + if k_h > 1: + startup_writes = ofm_dim_x # k_w*stride_x # // (stride_x) + else: + startup_writes = ofm_dim_x # // (stride_x * stride_y) + else: + if k_h > 1: + startup_writes = 0 + else: + startup_writes = 0 + else: + startup_reads = ifm_dim_x + startup_writes = ofm_dim_x // stride_x + + startup_reads = startup_reads * flip_factor + startup_writes = startup_writes * flip_factor + + # startup_reads = 0 + steady_reads = total_inputs - startup_reads + steady_writes = total_outputs - startup_writes + + total_inputs = total_inputs - startup_reads + total_outputs = total_outputs - startup_writes + # inputs_read = startup_reads + + if startup_writes == 0: + offset_writing = 1 + else: + offset_writing = 0 + + # Steady-state reads > 0, normal case + # Spread steady reads evenly across output_tokens cycles + in_total = in_total - startup_reads + out_total = out_total - startup_writes + + if startup_writes > startup_reads: + schedule = distribute_outputs_uniform( + startup_writes, startup_reads, stride_x, stride_y, k_w, k_h, ifm_dim_x + ) + per_cycle_nodes = [] + + for tokens_this_cycle in schedule: + cycle = Characteristic_Node( + "Cycle", + [ + (1 - offset_writing, ch_both), + ( + 1, + Characteristic_Node( + "Output Write", + [(tokens_this_cycle - 1 + offset_writing, ch_write)], + False, + ), + ), + ], + False, + ) + per_cycle_nodes.append((1, cycle)) + + startup = Characteristic_Node("Processing Loop", per_cycle_nodes, False) + else: + schedule = distribute_outputs_uniform( + startup_reads, startup_writes, stride_x, stride_y, k_w, k_h, ifm_dim_x + ) + per_cycle_nodes = [] + + for tokens_this_cycle in schedule: + cycle = Characteristic_Node( + "Cycle", + [ + (1 - offset_writing, ch_both), + ( + 1, + Characteristic_Node( + "Input Read", + [(tokens_this_cycle - 1 + offset_writing, ch_read)], + False, + ), + ), + ], + False, + ) + per_cycle_nodes.append((1, cycle)) + + startup = Characteristic_Node("Processing Loop", per_cycle_nodes, False) + + if 
out_total > in_total: + if steady_reads <= 0: + return Characteristic_Node( + "SlidingWindow_2D", [(1, startup), (steady_writes, ch_write)], False + ) + + schedule = distribute_outputs_uniform( + out_total, in_total, stride_x, stride_y, k_w, k_h, ifm_dim_x + ) + per_cycle_nodes = [] + + for tokens_this_cycle in schedule: + cycle = Characteristic_Node( + "Cycle", + [ + (1, ch_both), + ( + 1, + Characteristic_Node( + "Output Write", [(tokens_this_cycle - 1, ch_write)], False + ), + ), + ], + False, + ) + per_cycle_nodes.append((1, cycle)) + + steady = Characteristic_Node("Processing Loop", per_cycle_nodes, False) + + return Characteristic_Node("SlidingWindow_2D", [(1, startup), (1, steady)], False) + + else: + if steady_reads <= 0: + return Characteristic_Node( + "SlidingWindow_2D", [(1, startup), (steady_writes, ch_write)], False + ) + + schedule = distribute_outputs_uniform( + in_total, out_total, stride_x, stride_y, k_w, k_h, ifm_dim_x + ) + per_cycle_nodes = [] + + for tokens_this_cycle in schedule: + cycle = Characteristic_Node( + "Cycle", + [ + (1, ch_both), + ( + 1, + Characteristic_Node( + "Output Write", [(tokens_this_cycle - 1, ch_read)], False + ), + ), + ], + False, + ) + per_cycle_nodes.append((1, cycle)) + + steady = Characteristic_Node("Processing Loop", per_cycle_nodes, False) + + return Characteristic_Node("SlidingWindow_2D", [(1, startup), (1, steady)], False) + + def get_tree_model(self): + # Extract node attributes + ifm_dim_y, ifm_dim_x = self.get_nodeattr("IFMDim") + ifm_ch = self.get_nodeattr("IFMChannels") + simd = self.get_nodeattr("SIMD") + k_y, k_x = self.get_nodeattr("ConvKernelDim") + stride_y, stride_x = self.get_nodeattr("Stride") + dilation_y, dilation_x = self.get_nodeattr("Dilation") + parallel_window = self.get_nodeattr("parallel_window") + depthwise = self.get_nodeattr("depthwise") + SF = ifm_ch // simd + + # hyper parameter for when we stop merging + buffering_threshold = 1024 + # + # print("simd: ", simd) + # print("ifm y, x: ", 
ifm_dim_y, ifm_dim_x) + # print("K: ", k_y, k_x) + # print("stride: ", stride_y, stride_x) + # print("dilation: ", dilation_y, dilation_x) + # print("parallel_window: ", parallel_window) + # print("dw: ", depthwise) + # print("buffer depth: ", self.get_buffer_depth()) + # print("buffering threshold: ", buffering_threshold) + + stride_y_skips = (stride_y - 1) * ifm_dim_x + + import math + + kernels_in_line = math.ceil( + (ifm_dim_x - (k_x - 1 + (k_x - 1) * (dilation_x - 1))) / stride_x + ) + kernel_lines = math.ceil( + (ifm_dim_y - ((k_y - 1) + (k_y - 1) * (dilation_y - 1))) / stride_y + ) + + # compute tail end of a kernel line which has to be read + shifts_x = (kernels_in_line - 1) * stride_x + starting_index_x = k_x + (k_x - 1) * (dilation_x - 1) + remainder_x = ifm_dim_x - (starting_index_x + shifts_x) + + # compute tail end rows of the full feature map which have to be read + shifts_y = (kernel_lines - 1) * stride_y + starting_index_y = k_y + (k_y - 1) * (dilation_y - 1) + remainder_y = (ifm_dim_y - (starting_index_y + shifts_y)) * ifm_dim_x + + reads_to_prepare_line = (k_x - 1) + (k_x - 1) * (dilation_x - 1) + reads_to_prepare_first_line = ((k_y - 1) + (k_y - 1) * (dilation_y - 1)) * ifm_dim_x + total_kernel_y = k_y + (k_y - 1) * (dilation_y - 1) + first_line_kernel_buffer = k_x + (k_x - 1) * (dilation_x - 1) + first_line_buffer = (total_kernel_y - 1) * ifm_dim_x + + if parallel_window == 1: + writes_per_kernel = 1 + else: + writes_per_kernel = k_y * k_x + + # inner line first buffer fill + inner_line_buffer_reads = (stride_y - 1) * ifm_dim_x + + # handling of a kernel shift on x axis + single_move_dif = writes_per_kernel - stride_x + if single_move_dif > 0: + # more writes than reads, dif both, write rest + do_both = stride_x + writes_only = single_move_dif + reads_only = 0 + else: + # more reads than writes + do_both = writes_per_kernel + reads_only = -single_move_dif + writes_only = 0 + + first_do_both = 0 + first_writes_only = writes_per_kernel + 
first_reads_only = first_line_kernel_buffer + + # absorb some remaining reads into writes if possible + absorbing_kernels = 0 + + # only allow absorbing up to kernels_in_line-1 as the first kernel is an exception + remaining_buffer_reads = inner_line_buffer_reads + if inner_line_buffer_reads > 0 and ((kernels_in_line - 1) * writes_only) > 0: + # determine how many lines can absorb them + absorbing_kernels = min( + math.floor((inner_line_buffer_reads) // writes_only), kernels_in_line - 1 + ) + absorbed_reads = absorbing_kernels * writes_only + + # print("absorbing krn: ", absorbing_kernels) + # print("absorved reads: ", absorbed_reads) + # print("remaining hanging reads: ", (inner_line_buffer_reads) - absorbed_reads) + # print("remaining old kernels: ", (kernels_in_line - 2) - absorbing_kernels) + inner_line_buffer_reads -= absorbed_reads + remaining_buffer_reads -= absorbed_reads + + # first kernel is a special case, we absorb the buffer reads into it as well + first_reads = first_line_kernel_buffer + remaining_buffer_reads + first_single_move_dif = writes_per_kernel - first_reads + if first_single_move_dif > 0: + # more writes than reads, dif both, write rest + first_do_both = first_reads + first_writes_only = first_single_move_dif + first_reads_only = 0 + else: + # more reads than writes + first_do_both = writes_per_kernel + first_reads_only = -first_single_move_dif + first_writes_only = 0 + + # first kernel is a special case, we absorb the buffer reads into it as well + absolute_first_reads = first_line_kernel_buffer + first_line_buffer + absolute_first_single_move_dif = writes_per_kernel - absolute_first_reads + + absolute_first_do_both = 0 + absolute_first_writes_only = writes_per_kernel + absolute_first_reads_only = absolute_first_reads + + if depthwise == 0: + if absolute_first_single_move_dif > 0: + # more writes than reads, dif both, write rest + absolute_first_do_both = absolute_first_reads + absolute_first_writes_only = absolute_first_single_move_dif + 
absolute_first_reads_only = 0 + else: + # more reads than writes + absolute_first_do_both = writes_per_kernel + absolute_first_reads_only = -absolute_first_single_move_dif + absolute_first_writes_only = 0 + + ch_idle = Characteristic_Node("Output Write", [(SF, [0, 0])], True) + ch_write = Characteristic_Node("Output Write", [(SF, [0, 1])], True) + + ch_read = Characteristic_Node("Streamed Read", [(SF, [1, 0])], True) + ch_both = Characteristic_Node("Streamed Read+Write", [(SF, [1, 1])], True) + + if parallel_window == 2: + # parallel window path works reliably, but should + # eventually be using paralle window 0's structure + # however currently is still inaccurate for some + # configs with parallel window=0 + ch_handle = Characteristic_Node("write out", [(1, ch_both)], False) + + handle_kernel = Characteristic_Node( + "handle one kernel", [(1, ch_handle), (stride_x - 1, ch_read)], False + ) + + handle_last_kernel = Characteristic_Node( + "handle last kernel", + [ + (1, ch_handle), + (remainder_x, ch_read), + ], + False, + ) + + handle_line = Characteristic_Node( + "write_one_line", + [ + (reads_to_prepare_line, ch_read), + (kernels_in_line - 1, handle_kernel), + (1, handle_last_kernel), + (stride_y_skips, ch_read), + ], + False, + ) + handle_last_line = Characteristic_Node( + "write line without stride at end", + [ + (reads_to_prepare_line, ch_read), + (kernels_in_line, handle_kernel), + (remainder_y, ch_read), + ], + False, + ) + swg = Characteristic_Node( + "SlidingWindowGenerator", + [ + (1, ch_idle), + (reads_to_prepare_first_line, ch_read), + (kernel_lines - 1, handle_line), + (1, handle_last_line), + ], + False, + ) + + else: + # --- handle_first_kernel --- + # print("\n\nhandle first kernel") + # print(f"do_both: {first_do_both}\n") + # print(f"reads_only: {first_reads_only}\n") + # print(f"writes_only: {first_writes_only}\n") + + handle_absolute_kernel = Characteristic_Node( + "handle one kernel", + [ + (absolute_first_do_both, ch_both), + 
(absolute_first_reads_only, ch_read), + (absolute_first_writes_only, ch_write), + ], + False, + ) + + # --- handle_first_kernel --- + # print("\n\nhandle first kernel") + # print(f"do_both: {first_do_both}\n") + # print(f"reads_only: {first_reads_only}\n") + # print(f"writes_only: {first_writes_only}\n") + + handle_first_kernel = Characteristic_Node( + "handle one kernel", + [ + (first_do_both, ch_both), + (first_reads_only, ch_read), + (first_writes_only, ch_write), + ], + False, + ) + + # --- handle_kernel --- + # print("\n\nhandle kernel") + # print(f"do_both: {do_both}\n") + # print(f"reads_only: {reads_only}\n") + # print(f"writes_only: {writes_only}\n") + + handle_kernel = Characteristic_Node( + "handle one kernel", + [ + (do_both, ch_both), + (reads_only, ch_read), + (writes_only, ch_write), + ], + False, + ) + + # --- handle_kernel_absorbed --- + # print("\n\nhandle absorbed kernel") + # print(f"do_both: {do_both+writes_only}\n") + # print(f"reads_only: {reads_only}\n") + # + handle_kernel_absorbed = Characteristic_Node( + "handle one kernel with fused writes", + [ + (do_both + writes_only, ch_both), + (reads_only, ch_read), + ], + False, + ) + + # --- handle_first_line --- + # print("\n\nhandle first line") + # print(f"first_line_buffer: {first_line_buffer}\n") + # print(f"first line kernelbuffer: {first_line_kernel_buffer}\n") + # print(f"kernels_in_line: {kernels_in_line}\n") + # print(f"remainder_x: {remainder_x}\n") + + handle_first_line = Characteristic_Node( + "write first line", + [ + # (first_line_buffer, ch_read), + (1, handle_absolute_kernel), + (kernels_in_line - 1, handle_kernel), + (remainder_x, ch_read), + ], + False, + ) + + # --- handle_line --- + # print("\n\nhandle regular line") + # print(f"inner_line_buffer_reads: {inner_line_buffer_reads}\n") + # print(f"absorbing_kernels: {absorbing_kernels}\n") + # print("kernels_in_line - absorbing_kernels: ") + # print(f"{kernels_in_line - absorbing_kernels}\n") + # print(f"remainder_x: 
{remainder_x}\n") + + handle_line = Characteristic_Node( + "write one inner line", + [ + # (remaining_buffer_reads, ch_read), + (1, handle_first_kernel), + (absorbing_kernels, handle_kernel_absorbed), + (kernels_in_line - 1 - absorbing_kernels, handle_kernel), + (remainder_x, ch_read), + ], + False, + ) + + # --- swg --- + # print("\n\nswg") + # print(f"kernel_lines - 1: {kernel_lines - 1}\n") + # print(f"remainder_y: {remainder_y}\n") + + swg = Characteristic_Node( + "SlidingWindowGenerator", + [ + (1, handle_first_line), + (kernel_lines - 1, handle_line), + (remainder_y, ch_read), + ], + False, + ) + + return swg + diff --git a/src/finn/custom_op/fpgadataflow/duplicatestreams.py b/src/finn/custom_op/fpgadataflow/duplicatestreams.py index 4a52a36006..ac095fa9af 100644 --- a/src/finn/custom_op/fpgadataflow/duplicatestreams.py +++ b/src/finn/custom_op/fpgadataflow/duplicatestreams.py @@ -31,6 +31,7 @@ from qonnx.core.datatype import DataType from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp +from finn.util.basic import Characteristic_Node class DuplicateStreams(HWCustomOp): @@ -148,12 +149,27 @@ def execute_node(self, context, graph): for outp in node.output: context[outp] = output - def derive_characteristic_fxns(self, period): + def get_tree_model(self): + # key parameters + + dim = np.prod(self.get_folded_output_shape()[1:-1]) + + read_write = Characteristic_Node("passing duplicate layer", [(dim, [1, 1])], True) + duplicatestreams_top = Characteristic_Node("compute duplicate", [(1, read_write)], False) + + return duplicatestreams_top # top level phase of this node + + def derive_token_access_vectors( + self, model, period, strategy, fpga_part, clk_period, op_type, override_dict=None + ): n_inps = np.prod(self.get_folded_input_shape()[:-1]) io_dict = { "inputs": { "in0": [0 for i in range(n_inps)], }, - "outputs": {"out0": [], "out1": []}, + "outputs": {*[f"out{x}" for x in range(self.get_num_output_streams())]}, } - 
super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict) + + super().derive_token_access_vectors( + model, period, strategy, fpga_part, clk_period, op_type, override_dict=io_dict + ) diff --git a/src/finn/custom_op/fpgadataflow/fmpadding.py b/src/finn/custom_op/fpgadataflow/fmpadding.py index 2ff9bb13b7..322d12c9de 100644 --- a/src/finn/custom_op/fpgadataflow/fmpadding.py +++ b/src/finn/custom_op/fpgadataflow/fmpadding.py @@ -31,6 +31,7 @@ from qonnx.core.datatype import DataType from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp +from finn.util.basic import Characteristic_Node class FMPadding(HWCustomOp): @@ -111,6 +112,13 @@ def get_folded_output_shape(self, ind=0): folded_oshape = normal_oshape[:-1] + [fold, simd] return tuple(folded_oshape) + def make_shape_compatible_op(self, model): + exp_ishape = self.get_normal_input_shape() + oshape = self.get_normal_output_shape() + ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0])) + assert ishape == exp_ishape, "Unexpect input shape for FMPadding." 
+ return super().make_const_shape_op(oshape) + def infer_node_datatype(self, model): node = self.onnx_node idt = model.get_tensor_datatype(node.input[0]) @@ -124,6 +132,9 @@ def infer_node_datatype(self, model): self.set_nodeattr("inputDataType", idt.name) model.set_tensor_datatype(node.output[0], idt) + def verify_node(self): + pass + def get_input_datatype(self, ind=0): """Returns FINN DataType of input.""" ret = DataType[self.get_nodeattr("inputDataType")] @@ -146,6 +157,10 @@ def get_outstream_width(self, ind=0): simd = self.get_nodeattr("SIMD") return obits * simd + def get_number_output_values(self): + folded_oshape = self.get_folded_output_shape() + return np.prod(folded_oshape[:-1]) + def execute_node(self, context, graph): # simulate behavior with Python functionality node = self.onnx_node @@ -156,3 +171,61 @@ def execute_node(self, context, graph): inp_values, ((0, 0), (pad[0], pad[2]), (pad[1], pad[3]), (0, 0)), "constant" ) context[node.output[0]] = np.asarray(result, dtype=np.float32).reshape(oshape) + + def get_tree_model(self): + # key parameters + # this depends on the kernel type, hls or rtl etc + + # extract node attr + IMGDIM = self.get_nodeattr("ImgDim") + PADDING = self.get_nodeattr("Padding") + NUMCHANNELS = self.get_nodeattr("NumChannels") + SIMD = self.get_nodeattr("SIMD") + batch_size = self.get_nodeattr("numInputVectors") + IMPL_STYLE = "rtl" if "_rtl" in (self.__class__.__name__) else "hls" + assert IMPL_STYLE in ["rtl", "hls"], "Implementation style must be 'rtl' or 'hls'" + + # compute new parameters + NF = int(NUMCHANNELS / SIMD) + y_padding_top, x_padding_left, y_padding_bottom, x_padding_right = PADDING + y_dim = IMGDIM[0] + x_dim = IMGDIM[1] + + if IMPL_STYLE == "hls" and NF == 1: + loop_overhead = 1 + else: + loop_overhead = 0 + + ch_pad = Characteristic_Node("Channel_Pad", [(NF, [0, 1]), (loop_overhead, [0, 0])], True) + + ch_pass = Characteristic_Node("Channel_Pass", [(NF, [1, 1]), (loop_overhead, [0, 0])], True) + + x_inner_line 
= Characteristic_Node( + "Fill X full inner line", + [(x_padding_left, ch_pad), (x_dim, ch_pass), (x_padding_right, ch_pad)], + False, + ) + + x_outer_line = Characteristic_Node( + "Pad X outer line", [(x_padding_left + x_dim + x_padding_right, ch_pad)], False + ) + + fmpadding = Characteristic_Node( + "FMPadding FM", + [ + (y_padding_top, x_outer_line), + (y_dim, x_inner_line), + (y_padding_bottom, x_outer_line), + ], + False, + ) + + fmpadding_top = Characteristic_Node( + "FMPadding FM", + [ + (batch_size, fmpadding), + ], + False, + ) + + return fmpadding_top # top level phase of this node diff --git a/src/finn/custom_op/fpgadataflow/hwcustomop.py b/src/finn/custom_op/fpgadataflow/hwcustomop.py index f8f7a73c54..61ffe66579 100644 --- a/src/finn/custom_op/fpgadataflow/hwcustomop.py +++ b/src/finn/custom_op/fpgadataflow/hwcustomop.py @@ -33,12 +33,20 @@ import numpy as np import os -import warnings from abc import abstractmethod from qonnx.custom_op.base import CustomOp from qonnx.util.basic import roundup_to_integer_multiple -from finn.util.basic import get_liveness_threshold_cycles, is_versal +from finn.util.basic import ( + compress_numpy_to_string, + get_liveness_threshold_cycles, + is_versal, +) + +try: + import pyxsi_utils +except ModuleNotFoundError: + pyxsi_utils = None class HWCustomOp(CustomOp): @@ -87,14 +95,19 @@ def get_nodeattr_types(self): "inFIFODepths": ("ints", False, [2]), "outFIFODepths": ("ints", False, [2]), "output_hook": ("s", False, ""), - # accumulated characteristic function over two periods - "io_chrc_in": ("t", False, np.asarray([], dtype=np.int32)), - "io_chrc_out": ("t", False, np.asarray([], dtype=np.int32)), + # token access vectors used for analytical FIFO sizing + "io_chrc_in": ("s", False, ""), + "io_chrc_out": ("s", False, ""), + "io_chrc_in_stretch": ("s", False, ""), + "io_chrc_out_stretch": ("s", False, ""), + "io_chrc_in_original": ("s", False, ""), + "io_chrc_out_original": ("s", False, ""), # the period for which the 
characterization was run "io_chrc_period": ("i", False, 0), - # amount of zero padding inserted during chrc. - "io_chrc_pads_in": ("ints", False, []), - "io_chrc_pads_out": ("ints", False, []), + # extra buffers added to a branch, needed for coupling + # token access vectors at the end of + # branches during analytical FIFO sizing + "extra_branch_fifos": ("ints", False, [0, 0]), } def make_shape_compatible_op(self, model): @@ -219,6 +232,19 @@ def reset_rtlsim(self, sim): back to one""" finnxsi.reset_rtlsim(sim) + def rtlsim_multi_io_custom(self, sim, io_dict, sname="_V", batch_size=1): + "Run rtlsim for this node, supports multiple i/o streams." + num_out_values = self.get_number_output_values() * batch_size + total_cycle_count = finnxsi.rtlsim_multi_io( + sim, + io_dict, + num_out_values, + sname=sname, + liveness_threshold=get_liveness_threshold_cycles(), + ) + + self.set_nodeattr("cycles_rtlsim", total_cycle_count) + def rtlsim_multi_io(self, sim, io_dict, sname="_V"): "Run rtlsim for this node, supports multiple i/o streams." num_out_values = self.get_number_output_values() @@ -297,11 +323,166 @@ def get_outstream_width_padded(self, ind=0): out_width = self.get_outstream_width(ind=ind) return roundup_to_integer_multiple(out_width, 8) + def get_tree_model(self): + """Returns the characteristic function of a node, default is None and forces + to skip the analytical characterization of the node and fallback to rtlsim. 
+ Implemented in each node, potentially overriding between rtl and hls""" + return None + + def derive_token_access_vectors( + self, model, period, strategy, fpga_part, clk_period, op_type, override_dict=None + ): + if override_dict is None: + n_inps = np.prod(self.get_folded_input_shape()[:-1]) + io_dict = { + "inputs": { + "in0": [i for i in range(n_inps)], + }, + "outputs": {"out0": []}, + } + else: + io_dict = override_dict + if strategy == "tree_model": + # check for override function + if self.get_tree_model() is not None: + print(f"using tree model for node {self}") + self.derive_token_access_vectors_using_tree_model(period, io_dict=io_dict) + return + print(f"using rtlsim for node {self}") + # RTL-based flow + # there is a 20 clock marging added for when get_exp_cycles() + # is underestimating the real operator runtime. + period = self.get_exp_cycles() + 20 + self.derive_token_access_vectors_using_rtlsim(model, period, fpga_part, clk_period, io_dict) + + def derive_token_access_vectors_using_tree_model(self, period, io_dict): + # Analytical flow + txns_in = {key: [] for (key, value) in io_dict["inputs"].items() if "in0" in key} + txns_out = {key: [] for (key, value) in io_dict["outputs"].items() if "out0" in key} + + chr_node = self.get_tree_model() + period, in_clocks, _ = chr_node.get_total_cycles(0) + + self.set_nodeattr("io_chrc_period", period) + + txn_in = [] + txn_out = [] + counter = 0 + + top_level_phase = self.get_tree_model() + # first period + cycles = 0 + + counter, cycles, txn_in = top_level_phase.traverse_phase_tree(0, counter, cycles, txn_in) + + def apply_micro_buffer_correction(start, txn_in, period): + """There are cases where a node can buffer up the very first 1-2 inputs + immediately, even if it has not started properly consuming inputs yet + This behavior is extremely difficult to model in a characterization tree + and so we perform a manual correction by incrementing the number of + inputs read by 1 and detracting 1 read from the 
tail of the period + + Which node types & configurations this applies for is yet to be + fully determined, but the corrections should happen here. + This correction is not critical for buffer sizing, as it will only + lead to two extra fifos in the absolute worst case, which should be very + rare regardless. However it is necessary if attempting to perfectly model + the rtlsim result.""" + + buffer = 0 + + if "FMPadding" in self.onnx_node.name: + if "_rtl" in (self.__class__.__name__): + buffer = 1 + else: + buffer = 2 + + if "StreamingDataWidthConverter" in self.onnx_node.name: + if "_rtl" in (self.__class__.__name__): + buffer = 1 + else: + buffer = 2 + + if "Pool" in self.onnx_node.name: + if "_rtl" in (self.__class__.__name__): + buffer = 1 + else: + buffer = 2 + + if "MVAU" in self.onnx_node.name: + if "_rtl" in (self.__class__.__name__): + buffer = 1 + else: + buffer = 2 + + if buffer > 0: + # buffering does not happen in nodes with short wind-ups + if period < 14: + return txn_in + + # main routine + if buffer == 2: + if txn_in[start + 1] - txn_in[start] >= 1: + buffer = 1 + else: + txn_in[start + 1] += 1 + + idx = start + buffer + while idx < len(txn_in): + if txn_in[idx] - txn_in[idx - 1] < buffer: + txn_in[idx] += buffer + idx += 1 + + idx = len(txn_in) - 1 + last = txn_in[idx] + + # deduct 1 read from the tail + while last == txn_in[idx]: + txn_in[idx] -= buffer + idx -= 1 + + # one extra element to deduct in case of 2 buffers + if buffer == 2: + txn_in[idx] -= 1 + + return txn_in + + txn_in = apply_micro_buffer_correction(0, txn_in, period) + + # second period + cycles = len(txn_in) + + counter, cycles, txn_in = top_level_phase.traverse_phase_tree(0, counter, cycles, txn_in) + txn_in = apply_micro_buffer_correction(period, txn_in, period) + + # final assignments + + all_txns_in = np.empty((len(txns_in.keys()), cycles), dtype=np.int32) + all_txns_in[0, :] = np.array(txn_in[:]) + compressed_np_array = compress_numpy_to_string(all_txns_in) + 
self.set_nodeattr("io_chrc_in", compressed_np_array) + self.set_nodeattr("io_chrc_in_original", compressed_np_array) + + counter = 0 + cycles = 0 + + counter, cycles, txn_out = top_level_phase.traverse_phase_tree(1, counter, cycles, txn_out) + + cycles = period + + counter, cycles, txn_out = top_level_phase.traverse_phase_tree(1, counter, cycles, txn_out) + + all_txns_out = np.empty((len(txns_out.keys()), cycles), dtype=np.int32) + all_txns_out[0, :] = np.array(txn_out[:]) + compressed_np_array = compress_numpy_to_string(all_txns_out) + self.set_nodeattr("io_chrc_out", compressed_np_array) + self.set_nodeattr("io_chrc_out_original", compressed_np_array) + def generate_hdl_memstream(self, fpgapart, pumped_memory=0): """Helper function to generate verilog code for memstream component. Currently utilized by MVAU, VVAU and HLS Thresholding layer.""" ops = ["MVAU_hls", "MVAU_rtl", "VVAU_hls", "VVAU_rtl", "Thresholding_hls"] - if self.onnx_node.op_type in ops or self.onnx_node.op_type.startswith("Elementwise"): + if self.onnx_node.op_type in ops: template_path = ( os.environ["FINN_ROOT"] + "/finn-rtllib/memstream/hdl/memstream_wrapper_template.v" ) @@ -374,21 +555,28 @@ def generate_hdl_dynload(self): ) as f: f.write(template_wrapper) - def derive_characteristic_fxns(self, period, override_rtlsim_dict=None): - """Return the unconstrained characteristic functions for this node.""" + def derive_token_access_vectors_using_rtlsim( + self, model, period, fpga_part, clk_period, override_rtlsim_dict=None + ): + """Return the token access vectors for this node using rtlsim.""" # ensure rtlsim is ready + + periods_to_simulate = 5 + periods_to_store = 2 + + if self.get_nodeattr("rtlsim_so") == "": + self.prepare_rtlsim() + assert self.get_nodeattr("rtlsim_so") != "", "rtlsim not ready for " + self.onnx_node.name - if self.get_nodeattr("io_chrc_period") > 0: - warnings.warn("Skipping node %s: already has FIFO characteristic" % self.onnx_node.name) - return - exp_cycles = 
self.get_exp_cycles() - n_inps = np.prod(self.get_folded_input_shape()[:-1]) - n_outs = np.prod(self.get_folded_output_shape()[:-1]) + + exp_cycles = (self.get_exp_cycles() + 20) * periods_to_simulate + n_inps = np.prod(self.get_folded_input_shape()[:-1]) * periods_to_simulate + n_outs = np.prod(self.get_folded_output_shape()[:-1]) * periods_to_simulate if exp_cycles == 0: # try to come up with an optimistic estimate exp_cycles = min(n_inps, n_outs) assert ( - exp_cycles <= period + exp_cycles <= period * periods_to_simulate ), "Period %d too short to characterize %s : expects min %d cycles" % ( period, self.onnx_node.name, @@ -397,6 +585,10 @@ def derive_characteristic_fxns(self, period, override_rtlsim_dict=None): sim = self.get_rtlsim() if override_rtlsim_dict is not None: io_dict = override_rtlsim_dict + + for input_key in io_dict["inputs"]: + io_dict["inputs"][input_key] = io_dict["inputs"][input_key] * periods_to_simulate + else: io_dict = { "inputs": { @@ -407,25 +599,23 @@ def derive_characteristic_fxns(self, period, override_rtlsim_dict=None): # extra dicts to keep track of cycle-by-cycle transaction behavior # note that we restrict key names to filter out weight streams etc - txns_in = {key: [] for (key, value) in io_dict["inputs"].items() if "in" in key} - txns_out = {key: [] for (key, value) in io_dict["outputs"].items() if "out" in key} + txns_in = {key: [] for (key, value) in io_dict["inputs"].items() if "in0" in key} + txns_out = {key: [] for (key, value) in io_dict["outputs"].items() if "out0" in key} # signal name, note no underscore at the end (new finnxsi behavior) sname = "_V" self.reset_rtlsim(sim) + # create stream tracers for all input and output streams for k in txns_in.keys(): txns_in[k] = sim.trace_stream(k + sname) for k in txns_out.keys(): txns_out[k] = sim.trace_stream(k + sname) - self.rtlsim_multi_io(sim, io_dict) + + self.rtlsim_multi_io_custom(sim, io_dict, sname="_V", batch_size=periods_to_simulate) + total_cycle_count = 
self.get_nodeattr("cycles_rtlsim") - assert ( - total_cycle_count <= period - ), """Total cycle count from rtl simulation is higher than - specified period, please set the period higher than {}""".format( - total_cycle_count - ) - self.set_nodeattr("io_chrc_period", period) + + self.set_nodeattr("io_chrc_period", total_cycle_count) # call str() on stream tracers to get their outputs, and convert # to list of ints for k in txns_in.keys(): @@ -433,27 +623,33 @@ def derive_characteristic_fxns(self, period, override_rtlsim_dict=None): for k in txns_out.keys(): txns_out[k] = [int(c) for c in str(txns_out[k])] - def accumulate_char_fxn(chrc): - p = len(chrc) + period = total_cycle_count // periods_to_simulate + + def accumulate_char_fxn(chrc, period_to_simulate, periods_to_store, period): + mid_point = period * 2 ret = [] - for t in range(2 * p): - if t == 0: - ret.append(chrc[0]) + for t in range( + mid_point, mid_point + period * 2 + ): # *2 when running 1 sim and replicating + if t == mid_point: + ret.append(chrc[t]) else: - ret.append(ret[-1] + chrc[t % p]) + ret.append(ret[-1] + chrc[t]) return np.asarray(ret, dtype=np.int32) - all_txns_in = np.empty((len(txns_in.keys()), 2 * period), dtype=np.int32) - all_txns_out = np.empty((len(txns_out.keys()), 2 * period), dtype=np.int32) + all_txns_in = np.empty((len(txns_in.keys()), period * periods_to_store), dtype=np.int32) + all_txns_out = np.empty((len(txns_out.keys()), period * periods_to_store), dtype=np.int32) all_pad_in = [] all_pad_out = [] + pad_in = 0 + pad_out = 0 for in_idx, in_strm_nm in enumerate(txns_in.keys()): txn_in = txns_in[in_strm_nm] pad_in = 0 if len(txn_in) < period: pad_in = period - len(txn_in) txn_in += [0 for x in range(pad_in)] - txn_in = accumulate_char_fxn(txn_in) + txn_in = accumulate_char_fxn(txn_in, periods_to_simulate, periods_to_store, period) all_txns_in[in_idx, :] = txn_in all_pad_in.append(pad_in) @@ -463,11 +659,14 @@ def accumulate_char_fxn(chrc): if len(txn_out) < period: pad_out = 
period - len(txn_out) txn_out += [0 for x in range(pad_out)] - txn_out = accumulate_char_fxn(txn_out) + txn_out = accumulate_char_fxn(txn_out, periods_to_simulate, periods_to_store, period) all_txns_out[out_idx, :] = txn_out all_pad_out.append(pad_out) - self.set_nodeattr("io_chrc_in", all_txns_in) - self.set_nodeattr("io_chrc_out", all_txns_out) - self.set_nodeattr("io_chrc_pads_in", all_pad_in) - self.set_nodeattr("io_chrc_pads_out", all_pad_out) + compressed_np_array_in = compress_numpy_to_string(all_txns_in) + self.set_nodeattr("io_chrc_in", compressed_np_array_in) + self.set_nodeattr("io_chrc_in_original", compressed_np_array_in) + + compressed_np_array_out = compress_numpy_to_string(all_txns_out) + self.set_nodeattr("io_chrc_out", compressed_np_array_out) + self.set_nodeattr("io_chrc_out_original", compressed_np_array_out) diff --git a/src/finn/custom_op/fpgadataflow/labelselect.py b/src/finn/custom_op/fpgadataflow/labelselect.py index f925b51652..cb9339b78c 100644 --- a/src/finn/custom_op/fpgadataflow/labelselect.py +++ b/src/finn/custom_op/fpgadataflow/labelselect.py @@ -32,6 +32,7 @@ from qonnx.util.basic import qonnx_make_model, roundup_to_integer_multiple from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp +from finn.util.basic import Characteristic_Node class LabelSelect(HWCustomOp): @@ -95,6 +96,21 @@ def get_folded_output_shape(self, ind=0): oshape = tuple(vecs + [k, 1]) return oshape + def make_shape_compatible_op(self, model): + exp_ishape = self.get_normal_input_shape() + oshape = self.get_normal_output_shape() + ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0])) + assert ishape == exp_ishape, "Unexpected input shape." 
+ return helper.make_node( + "RandomNormal", + inputs=[], + outputs=[self.onnx_node.output[0]], + mean=0.0, + scale=1.0, + dtype=TensorProto.INT64, + shape=list(oshape), + ) + def infer_node_datatype(self, model): node = self.onnx_node # check input datatype against property @@ -104,6 +120,9 @@ def infer_node_datatype(self, model): odt = self.get_output_datatype() model.set_tensor_datatype(self.onnx_node.output[0], odt) + def verify_node(self): + pass + def get_input_datatype(self, ind=0): """Returns FINN DataType of input.""" ret = DataType[self.get_nodeattr("inputDataType")] @@ -161,5 +180,37 @@ def execute_node(self, context, graph): def get_exp_cycles(self): nlabels = self.get_nodeattr("Labels") pe = self.get_nodeattr("PE") - exp_cycles = nlabels / pe + K = self.get_nodeattr("K") + exp_cycles = nlabels // pe + K return int(exp_cycles) + + def get_tree_model(self): + # key parameters + # this depends on the kernel type, hls or rtl etc + + # extract node attr + num_in_words = self.get_nodeattr("Labels") + PE = self.get_nodeattr("PE") + # PE = 1 + K = self.get_nodeattr("K") + + NF = num_in_words // PE + + output_delay = int(np.log2(num_in_words)) + 1 + # output_delay = NF + + print("num_in_words,PE,K,NF,output_delay") + print(num_in_words, PE, K, NF, output_delay) + print(f"exp cycles: {self.get_exp_cycles()}") + + read_k = Characteristic_Node("read only", [(NF, [1, 0])], True) + + compute_k = Characteristic_Node("compute k", [(output_delay, [0, 0])], True) + + write_k = Characteristic_Node("write k", [(K, [0, 1])], True) + + labelselect_top = Characteristic_Node( + "Fill feature map", [(1, read_k), (1, compute_k), (1, write_k)], False + ) + + return labelselect_top # top level phase of this node diff --git a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py index 08d88ac069..2ece81c4a3 100644 --- a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py +++ 
b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py @@ -41,6 +41,7 @@ ) from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp +from finn.util.basic import Characteristic_Node from finn.util.data_packing import numpy_to_hls_code, pack_innermost_dim_as_hex_string # ONNX i/o tensor shape assumptions for MatrixVectorActivation: @@ -467,6 +468,7 @@ def get_exp_cycles(self): mw = self.get_nodeattr("MW") # since mmv != 1 is not supported yet, we set mmv for now to 1 mmv = 1 + exp_cycles = (mh / pe) * (mw / simd) * np.prod(num_inp_vec) / mmv return int(exp_cycles) @@ -882,21 +884,6 @@ def get_op_and_param_counts(self): ret_dict[thres_param_type] = thres_count return ret_dict - def derive_characteristic_fxns(self, period): - n_inps = np.prod(self.get_folded_input_shape()[:-1]) - io_dict = { - "inputs": { - "in0": [0 for i in range(n_inps)], - }, - "outputs": {"out0": []}, - } - mem_mode = self.get_nodeattr("mem_mode") - if mem_mode in ["internal_decoupled", "external"]: - n_weight_inps = self.calc_wmem() - num_w_reps = np.prod(self.get_nodeattr("numInputVectors")) - io_dict["inputs"]["in1"] = [0 for i in range(num_w_reps * n_weight_inps)] - super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict) - def get_verilog_top_module_intf_names(self): intf_names = super().get_verilog_top_module_intf_names() try: @@ -1107,3 +1094,73 @@ def code_generation_ipi(self): else: raise Exception("Unrecognized mem_mode for MatrixVectorActivation") return cmd + + def get_tree_model(self): + MW = self.get_nodeattr("MW") + MH = self.get_nodeattr("MH") + + SIMD = self.get_nodeattr("SIMD") + PE = self.get_nodeattr("PE") + numVectors = np.prod(self.get_nodeattr("numInputVectors")) + SF = int(MW / SIMD) + NF = int(MH / PE) + + IMPL_STYLE = "rtl" if "_rtl" in (self.__class__.__name__) else "hls" + assert IMPL_STYLE in ["rtl", "hls"], "Implementation style must be 'rtl' or 'hls'" + + # additional precision which is typically unnecessary for FIFO size modelling + # if 
IMPL_STYLE == "hls": + # output_delay = 0 # cycles before output starts + # writing when input is read. Typically 2 + # wind_up = 0 # about 3 cycles of wind-up for HLS MVAU + # else: + # # RTL implementation + # output_delay = 0 + wind_up = 0 + + idle = Characteristic_Node("idle cycles", [(1, [0, 0])], True) + read = Characteristic_Node("Read a burst of input", [(1, [1, 0])], True) + write = Characteristic_Node("update output", [(1, [0, 1])], True) + read_and_write = Characteristic_Node("update output", [(1, [1, 1])], True) + + write_PE = Characteristic_Node( + "iterate MW/SIMD and update an output", + [ + (SF - 1, idle), + (1, write), + ], + False, + ) + + feature_map = Characteristic_Node( + "Compute single feature map", + [(wind_up, idle), (SF - 1, read), (0, idle), (1, read_and_write), (NF - 1, write_PE)], + False, + ) + + all_feature_maps = Characteristic_Node( + "compute set of feature maps", [(1, idle), (numVectors, feature_map)], False + ) + + return all_feature_maps + + def derive_token_access_vectors( + self, model, period, strategy, fpga_part, clk_period, op_type, override_dict=None + ): + n_inps = np.prod(self.get_folded_input_shape()[:-1]) + io_dict = { + "inputs": { + "in0": [i for i in range(n_inps)], + }, + "outputs": {"out0": []}, + } + + mem_mode = self.get_nodeattr("mem_mode") + if mem_mode in ["internal_decoupled", "external"]: + n_weight_inps = self.calc_wmem() + num_w_reps = int(np.prod(self.get_nodeattr("numInputVectors"))) + io_dict["inputs"]["in1"] = [i for i in range(num_w_reps * n_weight_inps)] + + super().derive_token_access_vectors( + model, period, strategy, fpga_part, clk_period, op_type, override_dict=io_dict + ) diff --git a/src/finn/custom_op/fpgadataflow/pool.py b/src/finn/custom_op/fpgadataflow/pool.py index 4a1013af05..a72a55c2b4 100644 --- a/src/finn/custom_op/fpgadataflow/pool.py +++ b/src/finn/custom_op/fpgadataflow/pool.py @@ -30,6 +30,7 @@ from qonnx.core.datatype import DataType from finn.custom_op.fpgadataflow.hwcustomop 
import HWCustomOp +from finn.util.basic import Characteristic_Node class Pool(HWCustomOp): @@ -211,3 +212,50 @@ def execute_node(self, context, graph): result = np.right_shift(result.astype(int), shift_bits) oshape = context[node.output[0]].shape context[node.output[0]] = np.asarray(result, dtype=np.float32).reshape(oshape) + + def get_tree_model(self): + # extract node attr + + PE = self.get_nodeattr("PE") + Channels = self.get_nodeattr("Channels") + KernelSize = self.get_nodeattr("KernelSize") + OutImgDims = self.get_nodeattr("OutImgDims") + BatchSize = self.get_nodeattr("BatchSize") + + # Derived parameters + NF = Channels // PE # neuron folding + func = self.get_nodeattr("Function") + if func == "MaxPool": + SF = KernelSize[1] ** 2 # spatial folding per pooling window + if KernelSize[0] == 1 or KernelSize[1] == 1: + if KernelSize[0] == 1: + SF = KernelSize[1] ** 2 + else: + SF = KernelSize[0] ** 2 + SF = np.prod(KernelSize) + reps = BatchSize * np.prod(OutImgDims) # number of pooling windows to process + else: + SF = np.prod(KernelSize) # spatial folding per pooling window + reps = BatchSize * np.prod(OutImgDims) # number of pooling windows to process + + # One input read per SF iteration + read_pooling_input = Characteristic_Node("Read Pool Input", [(1, [1, 0])], True) + + readwrite_pooling_input = Characteristic_Node("Read Write Pool Input", [(1, [1, 1])], True) + + # SF - 1 reads + 1 read that overlaps with write + compute_pool_window = Characteristic_Node( + "Compute Pool Window", + [(SF - 1, read_pooling_input), (1, readwrite_pooling_input)], # overlap with output + False, + ) + + # For each NF tile per pooling window + compute_all_tiles = Characteristic_Node( + "Compute All Tiles", [(NF, compute_pool_window)], False + ) + + # For each image region (spatial + batch) + pool_top = Characteristic_Node("Top Pool Loop", [(reps, compute_all_tiles)], False) + + return pool_top diff --git a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter.py 
b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter.py index 8fcbae5fcc..4582cb22cf 100644 --- a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter.py +++ b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter.py @@ -32,6 +32,7 @@ from qonnx.core.datatype import DataType from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp +from finn.util.basic import Characteristic_Node # does not do anything at the ONNX node-by-node level, and input-output # tensor shapes are the same. performs data width conversion at the rtlsim level @@ -125,6 +126,14 @@ def get_folded_output_shape(self, ind=0): return dummy_t.shape + def get_number_input_values(self): + folded_ishape = self.get_folded_input_shape() + return np.prod(folded_ishape[:-1]) + + def get_number_output_values(self): + folded_oshape = self.get_folded_output_shape() + return np.prod(folded_oshape[:-1]) + def get_instream_width(self, ind=0): in_width = self.get_nodeattr("inWidth") return in_width @@ -175,6 +184,9 @@ def execute_node(self, context, graph): output = np.asarray([output], dtype=np.float32).reshape(*exp_shape) context[node.output[0]] = output + def get_exp_cycles(self): + return np.prod(self.get_folded_input_shape()) + np.prod(self.get_folded_output_shape()) + def lut_estimation(self): """Calculates resource estimations for LUTs""" inw = self.get_instream_width() @@ -203,3 +215,78 @@ def lut_estimation(self): cset_luts += outw return int(cnt_luts + cset_luts) + + def get_tree_model(self): + inWidth = self.get_nodeattr("inWidth") + outWidth = self.get_nodeattr("outWidth") + + wind_up = 0 + + idle = Characteristic_Node("idle", [(1, [0, 0])], True) + + if inWidth > outWidth: + numReps = self.get_number_input_values() + # down-conversion + if inWidth % outWidth != 0: + return None # no support for gcd partial conversion yet + + writes_per_read = inWidth // outWidth + # read 1, write many, repeats for in-word count + + read_input = Characteristic_Node("read 1 word", [(1, [1, 1])], 
True) + + write_output = Characteristic_Node("write words", [(writes_per_read - 1, [0, 1])], True) + + down_convert_word = Characteristic_Node( + "down convert all words in a single transaction", + [(1, read_input), (1, write_output)], + False, + ) + + dwc_top = Characteristic_Node( + "compute a set of DWCs with down conversion", + [(wind_up, idle), (numReps, down_convert_word)], + False, + ) + + elif inWidth < outWidth: + numReps = self.get_number_output_values() + # up-conversion + + if outWidth % inWidth != 0: + return None # no support for gcd partial conversion yet + + reads_per_write = outWidth // inWidth + # read 1, write many, repeats for in-word count + + read_input = Characteristic_Node( + "read first N-1 words", [(reads_per_write - 1, [1, 0])], True + ) + + write_output = Characteristic_Node( + "read Nth word and write output word", [(1, [1, 1])], True + ) + + up_convert_word = Characteristic_Node( + "down convert all words in a single transaction", + [(1, read_input), (1, write_output)], + False, + ) + + dwc_top = Characteristic_Node( + "compute a set of DWCs with up conversion", + [(wind_up, idle), (numReps, up_convert_word)], + False, + ) + + else: + # pass-through + numReps = self.get_number_input_values() + + pass_through = Characteristic_Node("pass-through", [(1, [1, 1])], True) + + dwc_top = Characteristic_Node( + "DWC pass-through, no conversion", [(wind_up, idle), (numReps, pass_through)], False + ) + + return dwc_top diff --git a/src/finn/custom_op/fpgadataflow/thresholding.py b/src/finn/custom_op/fpgadataflow/thresholding.py index 93871b4e11..70bd1a81cb 100644 --- a/src/finn/custom_op/fpgadataflow/thresholding.py +++ b/src/finn/custom_op/fpgadataflow/thresholding.py @@ -33,6 +33,7 @@ from qonnx.util.basic import interleave_matrix_outer_dim_from_partitions from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp +from finn.util.basic import Characteristic_Node class Thresholding(HWCustomOp): @@ -271,3 +272,50 @@ def calc_tmem(self): 
num_channels = self.get_nodeattr("NumChannels") pe = self.get_nodeattr("PE") return num_channels // pe + + def get_tree_model(self): + reps = list(self.get_nodeattr("numInputVectors"))[0] + + NumChannels = self.get_nodeattr("NumChannels") + PE = self.get_nodeattr("PE") + ImgDim = np.prod(list(self.get_nodeattr("numInputVectors"))) // reps + + act = DataType[self.get_nodeattr("outputDataType")] + IMPL_STYLE = "rtl" if "_rtl" in (self.__class__.__name__) else "hls" + assert IMPL_STYLE in ["rtl", "hls"], "Implementation style must be 'rtl' or 'hls'" + + NF = NumChannels // PE + total_iterations = ImgDim * NF + + if IMPL_STYLE == "hls": + output_delay = 0 # 4 if 2023.1 vivado + else: + if act == DataType["BIPOLAR"]: + output_delay = 0 # 4 if 2023.1 vivado + else: + output_delay = 0 + + if total_iterations > output_delay: + read = Characteristic_Node("read", [(output_delay, [1, 0])], True) + + read_write = Characteristic_Node( + "Compute", [(total_iterations - output_delay, [1, 1])], True + ) + + write = Characteristic_Node("write", [(output_delay, [0, 1])], True) + + threshold_top = Characteristic_Node( + "Thresholding Top", [(1, read), (1, read_write), (1, write)], False + ) + + else: + read = Characteristic_Node("Rush-in", [(total_iterations, [1, 0])], True) + idle = Characteristic_Node("Idle", [(output_delay - total_iterations, [0, 0])], True) + + write = Characteristic_Node("Compute", [(total_iterations, [0, 1])], True) + + threshold_top = Characteristic_Node( + "Thresholding Top", [(1, read), (1, idle), (1, write)], False + ) + + return threshold_top # top level phase of this node diff --git a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py index 965fad66e1..5799ba49b2 100644 --- a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py +++ b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py @@ -41,6 +41,7 @@ ) from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp +from 
    def get_tree_model(self):
        """Build the characteristic tree for the VVAU: numReps transactions,
        each of NF folds of SF-1 reads followed by one combined read+write.

        Returns the top-level Characteristic_Node.
        """
        # key parameters
        IMPL_STYLE = "rtl" if "_rtl" in (self.__class__.__name__) else "hls"
        assert IMPL_STYLE in ["rtl", "hls"], "Implementation style must be 'rtl' or 'hls'"

        SIMD = self.get_nodeattr("SIMD")
        PE = self.get_nodeattr("PE")
        Channels = self.get_nodeattr("Channels")
        Kernel_2 = np.prod(self.get_nodeattr("Kernel"))
        NF = int(Channels / PE)  # neuron fold
        numReps = np.prod(self.get_nodeattr("Dim"))
        # NOTE(review): dim_h/dim_w are currently unused
        dim_h, dim_w = self.get_nodeattr("Dim")

        # NOTE(review): both branches compute the same SF today; they are kept
        # separate because the (commented) wind_up values differ per style
        if IMPL_STYLE == "rtl":
            SF = Kernel_2 // SIMD
            # wind_up = 5
        else:
            SF = Kernel_2 // SIMD
            # wind_up = 7

        # INNER = TOTAL_FOLD // SF

        # wind_up_stage = Characteristic_Node(
        #     "write only",
        #     [(wind_up, [1,0])],
        #     True)

        # the windup stage should also exist and delay the outputs
        # this requires the same pattern of limiting SF and is probably best done as a correction
        # after the feature map?
        # alternative is to construct a split of first, middle and last sf,
        # with the first having a longer read phase (sf+windup-1) and the last (sf-windup-1)

        write_out = Characteristic_Node("write out simd (1 for hls)", [(1, [1, 1])], True)

        compute_one_sf = Characteristic_Node("read one SF input", [(1, [1, 0])], True)

        compute_sf = Characteristic_Node(
            "process SF-1 inputs", [(SF - 1, compute_one_sf), (1, write_out)], False
        )

        compute_transaction = Characteristic_Node(
            "Compute VVAU one transaction",
            [
                (NF, compute_sf),
            ],
            False,
        )

        vvau_top = Characteristic_Node(
            "Compute VVAU input set", [(numReps, compute_transaction)], False
        )

        return vvau_top  # top level phase of this node

    def derive_token_access_vectors(
        self, model, period, strategy, fpga_part, clk_period, op_type, override_dict=None
    ):
        """Derive token access vectors for this node via the superclass, feeding
        it an io_dict describing the input/weight streams.

        NOTE(review): "in0" is an index ramp here whereas the removed legacy
        derive_characteristic_fxns used all-zeros — TODO confirm intentional.
        NOTE(review): the weight stream length uses 1 * n_inps; the legacy
        num_w_reps * n_weight_inps computation is kept commented for reference.
        """
        n_inps = np.prod(self.get_folded_input_shape()[:-1])
        io_dict = {
            "inputs": {
                "in0": [i for i in range(n_inps)],
            },
            "outputs": {"out0": []},
        }

        mem_mode = self.get_nodeattr("mem_mode")
        if mem_mode in ["internal_decoupled", "external"]:
            # n_weight_inps = self.calc_wmem()
            # num_w_reps = np.prod(self.get_nodeattr("numInputVectors"))
            io_dict["inputs"]["in1"] = [0 for i in range(1 * n_inps)]

        super().derive_token_access_vectors(
            model, period, strategy, fpga_part, clk_period, op_type, override_dict=io_dict
        )
class JustInTimeSynthesize(Transformation):
    """Synthesize IP and prepare rtlsim only for nodes that still need it.

    Pass 1: for each HLS/RTL node with an empty "io_chrc_in" (and, when
    only_without_tree_model is set, no analytic tree model), run single-node
    codegen and HLS synthesis. Pass 2: prepare rtlsim executables for the same
    selection (excluding branch/FIFO ops), then set exec mode to "rtlsim".
    """

    def __init__(self, part, clk_period, only_without_tree_model=False):
        super().__init__()
        self.part = part                # FPGA part for codegen/synthesis
        self.clk_period = clk_period    # target clock period for codegen
        # if True, only nodes lacking an analytic tree model are synthesized
        self.only_without_tree_model = only_without_tree_model

    def apply(self, model):
        for node in model.graph.node:
            inst = registry.getCustomOp(node)
            # select: HLS/RTL node, not yet characterized, and (optionally)
            # lacking an analytic tree model
            if (is_hls_node(node) or is_rtl_node(node)) and (
                (
                    (inst.get_tree_model() is None and self.only_without_tree_model)
                    or not self.only_without_tree_model
                )
                and (inst.get_nodeattr("io_chrc_in") == "")
            ):
                _codegen_single_node(
                    node,
                    model,
                    self.part,
                    self.clk_period,
                )

                op_type = node.op_type
                if is_hls_node(node):
                    try:
                        # ensure that code is generated
                        assert (
                            inst.get_nodeattr("code_gen_dir_ipgen") != ""
                        ), """Node
                        attribute "code_gen_dir_ipgen" is empty. Please run
                        transformation PrepareIP first."""
                        if not os.path.isdir(
                            inst.get_nodeattr("ipgen_path")
                        ) or not inst.get_nodeattr("code_gen_dir_ipgen") in inst.get_nodeattr(
                            "ipgen_path"
                        ):
                            # call the compilation function for this node
                            inst.ipgen_singlenode_code()
                        else:
                            warnings.warn("Using pre-existing IP for %s" % node.name)
                        # ensure that executable path is now set
                        assert (
                            inst.get_nodeattr("ipgen_path") != ""
                        ), """Transformation
                        HLSSynthIP was not successful. Node attribute "ipgen_path"
                        is empty."""
                    except KeyError:
                        raise Exception("Custom op_type %s is currently not supported." % op_type)

        model = model.transform(ReplaceVerilogRelPaths())
        for node in model.graph.node:
            inst = registry.getCustomOp(node)
            if (
                (is_hls_node(node) or is_rtl_node(node))
                and (
                    (inst.get_tree_model() is None and self.only_without_tree_model)
                    or not self.only_without_tree_model
                )
                and (
                    # branch/FIFO ops are handled elsewhere (see HandleBranches)
                    node.op_type
                    not in [
                        "AddStreams_hls",
                        "DuplicateStreams_hls",
                        "StreamingFIFO_hls",
                        "StreamingFIFO_rtl",
                    ]
                )
                and (inst.get_nodeattr("rtlsim_so") == "")
            ):
                try:
                    inst.prepare_rtlsim()
                    # ensure that executable path is now set
                    assert (
                        inst.get_nodeattr("rtlsim_so") != ""
                    ), "Failed to prepare RTLSim, no rtlsim_so attribute found."
                except KeyError:
                    # NOTE(review): op_type here is left over from the first
                    # loop — if pass 1 selected no node this raises NameError;
                    # confirm and consider binding node.op_type locally
                    raise Exception("Custom op_type %s is currently not supported." % op_type)

        model = model.transform(SetExecMode("rtlsim"))

        return (model, False)
class DeriveTokenAccessVectors(NodeLocalTransformation):
    """For each HLS/RTL node in the graph, run rtlsim to obtain the i/o
    characteristic function (token access vectors) for FIFO sizing and store
    it as node attributes. Assumes PrepareRTLSim has already been run and
    node names are unique (GiveUniqueNodeNames).

    model/period/strategy/fpga_part/clk_period are forwarded to each node's
    derive_token_access_vectors(); nodes_to_ignore lists node names to skip;
    num_workers / manual_bypass as in NodeLocalTransformation.
    """

    def __init__(
        self,
        model,
        period,
        strategy,
        fpga_part,
        clk_period,
        num_workers=None,
        manual_bypass=False,
        nodes_to_ignore=(),  # fixed: was a mutable [] default; () has identical semantics
    ):
        super().__init__(num_workers=num_workers)
        self.model = model
        self.period = period
        self.strategy = strategy
        self.fpga_part = fpga_part
        self.clk_period = clk_period
        self.manual_bypass = manual_bypass
        self.nodes_to_ignore = set(nodes_to_ignore)

    def applyNodeLocal(self, node):
        op_type = node.op_type
        if is_hls_node(node) or is_rtl_node(node):
            try:
                # lookup op_type in registry of CustomOps
                print("deriving: ", node.name)
                inst = registry.getCustomOp(node)
                if node.name in self.nodes_to_ignore:
                    print(f"ignoring derivation of node {node.name}")
                    return (node, False)

                # branch/FIFO ops are characterized later (see HandleBranches)
                if op_type not in [
                    "AddStreams_hls",
                    "DuplicateStreams_hls",
                    "StreamingFIFO_hls",
                    "StreamingFIFO_rtl",
                ]:
                    inst.derive_token_access_vectors(
                        model=self.model,
                        period=self.period,
                        strategy=self.strategy,
                        fpga_part=self.fpga_part,
                        clk_period=self.clk_period,
                        op_type=op_type,
                    )
            except KeyError:
                # exception if op_type is not supported
                raise Exception("Custom op_type %s is currently not supported." % op_type)
        return (node, False)

    def apply(self, model: ModelWrapper):
        # fixed: both the manual_bypass and default paths returned the same
        # tuple, so the dead conditional is collapsed (behavior unchanged)
        (model, run_again) = super().apply(model)
        return (model, run_again)
""" - def __init__(self, period, num_workers=None, manual_bypass=False): + def __init__( + self, + model, + period, + strategy, + fpga_part, + clk_period, + num_workers=None, + manual_bypass=False, + nodes_to_ignore=[], + ): super().__init__(num_workers=num_workers) + self.model = model self.period = period + self.strategy = strategy + self.fpga_part = fpga_part + self.clk_period = clk_period self.manual_bypass = manual_bypass + self.nodes_to_ignore = set(nodes_to_ignore) def applyNodeLocal(self, node): op_type = node.op_type if is_hls_node(node) or is_rtl_node(node): try: # lookup op_type in registry of CustomOps + print("deriving: ", node.name) inst = registry.getCustomOp(node) - inst.derive_characteristic_fxns(period=self.period) + if node.name in self.nodes_to_ignore: + print(f"ignoring derivation of node {node.name}") + return (node, False) + + if op_type not in [ + "AddStreams_hls", + "DuplicateStreams_hls", + "StreamingFIFO_hls", + "StreamingFIFO_rtl", + ]: + inst.derive_token_access_vectors( + model=self.model, + period=self.period, + strategy=self.strategy, + fpga_part=self.fpga_part, + clk_period=self.clk_period, + op_type=op_type, + ) except KeyError: # exception if op_type is not supported raise Exception("Custom op_type %s is currently not supported." % op_type) @@ -73,114 +203,1053 @@ def apply(self, model: ModelWrapper): (model, run_again) = super().apply(model) if not self.manual_bypass: return (model, run_again) - # apply manual fix for DuplicateStreams and AddStreams for - # simple residual reconvergent paths with bypass + + return (model, run_again) + + +class LocalStretchCharacteristicFunctions(NodeLocalTransformation): + """Prerequisite: DeriveTokenAccessVectors already called on graph. + For each node in the graph, use the accumulated I/O characteristic function + and stretch it if there is a difference in periods between the producer and consumer. 
+ + * num_workers (int or None) number of parallel workers, see documentation in + NodeLocalTransformation for more details. + period (int or None) the period to stretch the individual node chr function dumps to. + """ + + def __init__(self, num_workers=None, period=None, nodes_to_ignore=[]): + super().__init__(num_workers=num_workers) + self.period = period + self.nodes_to_ignore = set(nodes_to_ignore) + + def applyNodeLocal(self, node): + op_type = node.op_type + if is_hls_node(node) or is_rtl_node(node): + try: + if node.name in self.nodes_to_ignore or node.op_type in [ + "AddStreams_hls", + "DuplicateStreams_hls", + "StreamingFIFO_hls", + "StreamingFIFO_rtl", + ]: + return (node, False) + + # model = self.ref_input_model + + # lookup op_type in registry of CustomOps + prod = registry.getCustomOp(node) + + prod_chrc_out_original = decompress_string_to_numpy( + prod.get_nodeattr("io_chrc_out") + )[0] + prod_chrc_in_original = decompress_string_to_numpy(prod.get_nodeattr("io_chrc_in"))[ + 0 + ] + + prod_chrc_out = prod_chrc_out_original + prod_chrc_in = prod_chrc_in_original + + compressed_prod_chrc_out = compress_numpy_to_string(np.array([prod_chrc_out])) + compressed_prod_chrc_in = compress_numpy_to_string(np.array([prod_chrc_in])) + + period = max(len(prod_chrc_in), len(prod_chrc_out)) + + #period = self.period + + # perform stretching if necessary + prod_chrc_in = stretch(prod_chrc_in, period) + prod_chrc_out = stretch(prod_chrc_out, period) + + compressed_prod_chrc_in = compress_numpy_to_string(np.array([prod_chrc_in])) + compressed_prod_chrc_out = compress_numpy_to_string(np.array([prod_chrc_out])) + + # prod.set_nodeattr("io_chrc_in", compressed_prod_chrc_in) + # prod.set_nodeattr("io_chrc_out", compressed_prod_chrc_out) + except KeyError: + # exception if op_type is not supported + raise Exception("Custom op_type %s is currently not supported." 
def get_top_producer_period(node, model):
    """Return (period, producer) for *node*'s upstream non-DWC producer.

    Period is max(len(io_chrc_out), len(io_chrc_in)) // 2 of the producer's
    decompressed characteristic traces, 0 if there is no producer.
    Fixed: the original looped over node.input but recomputed the identical
    find_non_dwc_producer(model, node) every iteration (it only inspects
    input[0]); the lookup is hoisted — results unchanged. Also guards the
    zero-input case, which previously raised NameError at the return.
    """
    if len(node.input) == 0:
        return 0, None
    # prod_node = model.find_producer(input_name)  # per-edge variant, for reference
    prod_node = find_non_dwc_producer(model, node)
    highest_period = 0
    if prod_node is not None:
        inst = registry.getCustomOp(prod_node)
        prod_chrc = decompress_string_to_numpy(inst.get_nodeattr("io_chrc_out"))[0]
        cons_chrc = decompress_string_to_numpy(inst.get_nodeattr("io_chrc_in"))[0]
        highest_period = max(len(prod_chrc) // 2, len(cons_chrc) // 2)
    return highest_period, prod_node


def get_top_consumer_period(node, model):
    """Return (period, consumer) for *node*'s downstream non-DWC consumer.

    Mirror of get_top_producer_period; same loop-invariant hoist applied
    (find_non_dwc_consumer only inspects output[0]).
    """
    if len(node.output) == 0:
        return 0, None
    # cons_node = model.find_consumer(output_name)  # per-edge variant, for reference
    cons_node = find_non_dwc_consumer(model, node)
    highest_period = 0
    if cons_node is not None:
        inst = registry.getCustomOp(cons_node)
        prod_chrc = decompress_string_to_numpy(inst.get_nodeattr("io_chrc_out"))[0]
        cons_chrc = decompress_string_to_numpy(inst.get_nodeattr("io_chrc_in"))[0]
        highest_period = max(len(prod_chrc) // 2, len(cons_chrc) // 2)
    return highest_period, cons_node


def max_throughput(trace, max_depth=10, min_size=10):
    """
    Recursively find the maximum throughput (delta / time) from a cumulative trace.

    Parameters:
        trace (np.ndarray): 1D cumulative access trace.
        max_depth (int): maximum depth of recursive splitting.
        min_size (int): minimum size of segment allowed for consideration.

    Returns:
        float: maximum throughput found in any segment (0.0 when the trace is
        too short or flat).
    """
    segments = [(0, len(trace) - 1)]
    best_throughput = 0.0

    for _ in range(max_depth):
        max_local_throughput = 0
        max_segment = None

        # evaluate current segments; keep the steepest sufficiently-long one
        for start, end in segments:
            duration = end - start
            if duration < min_size:
                continue
            delta = trace[end] - trace[start]
            throughput = delta / duration
            if throughput > max_local_throughput:
                max_local_throughput = throughput
                max_segment = (start, end)

        if max_segment is None:
            break

        best_throughput = max(best_throughput, max_local_throughput)

        # subdivide the fastest segment if both halves stay large enough
        start, end = max_segment
        mid = (start + end) // 2
        if (mid - start) < min_size or (end - mid) < min_size:
            break

        segments = [s for s in segments if s != max_segment]
        segments += [(start, mid), (mid, end)]

    return best_throughput


def get_nodes_until_converging(node, model):
    """Count producers walked upstream from *node* until a DuplicateStreams
    node is reached; returns the count reached if the chain ends at a graph
    input without finding one."""
    # init_node = node
    count = 0
    while node is not None:
        if node.name.startswith("DuplicateStreams"):
            return count
        node = model.find_producer(node.input[0])
        count += 1
    return count


def get_throughput(node, dir="in"):
    """Tokens-per-cycle estimate for *node*'s given direction ("in"/"out").

    Prefers the stretched characteristic trace, falls back to the raw one;
    returns 0 when neither is set. NOTE(review): `dir` shadows the builtin but
    is part of the public signature, so it is kept.
    """
    trace = None
    throughput = 0
    inst = registry.getCustomOp(node)
    if inst.get_nodeattr(f"io_chrc_{dir}_stretch") != "":
        trace = decompress_string_to_numpy(inst.get_nodeattr(f"io_chrc_{dir}_stretch"))[0]
        period = len(trace) // 2
    else:
        if inst.get_nodeattr(f"io_chrc_{dir}") != "":
            trace = decompress_string_to_numpy(inst.get_nodeattr(f"io_chrc_{dir}"))[0]
            period = len(trace) // 2
        else:
            period = 0
    if period != 0:
        # throughput = max_throughput(trace, min_size=int(np.sqrt(period)))
        # average rate over the node's declared period
        throughput = trace[-1] / inst.get_nodeattr("io_chrc_period")
        # throughput = max_throughput(trace, min_size=1000)
    return throughput


def get_parent_throughput(node, model):
    """Max output throughput over *node*'s direct producers (0 for absent ones)."""
    throughputs = []
    for indx, input_name in enumerate(node.input):
        prod_node = model.find_producer(input_name)
        if prod_node is not None:
            throughputs.append(get_throughput(prod_node, "out"))
        else:
            throughputs.append(0)
    return max(throughputs)
def get_parent(node, model):
    """Return the producer of *node*'s first input, or None.

    NOTE(review): the else-branch returns None on the first iteration, so only
    input[0] is ever inspected and the trailing return covers zero inputs.
    """
    for indx, input_name in enumerate(node.input):
        prod_node = model.find_producer(input_name)
        if prod_node is not None:
            return prod_node
        else:
            return None
    return None


def get_consumer(node, model):
    """Return the consumer of *node*'s first output (None when node has no
    outputs, via implicit return)."""
    for indx, output_name in enumerate(node.output):
        cons = model.find_consumer(output_name)
        return cons


def get_consumer_throughput(node, model):
    """Max input throughput over *node*'s direct consumers (0 for absent ones).

    NOTE(review): raises ValueError via max([]) when node has no outputs.
    """
    throughputs = []
    for indx, output_name in enumerate(node.output):
        prod_node = model.find_consumer(output_name)
        if prod_node is not None:
            throughputs.append(get_throughput(prod_node, "in"))
        else:
            throughputs.append(0)
    return max(throughputs)


def get_true_period(node):
    """Half the longer of the node's decompressed in/out characteristic traces
    (traces store 2*period entries)."""
    in_chrc = decompress_string_to_numpy(node.get_nodeattr("io_chrc_in"))[0]
    out_chrc = decompress_string_to_numpy(node.get_nodeattr("io_chrc_out"))[0]

    return max(len(in_chrc) // 2, len(out_chrc) // 2)


def get_branch_nodes(last_node, model):
    """Walk producers upstream from *last_node* until the enclosing
    DuplicateStreams_hls node; return (nodes walked, that DuplicateStreams
    node). Assumes the branch does converge — otherwise this loops into a
    graph input and raises."""
    branch_nodes = []
    while last_node.op_type != "DuplicateStreams_hls":
        branch_nodes.append(last_node)
        last_node = model.find_producer(last_node.input[0])
    return branch_nodes, last_node


def get_branch_volume(as_node, indx, model):
    """Collect one branch of an AddStreams node (input index *indx*) back to
    its DuplicateStreams origin.

    Returns (volume, branch, max_i + 1, latency, max_period) where branch is
    [as_node, ...intermediate nodes..., ds_node], max_period/max_i locate the
    slowest node, and volume/latency are placeholders (volume just counts
    nodes; latency is always 0 for now — see inline comments).
    """
    last_node = model.find_producer(as_node.input[indx])
    branch_nodes, ds_node = get_branch_nodes(last_node, model)
    branch = [as_node, *branch_nodes, ds_node]

    # now perform volume calculation based on characteristic functions
    # note that the nodes are reversed, we start at addstreams node
    volume = 0
    max_i = 0
    max_period = 0
    latency = 0
    for i, node in enumerate(branch[1:]):
        volume += 1  # placeholder
        period = registry.getCustomOp(node).get_nodeattr("io_chrc_period")
        if period > max_period:
            max_period = period
            max_i = i

    # actual calculation has to consider the exp cycles and total nr of elements.
    # maybe maximum amount of values per period?
    # we can do this sort of calc by comparing the first consumed token to the
    # last produced token in some form.
    print("returning vol,max_i,lat: ", volume, max_i, latency)

    return volume, branch, max_i + 1, latency, max_period
def find_non_dwc_producer(model, node):
    """Producer of *node*'s first input, skipping over a single interposed
    StreamingDataWidthConverter (identified by node name)."""
    producer = model.find_producer(node.input[0])
    if producer is None:
        return None
    if "StreamingDataWidthConverter" in producer.name:
        producer = model.find_producer(producer.input[0])
    return producer


def find_non_dwc_consumer(model, node):
    """Consumer of *node*'s first output, skipping over a single interposed
    StreamingDataWidthConverter (identified by node name)."""
    consumer = model.find_consumer(node.output[0])
    if consumer is None:
        return None
    if "StreamingDataWidthConverter" in consumer.name:
        consumer = model.find_consumer(consumer.output[0])
    return consumer


def calculate_peak_volume_delta(b0_lat, node_0, b1_lat, node_1, period_0, period_1, global_period):
    """Compare the stretched output traces of the two branch-end nodes and
    derive per-branch extra FIFO depths.

    The traces are stretched to global_period, left-padded by each branch's
    latency, right-padded to equal length, then scanned for the peak positive
    and negative token-count deltas. Returns [depth_for_branch0_side,
    depth_for_branch1_side] = [b1_lat + peak_neg, b0_lat + peak_pos].
    NOTE(review): peak_b0/peak_b1 are computed but unused; period_0/period_1
    parameters are unused — confirm before removing.
    """
    n0 = registry.getCustomOp(node_0)
    n1 = registry.getCustomOp(node_1)
    p0_v = decompress_string_to_numpy(n0.get_nodeattr("io_chrc_out"))[0]
    p1_v = decompress_string_to_numpy(n1.get_nodeattr("io_chrc_out"))[0]

    p0_v = stretch(p0_v, global_period)
    p1_v = stretch(p1_v, global_period)

    # pad vectors with latency (zeros before the first token appears)
    p0_v = np.concatenate((np.zeros(b0_lat, dtype=p0_v.dtype), p0_v))
    p1_v = np.concatenate((np.zeros(b1_lat, dtype=p1_v.dtype), p1_v))

    if len(p0_v) > len(p1_v):
        # pad p1_v end with its final cumulative value
        last = p1_v[-1]
        p1_v = np.concatenate((p1_v, np.array([last] * (len(p0_v) - len(p1_v)), dtype=p1_v.dtype)))
    else:
        # pad p0_v end with its final cumulative value
        last = p0_v[-1]
        p0_v = np.concatenate((p0_v, np.array([last] * (len(p1_v) - len(p0_v)), dtype=p0_v.dtype)))

    p = max(len(p0_v), len(p1_v))

    max_positive_delta = 0
    max_negative_delta = 0
    peak_b0 = 0
    peak_b1 = 0
    peak_deltas = [0, 0]

    # scan cycle-by-cycle for the widest divergence between the branches
    for i in range(p):
        delta = p0_v[i] - p1_v[i]
        if delta > max_positive_delta:
            max_positive_delta = delta
            peak_deltas[0] = delta
        if delta < max_negative_delta:
            max_negative_delta = delta
            peak_deltas[1] = delta * -1

        peak_b0 = max(p0_v[i], peak_b0)
        peak_b1 = max(p1_v[i], peak_b1)

    final_fifos = [int(max(0, (b1_lat)) + peak_deltas[1]), int(max(0, (b0_lat)) + peak_deltas[0])]
    return final_fifos
def compute_node_latency_init_periods(node, branch_max):
    """Distance (in cycles) between the points where the node's stretched
    input and output traces reach their final values — used as a per-node
    latency proxy. *node* is a CustomOp instance, *branch_max* the stretch
    target period."""
    cons_chrc = decompress_string_to_numpy(node.get_nodeattr("io_chrc_in"))[0]
    prod_chrc = decompress_string_to_numpy(node.get_nodeattr("io_chrc_out"))[0]

    cons_chrc = stretch(cons_chrc, branch_max)
    prod_chrc = stretch(prod_chrc, branch_max)

    def max_dist(a, b):
        # index of the first occurrence of each trace's final value
        a_last = a[-1]
        b_last = b[-1]

        idx_a = np.argmax(a == a_last)
        idx_b = np.argmax(b == b_last)

        return abs(idx_a - idx_b)

    max_distance = max_dist(cons_chrc, prod_chrc)
    return max_distance


def get_full_branch_latency(nodes, branch_max):
    """Sum of compute_node_latency_init_periods over *nodes* (ONNX nodes)."""
    total_latency = 0
    for node in nodes:
        total_latency += compute_node_latency_init_periods(registry.getCustomOp(node), branch_max)
    return total_latency


def assign_extra_fifo_volume(as_node, model, global_period):
    """Characterize a reconvergent AddStreams/DuplicateStreams pair and size
    the extra FIFO depth on the DuplicateStreams outputs.

    The DuplicateStreams node inherits its producer's characteristic
    attributes; the AddStreams node inherits its consumer's. The per-branch
    depths come from calculate_peak_volume_delta over both branch latencies.
    Returns the total extra depth added. NOTE(review): this is a stated
    over-estimation to be refined (see inline comments).
    """
    assert len(as_node.input) > 1

    _, branch_0, _, _, period_0 = get_branch_volume(as_node, 0, model)
    _, branch_1, _, _, period_1 = get_branch_volume(as_node, 1, model)

    # propagate the producer to duplicatestreams node
    ds_node = registry.getCustomOp(branch_0[-1])
    prod_node = model.find_producer(branch_0[-1].input[0])

    period_ds = get_true_period(registry.getCustomOp(prod_node))

    tav_ds = registry.getCustomOp(prod_node).get_nodeattr("io_chrc_out")
    tav_stretched_ds = registry.getCustomOp(prod_node).get_nodeattr("io_chrc_out_stretch")
    tav_pad_ds = registry.getCustomOp(prod_node).get_nodeattr("io_chrc_out_original")
    ds_node.set_nodeattr("io_chrc_in", tav_ds)
    ds_node.set_nodeattr("io_chrc_out", tav_ds)

    ds_node.set_nodeattr("io_chrc_in_original", tav_pad_ds)
    ds_node.set_nodeattr("io_chrc_out_original", tav_pad_ds)

    ds_node.set_nodeattr("io_chrc_in_stretch", tav_stretched_ds)
    ds_node.set_nodeattr("io_chrc_out_stretch", tav_stretched_ds)

    ds_node.set_nodeattr("io_chrc_period", period_ds)

    # last node with latencies version
    latency_to_first_output_0 = get_full_branch_latency(branch_0[1:], period_0)
    latency_to_first_output_1 = get_full_branch_latency(branch_1[1:], period_1)
    peak_deltas = calculate_peak_volume_delta(
        latency_to_first_output_0,
        branch_0[1],
        latency_to_first_output_1,
        branch_1[1],
        period_0,
        period_1,
        global_period,
    )

    # latency_delta = max(latency_0, latency_1) - min(latency_0, latency_1)
    # peak delta should also contain additional fifos
    # for any latency differences between nodes
    # here we take the sum input to output latency
    # of each node in a branch and take the
    # last node's volume at that clock
    # This is a severe over-estimation to improve in the future

    addstrm_node_inst = registry.getCustomOp(as_node)

    add_strm_child = get_consumer(as_node, model)
    volumes = [0, 0]

    volumes[0] = peak_deltas[1]
    volumes[1] = peak_deltas[0]

    print([volumes[0], volumes[1]])
    ds_node.set_nodeattr("extra_branch_fifos", volumes)

    old_sizes = ds_node.get_nodeattr("outFIFODepths")
    old_sizes[0] += volumes[0]
    old_sizes[1] += volumes[1]
    ds_node.set_nodeattr("outFIFODepths", old_sizes)

    tav = registry.getCustomOp(add_strm_child).get_nodeattr("io_chrc_in")
    tav_pad = registry.getCustomOp(add_strm_child).get_nodeattr("io_chrc_in_original")

    period_add = get_true_period(registry.getCustomOp(add_strm_child))

    addstrm_node_inst.set_nodeattr("io_chrc_in", tav)
    addstrm_node_inst.set_nodeattr("io_chrc_out", tav)

    addstrm_node_inst.set_nodeattr("io_chrc_out_original", tav_pad)
    addstrm_node_inst.set_nodeattr("io_chrc_in_original", tav_pad)

    addstrm_node_inst.set_nodeattr("io_chrc_period", period_add)
    return sum(volumes)


class HandleBranches(Transformation):
    """Given a characterized model, additionally generate the token
    access vectors for DuplicateStreams and AddStreams such that no
    deadlocks occur. These nodes were not characterized in the
    DeriveTokenAccessVectors step and must inherit the edge node
    token access vectors of the faster of the two branches.
    The inherited token access vector is also further padded in this
    case to simulate additional stalling on the faster branch.
    We expect the stretching operation afterwards to stretch the
    faster branch 'less' due to this padding, thus introducing FIFO
    depth during the DeriveFIFOSizes transform.
    """

    def __init__(self, model, period):
        super().__init__()
        self.model = model
        self.period = period  # global period passed to the volume calculation

    def apply(self, model: ModelWrapper):
        # NOTE(review): depth_added is accumulated but never used/returned
        depth_added = 0
        addstrm_nodes = model.get_nodes_by_op_type("AddStreams_hls")
        if len(addstrm_nodes) == 0:
            warnings.warn("No AddStreams nodes found, skipping")
            return (model, False)
        for addstrm_node in addstrm_nodes:
            depth_added += assign_extra_fifo_volume(addstrm_node, model, self.period)

        return (model, False)
class ProducerDelayCharacteristicFunctions(NodeLocalTransformation):
    """Prerequisite: DeriveTokenAccessVectors already called on graph.
    For each node in the graph, use the accumulated I/O characteristic function
    and delay it if there is a difference in periods between the producer and
    consumer. This step adjusts for a delayed consumer and a fast producer so
    that additional depth is not introduced by stretching the consumer too much
    in the next step. The consumer is 'faster' than what an immediate stretch
    might produce if we don't adjust for the latency of the producer's output
    starting to arrive.

    * num_workers (int or None): number of parallel workers, see documentation
      in NodeLocalTransformation for more details.
    * period (int or None): the period to stretch the individual node chr
      function dumps to.
    """

    def __init__(self, num_workers=None, period=None, nodes_to_ignore=[]):
        super().__init__(num_workers=num_workers)
        self.period = period
        # node names to exclude from this adjustment
        self.nodes_to_ignore = set(nodes_to_ignore)

    def applyNodeLocal(self, node):
        op_type = node.op_type
        if is_hls_node(node) or is_rtl_node(node):
            print(f"PRODUCER delaying {node.name}")
            try:
                # lookup op_type in registry of CustomOps
                prod = registry.getCustomOp(node)

                # branch/FIFO ops are handled elsewhere
                if node.op_type in [
                    "DuplicateStreams_hls",
                    "StreamingFIFO_hls",
                    "StreamingFIFO_rtl",
                ]:
                    return (node, False)
                if node.name in self.nodes_to_ignore:
                    return (node, False)

                prod_chrc_out = decompress_string_to_numpy(prod.get_nodeattr("io_chrc_out"))[0]
                # traces hold 2*period entries
                period = len(prod_chrc_out) // 2
                prod.set_nodeattr("io_chrc_period", period)

                model = self.ref_input_model
                for output_name in node.output:
                    #cons = model.find_consumer(output_name)
                    cons = find_non_dwc_consumer(model, node)
                    if cons is None:
                        print("first node, skip")
                        continue

                    cons = registry.getCustomOp(cons)
                    cons_chrc_in = decompress_string_to_numpy(cons.get_nodeattr("io_chrc_in"))[0]

                    diff = len(cons_chrc_in) - len(prod_chrc_out)

                    # consumer trace longer -> stretch this node's output trace
                    if diff > 0:
                        # stretching
                        prod_chrc_out_stretch = stretch(prod_chrc_out, len(cons_chrc_in))

                        # padding (alternative approach, kept for reference)
                        # prod_chrc_out_stretch = np.concatenate(
                        #     [prod_chrc_out, np.array([prod_chrc_out[-1]] * diff)]
                        # )

                        prod.set_nodeattr(
                            "io_chrc_out_stretch",
                            compress_numpy_to_string(np.array([prod_chrc_out_stretch])),
                        )
            except KeyError:
                # exception if op_type is not supported
                raise Exception("Custom op_type %s is currently not supported." % op_type)
        return (node, False)
class DelayCharacteristicFunctions(NodeLocalTransformation):
    """Prerequisite: DeriveTokenAccessVectors already called on graph.
    For each node in the graph, use the accumulated I/O characteristic function
    and delay it if there is a difference in periods between the producer and
    consumer. This step adjusts for a delayed consumer and a fast producer so
    that additional depth is not introduced by stretching the consumer too much
    in the next step. The consumer is 'faster' than what an immediate stretch
    might produce if we don't adjust for the latency of the producer's output
    starting to arrive.

    * num_workers (int or None): number of parallel workers, see documentation
      in NodeLocalTransformation for more details.
    * period (int or None): the period to stretch the individual node chr
      function dumps to.
    """

    def __init__(self, num_workers=None, period=None, nodes_to_ignore=[]):
        super().__init__(num_workers=num_workers)
        self.period = period
        # node names to exclude from this adjustment
        self.nodes_to_ignore = set(nodes_to_ignore)

    def applyNodeLocal(self, node):
        op_type = node.op_type
        if is_hls_node(node) or is_rtl_node(node):
            print(f"delaying {node.name}'s consumer")
            try:
                # lookup op_type in registry of CustomOps
                # prod = registry.getCustomOp(node)

                if node.op_type in [
                    "DuplicateStreams_hls",
                    "StreamingFIFO_hls",
                    "StreamingFIFO_rtl",
                ]:
                    return (node, False)
                # assert not (op_type.startswith("StreamingFIFO")), "Found existing FIFOs"
                # we allow a FIFO, it will get removed in the next transform and is used to
                # fill in a bypass branch
                if node.name in self.nodes_to_ignore:
                    print(f"ignoring delaying of node {node.name} consumers")
                    return (node, False)

                # perform stretching if necessary
                # prod_period = prod.get_nodeattr("io_chrc_period")

                model = self.ref_input_model
                for input_name in node.input:
                    #prod = model.find_producer(input_name)
                    prod = find_non_dwc_producer(model, node)
                    if prod is None:
                        print("last node, skip")
                        continue

                    prod = registry.getCustomOp(prod)

                    prod_chrc_out = decompress_string_to_numpy(prod.get_nodeattr("io_chrc_out"))[0]
                    # period = len(prod_chrc_out) // 2

                    cons = registry.getCustomOp(node)
                    cons_chrc_in = decompress_string_to_numpy(cons.get_nodeattr("io_chrc_in"))[0]

                    cons_period = len(cons_chrc_in) // 2

                    cons.set_nodeattr("io_chrc_period", cons_period)

                    # NOTE(review): debug aid — mutates global numpy print
                    # options as a side effect; consider removing
                    import sys

                    np.set_printoptions(threshold=sys.maxsize)

                    diff = len(prod_chrc_out) - len(cons_chrc_in)

                    # producer trace longer -> stretch this node's input trace
                    if diff > 0:
                        print("padding cons input")

                        # stretch
                        cons_chrc_in_stretch = stretch(cons_chrc_in, len(prod_chrc_out))

                        # padding (alternative approach, kept for reference)
                        # cons_chrc_in_stretch = np.concatenate(
                        #     [np.array([cons_chrc_in[-1]] * diff), cons_chrc_in]
                        # )
                        #
                        cons.set_nodeattr(
                            "io_chrc_in_stretch",
                            compress_numpy_to_string(np.array([cons_chrc_in_stretch])),
                        )

                    compressed_cons_chrc_in = compress_numpy_to_string(np.array([cons_chrc_in]))
                    # compressed_cons_chrc_out = compress_numpy_to_string(np.array([cons_chrc_out]))

                    # setting these parameters here will make final
                    # characterization func comparisons impossible!
                    cons.set_nodeattr("io_chrc_in", compressed_cons_chrc_in)
                    print(f"updated {cons.onnx_node.name} period to {len(cons_chrc_in)}")

            except KeyError:
                # exception if op_type is not supported
                raise Exception("Custom op_type %s is currently not supported." % op_type)
        return (node, False)
% op_type) + return (node, False) + + +def inter_token_gaps(tav): + if tav is None or tav.size == 0: + return np.array([1]), np.array([0]) # reasonable defaults + + # Find indices where tokens are added (nonzero diff indicates a new token) + token_times = np.flatnonzero(np.diff(tav) > 0) + 1 # +1 to align with time index + + if token_times.size < 2: + # Not enough token events to compute gaps + # Default gap of 1 between tokens (or 0 if no tokens) + return np.array([1]), token_times + + # Compute gaps between token emissions + # median = np.median + gaps = np.diff(token_times) + # median_gap = np.array([int(np.median(gaps))]) + return gaps, token_times # ,gaps_min + + +def remove_trailing_duplicates_keep_one(arr): + arr = np.asarray(arr) + if arr.size == 0: + return arr + + last_val = arr[-1] + # Find index where values stop being the same as the last value (from the end) + i = len(arr) - 1 + while i > 0 and arr[i - 1] == last_val: + i -= 1 + + # Keep everything before the trailing duplicates + one final instance + return np.concatenate((arr[:i], [last_val])) + + +def remove_leading_duplicates_keep_one(arr): + arr = np.asarray(arr) + if arr.size == 0: + return arr + + first_val = arr[0] + # Find index where values stop being the same as the first value (from the start) + i = 0 + while i < len(arr) - 1 and arr[i + 1] == first_val: + i += 1 + + # Keep one leading instance, then the rest + return np.concatenate(([first_val], arr[i + 1 :])) + +class DeriveFIFOSizes(Transformation): + """Prerequisite: DeriveTokenAccessVectors, ProducerDelayCharacteristic + # and DelayCharacteristic already called on graph. + For each node in the graph, use the accumulated Token Access Vectors + to perform FIFO sizing, setting the in/outFIFODepths attributes of HLSCustomOp + nodes. 
+ """ + + def __init__( + self, + num_workers=None, + io_fifo_depth=5, + period=None, + nodes_to_ignore=[], + global_offset_correction=False, + tav_utilization_strategy="conservative_relaxation", + ): + super().__init__() + self.io_fifo_depth = io_fifo_depth + self.period = period + self.minimum_size = 2 + self.nodes_to_ignore = set(nodes_to_ignore) + self.global_budgets = [] + self.slowdown_so_far = [0, 0] + self.fifos_removed = 0 + self.max_delay_so_far = 0 + self.nodes_parsed = 0 + self.global_offset_correction = global_offset_correction + self.tav_utilization_strategy = tav_utilization_strategy + self.delta_total_fifo_size = 0 + self.delta_adjusted_fifo_size = 0 + self.hybrid_fifo_size_rate = 0 + self.data_rate_total_fifo_size = 0 + self.data_rate_adjusted_fifo_size = 0 + self.hybrid_fifo_size = 0 + + def apply(self, model): + nodes = [node for node in model.graph.node] + + for node in nodes: + op_type = node.op_type + if is_hls_node(node) or is_rtl_node(node): + try: + # lookup op_type in registry of CustomOps + self.nodes_parsed += 1 + + if node.name in self.nodes_to_ignore: + continue + + if "StreamingDataWidthConverter" in node.name: + continue + + assert not (op_type.startswith("StreamingFIFO")), "Found existing FIFOs" + + prod = registry.getCustomOp(node) + out_fifo_depths = [] + for indx, output_name in enumerate(node.output): + #cons_node = model.find_consumer(output_name) + cons_node = find_non_dwc_consumer(model,node) + if cons_node is None: + # could be final node, will be overridden if so + # need an entry in the list anyway + out_fifo_depths.append(self.io_fifo_depth) + continue + + cons = registry.getCustomOp(cons_node) + + if node.op_type != "AddStreams_hls": + # determine which of prod and cons TAVs to compare + # based on which one was stretched + chr_pairs = [] + + if prod.get_nodeattr("io_chrc_out_stretch") != "": + chr_pairs.append(["io_chrc_out_stretch", "io_chrc_in"]) + + if cons.get_nodeattr("io_chrc_in_stretch") != "": + 
chr_pairs.append(["io_chrc_out", "io_chrc_in_stretch"]) + + if len(chr_pairs) == 0: + chr_pairs = [["io_chrc_out", "io_chrc_in"]] + + + depth_attempts = [] + # currently only testing the first (main) pair + + if (prod.get_nodeattr(chr_pairs[0][0])) == "": + out_fifo_depths.append(2) + continue + + if (cons.get_nodeattr(chr_pairs[0][1])) == "": + out_fifo_depths.append(2) + continue + + for pair in chr_pairs[:1]: + if (prod.get_nodeattr(pair[0])) != "": + prod_chrc = decompress_string_to_numpy( + prod.get_nodeattr(pair[0]) + )[0] + else: + out_fifo_depths.append(2) + continue + + if (cons.get_nodeattr(pair[1])) != "": + cons_chrc = decompress_string_to_numpy( + cons.get_nodeattr(pair[1]) + )[0] + else: + out_fifo_depths.append(2) + continue + + if len(cons_chrc) != len(prod_chrc): + period_prod = max(len(prod_chrc) // 2, len(cons_chrc) // 2) + cons_chrc = stretch(cons_chrc, period_prod * 2) + prod_chrc = stretch(prod_chrc, period_prod * 2) + else: + period_prod = len(prod_chrc) // 2 + + global_period = self.period + + prod_original_chr = decompress_string_to_numpy( + prod.get_nodeattr("io_chrc_out") + )[0] + cons_original_chr = decompress_string_to_numpy( + cons.get_nodeattr("io_chrc_in") + )[0] + + prod_chr_original = decompress_string_to_numpy( + prod.get_nodeattr("io_chrc_out_original") + )[0] + cons_chr_original = decompress_string_to_numpy( + cons.get_nodeattr("io_chrc_in_original") + )[0] + + period_true = len(prod_original_chr) // 2 + + period_cons = len(cons_original_chr) // 2 + + # Step 1: Compute un-relaxed initial FIFO size guess - a conservative estimate to further + # decrease in size using relaxation strategies + + # find phase shift + pshift_min = 0 + + for pshift_cand in range(period_prod): + prod_chrc_part = prod_chrc[pshift_cand:period_prod] + cons_chrc_part = cons_chrc[: period_prod - pshift_cand] + if (prod_chrc_part >= cons_chrc_part).all(): + pshift_min = pshift_cand + break + + # shift TAVs by that amount + pshift_min = max(0, pshift_min - 
max(0, period_true - period_cons)) + prod_chrc_part = prod_chrc[pshift_min : (pshift_min + period_prod)] + cons_chrc_part = cons_chrc[:period_prod] + diff = prod_chrc_part - cons_chrc_part + + # find peak delta between the two TAVs and use as initial FIFO guess + max_pos = np.argmax(diff) + fifo_depth_maximum = max(0, int(diff[max_pos])) + + # Step 2: Compute relaxation factors to refine the fifo size computed in Step 1 + # using the original tav for determining data rates + + parent_period, producer_node = get_top_producer_period(node, model) + consumer_period, consumer_node = get_top_consumer_period( + node, model + ) + + gaps, token_times = inter_token_gaps(prod_chr_original) + gaps_cons, token_times_cons = inter_token_gaps(cons_chr_original) + + local_max_delay_prod_list = sorted(gaps, reverse=True) + local_max_delay_cons_list = sorted(gaps_cons, reverse=True) + + local_max_delay_prod = local_max_delay_prod_list[-1] + local_max_delay_cons = local_max_delay_cons_list[ + min(0, len(local_max_delay_cons_list) - 1) + ] + print("prod del: ",local_max_delay_prod_list) + print("cons:delay: ",local_max_delay_cons_list) + + min_gap = min( + len(local_max_delay_prod_list), len(local_max_delay_cons_list) + ) + + gap_ratios = np.array( + local_max_delay_cons_list[:min_gap] + ) / np.array(local_max_delay_prod_list[:min_gap]) + + self.max_delay_so_far = max( + self.max_delay_so_far, local_max_delay_prod + ) + + # Compute the slowdown numerator using the new logic + effective_depth = min(len(gap_ratios), fifo_depth_maximum) + remainder = fifo_depth_maximum - effective_depth + + if len(gap_ratios) > 0: + last_value = gap_ratios[-1] + else: + last_value = 0 + # or raise an error if gap_ratios is + # expected to have at least one element + + slowdown_numerator = ( + sum(gap_ratios[:effective_depth]) + remainder * last_value + ) + + fifo_slowdown = slowdown_numerator / period_true + fifo_slowdown = sum(gap_ratios) / period_true + + minimum_fifos_true = int( + 
(local_max_delay_prod + local_max_delay_cons) + / local_max_delay_prod + ) + minimum_fifos = minimum_fifos_true + + fifo_slowdown_rate = ( + minimum_fifos_true * local_max_delay_prod + ) / period_true + + cycle_loss_of_fifo = max( + 1, local_max_delay_cons - local_max_delay_prod + ) + parent_period = min(parent_period, global_period) + + # ======= TOLERABLE SLOWDOWN CALCULATION ========================= + tolerable_slowdown_parent = max( + 0, + 1 + - ( + parent_period / (global_period - self.slowdown_so_far[indx]) + ), + ) + tolerable_slowdown_prod = max( + 0, + 1 + - (period_prod / (global_period - self.slowdown_so_far[indx])), + ) + tolerable_slowdown = min( + [tolerable_slowdown_parent, tolerable_slowdown_prod] + ) + + prod_loss = (global_period - period_true) // cycle_loss_of_fifo + cons_loss = (global_period - period_cons) // cycle_loss_of_fifo + pred_loss = (global_period - parent_period) // cycle_loss_of_fifo + # print("node: ",node.name) + # print("pred, prod, cons periods and losses:") + # print(parent_period, period_true, period_cons) + # print(pred_loss, prod_loss, cons_loss) + #ignorable_fifos = int(max(0,min(prod_loss, cons_loss, pred_loss))) + ignorable_fifos = int(max(0,min([prod_loss]))) + + if producer_node is not None: + if producer_node.op_type.startswith("DuplicateStreams"): + ignorable_fifos = 0 + if consumer_node is not None: + if consumer_node.op_type.startswith("AddStreams"): + ignorable_fifos = 0 + + minimized_depth = max(2, fifo_depth_maximum - ignorable_fifos) + minimum_fifos = max(1, minimum_fifos - ignorable_fifos) + + if fifo_slowdown > tolerable_slowdown: + fifos_to_remove = int( + fifo_depth_maximum * tolerable_slowdown / fifo_slowdown + ) + else: + fifos_to_remove = fifo_depth_maximum + + if fifo_slowdown_rate > tolerable_slowdown: + fifos_to_remove_rate = int( + minimum_fifos_true * tolerable_slowdown / fifo_slowdown_rate + ) + else: + fifos_to_remove_rate = minimum_fifos_true + + + delta_fifo_size_post_adjustment = max( + 0, 
fifo_depth_maximum - max(fifos_to_remove, ignorable_fifos ) + ) + #print("fifos to remove: ", fifos_to_remove) + delta_fifo_size_post_adjustment_rate = max( + 0, minimum_fifos_true - fifos_to_remove_rate + ) + + hybrid_size = max(minimum_fifos, delta_fifo_size_post_adjustment) + hybrid_size_rate = max( + delta_fifo_size_post_adjustment, + delta_fifo_size_post_adjustment_rate, + ) + + self.delta_total_fifo_size += fifo_depth_maximum + self.delta_adjusted_fifo_size += delta_fifo_size_post_adjustment + + self.data_rate_total_fifo_size += minimum_fifos_true + self.data_rate_adjusted_fifo_size += minimum_fifos + self.hybrid_fifo_size += hybrid_size + self.hybrid_fifo_size_rate += hybrid_size_rate + + if self.tav_utilization_strategy == "conservative_relaxation": + # minimized TAV different + fifo_depth = minimized_depth + elif self.tav_utilization_strategy == "aggressive_relaxation": + # minimized delta based, uses slowdown tracking + fifo_depth = delta_fifo_size_post_adjustment + elif self.tav_utilization_strategy == "no_relaxation": + # maximum from TAV comparisons + fifo_depth = fifo_depth_maximum + + # print( + # f"initial size, new sizes: " + # f"{fifo_depth_maximum}, " + # f"{minimized_depth}, " + # f"{self.delta_adjusted_fifo_size}, " + # f"{self.hybrid_fifo_size}, " + # f"{self.hybrid_fifo_size_rate}, " + # f"{self.data_rate_adjusted_fifo_size}" + # ) + + + # override for testing: + #fifo_depth = delta_fifo_size_post_adjustment + + #print(f"sized {node.name} with {fifo_depth} ") + depth_attempts.append(fifo_depth) + fifo_depth = min(depth_attempts) + else: + fifo_depth = 0 + + if node.op_type == "DuplicateStreams_hls": + # propagate slowdown + if indx == 0: + self.slowdown_so_far[1] = self.slowdown_so_far[0] + + extra_volume = prod.get_nodeattr("extra_branch_fifos")[indx] + fifo_depth += extra_volume + else: + extra_volume = prod.get_nodeattr("extra_branch_fifos")[0] + fifo_depth += extra_volume + + out_fifo_depths.append(max(fifo_depth, self.minimum_size)) + + 
prod.set_nodeattr("outFIFODepths", out_fifo_depths) + + in_fifo_depths = prod.get_nodeattr("inFIFODepths") + for i, input_name in enumerate(node.input): + if input_name in [x.name for x in model.graph.input]: + in_fifo_depths[i] = max(self.io_fifo_depth, in_fifo_depths[i]) + prod.set_nodeattr("inFIFODepths", in_fifo_depths) + + if node.op_type == "AddStreams_hls": + self.slowdown_so_far[0] = max(self.slowdown_so_far) + + except KeyError: + raise Exception("Custom op_type %s is currently not supported." % op_type) + + #print("final sizes for each strategy: ",self.delta_total_fifo_size, self.delta_adjusted_fifo_size, self.data_rate_total_fifo_size,self.data_rate_adjusted_fifo_size,self.hybrid_fifo_size, self.hybrid_fifo_size_rate) + return (model, False) + + diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py index 164971f0f8..4010f3fc26 100644 --- a/src/finn/util/basic.py +++ b/src/finn/util/basic.py @@ -26,10 +26,15 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+import base64 +import gzip +import json +import numpy as np import os import subprocess import sys import tempfile +from qonnx.custom_op.registry import getCustomOp from qonnx.util.basic import roundup_to_integer_multiple # test boards used for bnn pynq tests @@ -311,3 +316,122 @@ def get_dsp_block(fpgapart): return "DSP48E1" else: return "DSP48E2" + + +def stretch(a, new_length): + n = len(a) + x_old = np.arange(n) + x_new = np.linspace(0, n - 1, new_length) + stretched = np.interp(x_new, x_old, a).round().astype(a.dtype) + return stretched + + +class Characteristic_Node: + def __init__(self, name, sub_phases, leaf): + self.name = name + self.sub_phases = sub_phases + self.cycles_eval = None + self.cycles_inputs = None + self.cycles_outputs = None + self.leaf = leaf + self.debug = False + + def sum(self, op): + if self.leaf: + if op == 2: + return sum([x[0] for x in self.sub_phases]) + else: + return sum([x[0] * x[1][op] for x in self.sub_phases]) + else: + return sum([x[0] * x[1].sum(op) for x in self.sub_phases]) + + def traverse_phase_tree(self, op, counter, cycles, ch_fnc): + """ + The tree traversal function to get the token access vector. + We call it multiple times to get input, output and cycle count vectors. 
+ + + op: 0 input, 1 output, 2 cycle count + counter: current count of op + cycles: current cycle count + ch_fnc: list of counter values at each cycle (the token access vector) + """ + + if ( + self.leaf + ): # immediate write out of the counter state to the array due to being a leaf node + for phase in self.sub_phases: + for _ in range(phase[0]): + if op == 2: + counter += 1 + else: + counter += phase[1][op] + cycles += 1 + ch_fnc.append(counter) + return counter, cycles, ch_fnc + else: # recursive call to the next sub-node + for phase in self.sub_phases: + for _ in range(phase[0]): + counter, cycles, ch_fnc = phase[1].traverse_phase_tree( + op, counter, cycles, ch_fnc + ) + return counter, cycles, ch_fnc + + def get_total_cycles(self, op): + """ + Returns the total length of a characterized node period with the final + timesample being either the final input our output transaction. + op ["in", "out"] + """ + counter = 0 + cycles = 0 + ch_fnc = [] + counter, cycles, ch_fnc = self.traverse_phase_tree(op, counter, cycles, ch_fnc) + last_update = 0 + last_val = ch_fnc[op] + for i in range(1, len(ch_fnc[1:]) + 1): + if ch_fnc[i] > last_val: + last_update = i + last_val = ch_fnc[i] + + return cycles, last_update, ch_fnc + + +def compress_numpy_to_string(arr): + metadata = { + "dtype": str(arr.dtype), # Store dtype as string + "shape": arr.shape, # Store shape as a tuple + } + metadata_str = json.dumps(metadata) # Convert metadata to JSON string + metadata_bytes = metadata_str.encode("utf-8") # Convert metadata to bytes + + compressed_data = gzip.compress(arr.tobytes()) # Compress array data + combined_data = ( + metadata_bytes + b"||" + compressed_data + ) # Concatenate metadata & compressed data + s = base64.b64encode(combined_data).decode("utf-8") + return s # Encode to string + + +def decompress_string_to_numpy(s): + combined_data = base64.b64decode(s.encode("utf-8")) # Decode from base64 + metadata_bytes, compressed_data = combined_data.split(b"||", 1) # Split 
metadata & data + + metadata = json.loads(metadata_bytes.decode("utf-8")) # Decode metadata + dtype = np.dtype(metadata["dtype"]) # Convert dtype back + shape = tuple(metadata["shape"]) # Convert shape back + + decompressed_data = gzip.decompress(compressed_data) # Decompress data + return np.frombuffer(decompressed_data, dtype=dtype).reshape(shape) # Reshape into array + + +def compute_total_model_fifo_size(model): + size = 0 + total_depth = 0 + for node in model.graph.node: + if node.op_type in ["StreamingFIFO", "StreamingFIFO_hls", "StreamingFIFO_rtl"]: + depth = getCustomOp(node).get_nodeattr("depth") + width = getCustomOp(node).get_instream_width() + size += width * depth + total_depth += depth + return size, total_depth diff --git a/src/finn/util/test.py b/src/finn/util/test.py index 2115e058a8..53869fbee8 100644 --- a/src/finn/util/test.py +++ b/src/finn/util/test.py @@ -28,22 +28,45 @@ import pytest +import copy import importlib_resources as importlib +import matplotlib.pyplot as plt import numpy as np import onnx import onnx.numpy_helper as nph import os +import qonnx.custom_op.registry as registry + +# import time import torchvision.transforms.functional as torchvision_util import warnings from brevitas_examples import bnn_pynq, imagenet_classification from pkgutil import get_data from qonnx.core.modelwrapper import ModelWrapper from qonnx.custom_op.registry import getCustomOp +from qonnx.transformation.general import GiveUniqueNodeNames +from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance from finn.core.onnx_exec import execute_onnx +from finn.transformation.fpgadataflow.annotate_cycles import AnnotateCycles +from finn.transformation.fpgadataflow.derive_characteristic import ( + DeriveTokenAccessVectors, +) from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild +from finn.transformation.fpgadataflow.prepare_ip import _codegen_single_node +from finn.transformation.fpgadataflow.replace_verilog_relpaths import ( + 
ReplaceVerilogRelPaths, +) +from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers from finn.transformation.fpgadataflow.vitis_build import VitisBuild, VitisOptStrategy -from finn.util.basic import alveo_default_platform, alveo_part_map, pynq_part_map +from finn.util.basic import ( + alveo_default_platform, + alveo_part_map, + decompress_string_to_numpy, + make_build_dir, + pynq_part_map, +) +from finn.util.fpgadataflow import is_hls_node, is_rtl_node # map of (wbits,abits) -> model example_map = { @@ -184,3 +207,391 @@ def resize_smaller_side(target_pixels, img): def crop_center(size, img): """Crop central size*size window out of a PIL image.""" return torchvision_util.center_crop(img, size) + + +def compare_two_chr_funcs(a, b, max_allowed_volume_delta, max_allowed_length_delta): + # relaxation determines how much leeway we allow for the + # analytical implementation to be off from RTL ground truth + # this leeway may produce larger fifos. + # Output delays due to long pipelines generally do not effect + # fifo sizes and so large relaxation factors for them are expected. + + lower_len = min(len(a), len(b)) + if len(a) != len(b): + len_dif = abs(len(a) - len(b)) + print(f"TAV length delta: {len_dif}") + if len_dif > max_allowed_length_delta: + return False + + peak_volume_delta = np.max(np.abs(a[:lower_len] - b[:lower_len])) + print(f"TAV peak volume delta: {peak_volume_delta}") + if peak_volume_delta > max_allowed_volume_delta: + return False + return True + + +def get_characteristic_fnc(model, node0, part, target_clk_ns, strategy, caching=False): + """ + This helper performs FINN node characterization using either rtlsim + or characteristic functions. If chacteristic function strategy is + requested, but the node does not support it, a fallback to rtlsim + is performed. The primary purpose of this helper is for testing purposes + to evaluate characteristic function final dump equivalence between rtlsim + and characteristic functions. 
+ The CACHING flag controls storing the .onnx model in the build dir to reuse, + which is useful for vastly speeding up debugging of characterization trees""" + + model_cache = None + if caching: + # search for prepared model + build_dir = os.environ["FINN_BUILD_DIR"] + for x in os.listdir(build_dir): + if x.startswith(str(node0)): + model_cache = f"{build_dir}/{x}/model_{strategy}.onnx" + if os.path.exists(model_cache): + model = ModelWrapper(model_cache) + else: + model_cache = None + + if model_cache is None: + model = model.transform(SpecializeLayers(part)) + model = model.transform(GiveUniqueNodeNames()) + + node = model.graph.node[0] + inst = registry.getCustomOp(node) + if (is_hls_node(node) or is_rtl_node(node)) and ( + inst.get_tree_model() is None or strategy == "rtlsim" + ): + _codegen_single_node(node, model, part, target_clk_ns) + + op_type = node.op_type + if is_hls_node(node): + try: + # lookup op_type in registry of CustomOps + + # ensure that code is generated + assert ( + inst.get_nodeattr("code_gen_dir_ipgen") != "" + ), """Node + attribute "code_gen_dir_ipgen" is empty. Please run + transformation PrepareIP first.""" + if not os.path.isdir(inst.get_nodeattr("ipgen_path")) or not inst.get_nodeattr( + "code_gen_dir_ipgen" + ) in inst.get_nodeattr("ipgen_path"): + # call the compilation function for this node + inst.ipgen_singlenode_code() + else: + warnings.warn("Using pre-existing IP for %s" % node.name) + # ensure that executable path is now set + assert ( + inst.get_nodeattr("ipgen_path") != "" + ), """Transformation + HLSSynthIP was not successful. Node attribute "ipgen_path" + is empty.""" + except KeyError: + # exception if op_type is not supported + raise Exception("Custom op_type %s is currently not supported." 
% op_type) + + model = model.transform(ReplaceVerilogRelPaths()) + + node = model.graph.node[0] + inst = registry.getCustomOp(node) + if (is_hls_node(node) or is_rtl_node(node)) and ( + inst.get_tree_model() is None or strategy == "rtlsim" + ): + try: + # lookup op_type in registry of CustomOps + # inst = registry.getCustomOp(node) + inst.prepare_rtlsim() + # ensure that executable path is now set + assert ( + inst.get_nodeattr("rtlsim_so") != "" + ), "Failed to prepare RTLSim, no rtlsim_so attribute found." + except KeyError: + # exception if op_type is not supported + raise Exception("Custom op_type %s is currently not supported." % op_type) + + model = model.transform(AnnotateCycles()) + + period = int(model.analysis(dataflow_performance)["max_cycles"] + 12) + + model = model.transform( + DeriveTokenAccessVectors( + model, + period, + strategy, + part, + target_clk_ns, + ) + ) + if caching: + tmp_caching_output_dir = make_build_dir(str(node0)) + model.save(tmp_caching_output_dir + f"/model_{strategy}.onnx") + + return getCustomOp(model.graph.node[0]) + + +def debug_chr_funcs(chr_in, chr_out, rtlsim_in, rtlsim_out, printout_limit=100): + """This helper prints out characteristic functions for a clean comparison + between the rtlsim-based and characteristic-function-based flows to find bugs + """ + + DEBUG_RAW_FUNCS = True + DEBUG_CONCAT_FUNCS = True + + if DEBUG_RAW_FUNCS or DEBUG_CONCAT_FUNCS: + + def concat_list(a): + b = [] + current = a[0] + b.append(1) + for i in a[1:]: + if i == current: + b[-1] += 1 + else: + b.append(1) + current = i + return b + + chr_in_concat = concat_list(chr_in[0]) + chr_out_concat = concat_list(chr_out[0]) + rtlsim_in_concat = concat_list(rtlsim_in[0]) + rtlsim_out_concat = concat_list(rtlsim_out[0]) + + np.set_printoptions(threshold=np.inf) + + # input port + if DEBUG_RAW_FUNCS: + print(f"\nchr IN: {chr_in[0][:printout_limit]}, {len(chr_in[0])}") + print(f"rtlsim IN: {rtlsim_in[0][:printout_limit]}, {len(rtlsim_in[0])}") + + if 
DEBUG_CONCAT_FUNCS: + print(f"chr IN CONCAT: {chr_in_concat[:printout_limit]}, {len(chr_in_concat)}") + print(f"rtlsim IN CONCAT: {rtlsim_in_concat[:printout_limit]}, {len(rtlsim_in_concat)}") + + # output port + if DEBUG_RAW_FUNCS: + print(f"\nchr OUT: {chr_out[0][:printout_limit]}, {len(chr_out[0])}") + print(f"rtlsim OUT: {rtlsim_out[0][:printout_limit]}, {len(rtlsim_out[0])}") + + if DEBUG_CONCAT_FUNCS: + print(f"chr OUT CONCAT: {chr_out_concat[:printout_limit]}, {len(chr_out_concat)}") + print( + f"rtlsim OUT CONCAT: {rtlsim_out_concat[:printout_limit]}, {len(rtlsim_out_concat)}" + ) + else: + return True + + +def tree_model_test( + model, + node_details, + part, + target_clk_ns, + max_allowed_volume_delta, + max_allowed_length_delta, + CACHING=False, + DEBUGGING=False, +): + # caching means to run RTLSIM only once and store the model + # so we can reuse the token access vector whenever we + # update the tree model and want to test correctness + # CACHING = True + + # should the token access vectors and + # concatenated token access vectors be printed out? 
+ # useful for debugging + # DEBUGING = False + + # ground truth model to rtlsim + model_rtl = copy.deepcopy(model) + + # t0 = time.time() + node_analytical = get_characteristic_fnc( + model, + (*node_details, "tree_model"), + part, + target_clk_ns, + "tree_model", + False, + ) + + node_rtlsim = get_characteristic_fnc( + model_rtl, + (*node_details, "rtlsim"), + part, + target_clk_ns, + "rtlsim", + CACHING, + ) + + chr_in = decompress_string_to_numpy(node_analytical.get_nodeattr("io_chrc_in")) + chr_out = decompress_string_to_numpy(node_analytical.get_nodeattr("io_chrc_out")) + + rtlsim_in = decompress_string_to_numpy(node_rtlsim.get_nodeattr("io_chrc_in")) + rtlsim_out = decompress_string_to_numpy(node_rtlsim.get_nodeattr("io_chrc_out")) + + if DEBUGGING: + debug_chr_funcs(chr_in, chr_out, rtlsim_in, rtlsim_out) + res = compare_nodes( + node_details, + node_analytical, + node_rtlsim, + subsample=1, + start_cycle=0, + max_cycle=None, + compare_deltas_only=False, + ) + print(res) + # test input port + input_check = compare_two_chr_funcs( + chr_in[0], + rtlsim_in[0], + max_allowed_volume_delta, + max_allowed_length_delta, + ) + + # test output port + output_check = compare_two_chr_funcs( + chr_out[0], + rtlsim_out[0], + max_allowed_volume_delta, + max_allowed_length_delta, + ) + + return input_check and output_check + + +def node_id_finder(m_model, node_id_to_find): + i = 0 + found = False + final_id = 0 + for i in range(len(m_model.graph.node)): + if m_model.graph.node[i].name == node_id_to_find: + final_id = i + found = True + break + if found: + return final_id + else: + return -1 + + +def inter_token_gaps(tav): + if tav is None or tav.size == 0: + return np.array([1]), np.array([0]) # reasonable defaults + + # Find indices where tokens are added (nonzero diff indicates a new token) + token_times = np.flatnonzero(np.diff(tav) > 0) + 1 # +1 to align with time index + + if token_times.size < 2: + # Not enough token events to compute gaps + return np.array([1]), 
token_times # Default gap of 1 between tokens (or 0 if no tokens) + + # Compute gaps between token emissions + gaps = np.diff(token_times) + return gaps, token_times # ,gaps_min + + +def compare_nodes( + node_details, + model_node, + ref_node, + subsample=1, + start_cycle=0, + max_cycle=None, + compare_deltas_only=False, +): + # Extract and decompress the input/output trace arrays + tav_ref_in = decompress_string_to_numpy(ref_node.get_nodeattr("io_chrc_in"))[0] + tav_ref_out = decompress_string_to_numpy(ref_node.get_nodeattr("io_chrc_out"))[0] + tav_model_in = decompress_string_to_numpy(model_node.get_nodeattr("io_chrc_in"))[0] + tav_model_out = decompress_string_to_numpy(model_node.get_nodeattr("io_chrc_out"))[0] + + # gaps_prod, _ = inter_token_gaps(tav_model_out) + # gaps_cons, _ = inter_token_gaps(tav_model_in) + + # local_max_delay_cons_list = sorted(gaps_cons, reverse=True) + # local_max_delay_prod_list = sorted(gaps_prod, reverse=True) + + # print("top 10 consumption and production data rates of the node:") + # print("tree-model consumption: ", local_max_delay_cons_list[:10]) + # print("tree-model production: ", local_max_delay_prod_list[:10]) + + # gaps_prod, _ = inter_token_gaps(tav_ref_out) + # gaps_cons, _ = inter_token_gaps(tav_ref_in) + + # local_max_delay_prod_list = sorted(gaps_prod, reverse=True) + # local_max_delay_cons_list = sorted(gaps_cons, reverse=True) + + # print("reference consumption: ", local_max_delay_cons_list[:10]) + # print("reference production: ", local_max_delay_prod_list[:10]) + + # Determine max length for slicing + max_len = max(len(tav_ref_in), len(tav_model_in), len(tav_ref_out), len(tav_model_out)) + if max_cycle is None or max_cycle > max_len: + max_cycle = max_len + + # Slice without padding + y_ref_in = tav_ref_in[start_cycle:max_cycle] + y_model_in = tav_model_in[start_cycle:max_cycle] + y_ref_out = tav_ref_out[start_cycle:max_cycle] + y_model_out = tav_model_out[start_cycle:max_cycle] + + # Compute differences over 
common lengths only + def max_diff(a, b): + common_len = min(len(a), len(b)) + if common_len == 0: + return float("nan") + return np.max(np.abs(a[:common_len] - b[:common_len])) + + in_diff = max_diff(y_ref_in, y_model_in) + out_diff = max_diff(y_ref_out, y_model_out) + if compare_deltas_only: + return {"max_in_diff": in_diff, "max_out_diff": out_diff} + + # Plotting + plt.figure(figsize=(12, 6)) + + def plot_with_subsample(y, label, color, linestyle="-"): + y_slice = y[start_cycle:max_cycle] + y_sub = y_slice[::subsample] + x_sub = np.arange(start_cycle, start_cycle + len(y_sub) * subsample, subsample) + plt.plot(x_sub, y_sub, label=label, color=color, linestyle=linestyle) + if "ref" in label: + y_offset = int(y_sub[-1] * 0.1) + else: + y_offset = 0 + if len(x_sub) > 0: + plt.text( + x_sub[-1], + y_sub[-1] + y_offset, + f" {label} {y_sub[-1]:.2f}", + color=color, + va="center", + fontsize=9, + ) + + plot_with_subsample(tav_ref_in, "in: ref", "blue") + plot_with_subsample(tav_model_in, "in: tree model", "blue", linestyle="--") + plot_with_subsample(tav_ref_out, "out: ref", "red") + plot_with_subsample(tav_model_out, "out: tree model", "red", linestyle="--") + + metrics_ref = f"ref in: {tav_ref_in[-1]}, out: {tav_ref_out[-1]}" + metrics_model = f"model in: {tav_model_in[-1]}, out: {tav_model_out[-1]}" + + plt.legend() + plt.xlabel("Cycle") + plt.ylabel("Accumulated Tokens") + plt.title( + f"Node {node_details} \n max_in_diff:" + f"{in_diff} max_out_diff: {out_diff}\n (Cycles " + f"{start_cycle}:{max_cycle})\n{metrics_ref}\n{metrics_model}" + ) + plt.grid(True) + plt.tight_layout() + plt.show() + folder_path = "tree_modeling_plots" + if not os.path.exists(folder_path): + os.makedirs(folder_path) + plt.savefig(f"{folder_path}/{node_details}.png") diff --git a/tests/fpgadataflow/output.txt b/tests/fpgadataflow/output.txt new file mode 100644 index 0000000000..ab91cba5b8 --- /dev/null +++ b/tests/fpgadataflow/output.txt @@ -0,0 +1,9205 @@ +============================= 
test session starts ============================== +platform linux -- Python 3.10.12, pytest-6.2.5, py-1.11.0, pluggy-1.6.0 -- /usr/bin/python3 +cachedir: .pytest_cache +metadata: {'Python': '3.10.12', 'Platform': 'Linux-5.4.0-216-generic-x86_64-with-glibc2.35', 'Packages': {'pytest': '6.2.5', 'py': '1.11.0', 'pluggy': '1.6.0'}, 'Plugins': {'cov': '4.1.0', 'html': '3.0.0', 'metadata': '1.7.0', 'parallel': '0.1.1', 'xdist': '3.2.0', 'dependency': '0.5.1', 'anyio': '4.11.0', 'forked': '1.6.0'}} +rootdir: /home/lstasytis/finn_prs/finn, configfile: setup.cfg +plugins: cov-4.1.0, html-3.0.0, metadata-1.7.0, parallel-0.1.1, xdist-3.2.0, dependency-0.5.1, anyio-4.11.0, forked-1.6.0 +collecting ... collected 384 items + +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation0-stride0-2-ifm_dim0-k0-idt0] TAV length delta: 16 +TAV peak volume delta: 11 +TAV length delta: 16 +TAV peak volume delta: 16 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation0-stride0-2-ifm_dim0-k1-idt0] TAV length delta: 8 +TAV peak volume delta: 8 +TAV length delta: 8 +TAV peak volume delta: 8 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation0-stride0-2-ifm_dim0-k2-idt0] TAV peak volume delta: 11 +TAV peak volume delta: 1 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation0-stride0-2-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation0-stride0-2-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation0-stride0-2-ifm_dim1-k2-idt0] TAV length delta: 260 +TAV length delta: 260 +FAILED 
+test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation0-stride0-4-ifm_dim0-k0-idt0] TAV length delta: 28 +TAV peak volume delta: 20 +TAV length delta: 28 +TAV peak volume delta: 28 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation0-stride0-4-ifm_dim0-k1-idt0] TAV length delta: 20 +TAV peak volume delta: 16 +TAV length delta: 20 +TAV peak volume delta: 20 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation0-stride0-4-ifm_dim0-k2-idt0] TAV length delta: 4 +TAV peak volume delta: 20 +TAV length delta: 4 +TAV peak volume delta: 4 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation0-stride0-4-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation0-stride0-4-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation0-stride0-4-ifm_dim1-k2-idt0] TAV length delta: 516 +TAV length delta: 516 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation0-stride1-2-ifm_dim0-k0-idt0] TAV length delta: 4 +TAV peak volume delta: 11 +TAV length delta: 4 +TAV peak volume delta: 4 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation0-stride1-2-ifm_dim0-k1-idt0] TAV length delta: 20 +TAV peak volume delta: 14 +TAV length delta: 20 +TAV peak volume delta: 20 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation0-stride1-2-ifm_dim0-k2-idt0] SKIPPED 
+test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation0-stride1-2-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation0-stride1-2-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation0-stride1-2-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation0-stride1-4-ifm_dim0-k0-idt0] TAV length delta: 4 +TAV peak volume delta: 23 +TAV length delta: 4 +TAV peak volume delta: 7 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation0-stride1-4-ifm_dim0-k1-idt0] TAV length delta: 44 +TAV length delta: 44 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation0-stride1-4-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation0-stride1-4-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation0-stride1-4-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation0-stride1-4-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride0-2-ifm_dim0-k0-idt0] TAV length delta: 48 +TAV length delta: 48 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride0-2-ifm_dim0-k1-idt0] TAV length delta: 56 +TAV length delta: 56 
+FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride0-2-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride0-2-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride0-2-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride0-2-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride0-4-ifm_dim0-k0-idt0] TAV length delta: 92 +TAV length delta: 92 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride0-4-ifm_dim0-k1-idt0] TAV length delta: 108 +TAV length delta: 108 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride0-4-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride0-4-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride0-4-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride0-4-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride1-2-ifm_dim0-k0-idt0] TAV peak volume delta: 0 +TAV peak volume delta: 10 +PASSED 
+test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride1-2-ifm_dim0-k1-idt0] TAV length delta: 40 +TAV peak volume delta: 4 +TAV length delta: 40 +TAV peak volume delta: 26 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride1-2-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride1-2-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride1-2-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride1-2-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride1-4-ifm_dim0-k0-idt0] TAV peak volume delta: 0 +TAV peak volume delta: 19 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride1-4-ifm_dim0-k1-idt0] TAV length delta: 84 +TAV length delta: 84 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride1-4-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride1-4-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride1-4-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride1-4-ifm_dim1-k2-idt0] SKIPPED 
+test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation0-stride0-2-ifm_dim0-k0-idt0] TAV length delta: 16 +TAV peak volume delta: 11 +TAV length delta: 16 +TAV peak volume delta: 16 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation0-stride0-2-ifm_dim0-k1-idt0] TAV length delta: 8 +TAV peak volume delta: 8 +TAV length delta: 8 +TAV peak volume delta: 8 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation0-stride0-2-ifm_dim0-k2-idt0] TAV peak volume delta: 11 +TAV peak volume delta: 1 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation0-stride0-2-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation0-stride0-2-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation0-stride0-2-ifm_dim1-k2-idt0] TAV length delta: 260 +TAV length delta: 260 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation0-stride0-4-ifm_dim0-k0-idt0] TAV length delta: 28 +TAV peak volume delta: 20 +TAV length delta: 28 +TAV peak volume delta: 28 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation0-stride0-4-ifm_dim0-k1-idt0] TAV length delta: 20 +TAV peak volume delta: 16 +TAV length delta: 20 +TAV peak volume delta: 20 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation0-stride0-4-ifm_dim0-k2-idt0] TAV length delta: 4 +TAV peak volume delta: 20 +TAV length delta: 4 
+TAV peak volume delta: 4 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation0-stride0-4-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation0-stride0-4-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation0-stride0-4-ifm_dim1-k2-idt0] TAV length delta: 516 +TAV length delta: 516 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation0-stride1-2-ifm_dim0-k0-idt0] TAV length delta: 4 +TAV peak volume delta: 11 +TAV length delta: 4 +TAV peak volume delta: 4 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation0-stride1-2-ifm_dim0-k1-idt0] TAV length delta: 20 +TAV peak volume delta: 14 +TAV length delta: 20 +TAV peak volume delta: 20 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation0-stride1-2-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation0-stride1-2-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation0-stride1-2-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation0-stride1-2-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation0-stride1-4-ifm_dim0-k0-idt0] TAV length delta: 4 +TAV peak volume delta: 23 +TAV length delta: 4 +TAV peak volume delta: 7 +PASSED 
+test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation0-stride1-4-ifm_dim0-k1-idt0] TAV length delta: 44 +TAV length delta: 44 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation0-stride1-4-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation0-stride1-4-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation0-stride1-4-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation0-stride1-4-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride0-2-ifm_dim0-k0-idt0] TAV length delta: 48 +TAV length delta: 48 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride0-2-ifm_dim0-k1-idt0] TAV length delta: 56 +TAV length delta: 56 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride0-2-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride0-2-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride0-2-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride0-2-ifm_dim1-k2-idt0] SKIPPED 
+test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride0-4-ifm_dim0-k0-idt0] TAV length delta: 92 +TAV length delta: 92 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride0-4-ifm_dim0-k1-idt0] TAV length delta: 108 +TAV length delta: 108 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride0-4-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride0-4-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride0-4-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride0-4-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride1-2-ifm_dim0-k0-idt0] TAV peak volume delta: 0 +TAV peak volume delta: 10 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride1-2-ifm_dim0-k1-idt0] TAV length delta: 40 +TAV peak volume delta: 4 +TAV length delta: 40 +TAV peak volume delta: 26 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride1-2-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride1-2-ifm_dim1-k0-idt0] SKIPPED 
+test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride1-2-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride1-2-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride1-4-ifm_dim0-k0-idt0] TAV peak volume delta: 0 +TAV peak volume delta: 19 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride1-4-ifm_dim0-k1-idt0] TAV length delta: 84 +TAV length delta: 84 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride1-4-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride1-4-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride1-4-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride1-4-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride0-2-ifm_dim0-k0-idt0] TAV length delta: 6 +TAV peak volume delta: 7 +TAV length delta: 6 +TAV peak volume delta: 6 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride0-2-ifm_dim0-k1-idt0] TAV length delta: 16 +TAV peak volume delta: 8 +TAV length delta: 16 +TAV peak volume delta: 16 +PASSED 
+test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride0-2-ifm_dim0-k2-idt0] TAV length delta: 8 +TAV peak volume delta: 13 +TAV length delta: 8 +TAV peak volume delta: 8 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride0-2-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride0-2-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride0-2-ifm_dim1-k2-idt0] TAV length delta: 4 +TAV peak volume delta: 25 +TAV length delta: 4 +TAV peak volume delta: 4 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride0-4-ifm_dim0-k0-idt0] TAV length delta: 10 +TAV peak volume delta: 13 +TAV length delta: 10 +TAV peak volume delta: 10 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride0-4-ifm_dim0-k1-idt0] TAV length delta: 20 +TAV peak volume delta: 14 +TAV length delta: 20 +TAV peak volume delta: 20 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride0-4-ifm_dim0-k2-idt0] TAV length delta: 72 +TAV length delta: 72 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride0-4-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride0-4-ifm_dim1-k1-idt0] SKIPPED 
+test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride0-4-ifm_dim1-k2-idt0] TAV length delta: 4 +TAV peak volume delta: 50 +TAV length delta: 4 +TAV peak volume delta: 5 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride1-2-ifm_dim0-k0-idt0] TAV length delta: 50 +TAV length delta: 50 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride1-2-ifm_dim0-k1-idt0] TAV length delta: 24 +TAV peak volume delta: 19 +TAV length delta: 24 +TAV peak volume delta: 17 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride1-2-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride1-2-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride1-2-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride1-2-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride1-4-ifm_dim0-k0-idt0] TAV length delta: 74 +TAV length delta: 74 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride1-4-ifm_dim0-k1-idt0] TAV length delta: 132 +TAV length delta: 132 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride1-4-ifm_dim0-k2-idt0] SKIPPED 
+test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride1-4-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride1-4-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride1-4-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride0-2-ifm_dim0-k0-idt0] TAV length delta: 30 +TAV peak volume delta: 16 +TAV length delta: 30 +TAV peak volume delta: 30 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride0-2-ifm_dim0-k1-idt0] TAV length delta: 56 +TAV length delta: 56 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride0-2-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride0-2-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride0-2-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride0-2-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride0-4-ifm_dim0-k0-idt0] TAV length delta: 62 +TAV length delta: 62 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride0-4-ifm_dim0-k1-idt0] TAV length delta: 124 +TAV length delta: 
124 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride0-4-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride0-4-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride0-4-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride0-4-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride1-2-ifm_dim0-k0-idt0] TAV peak volume delta: 0 +TAV peak volume delta: 10 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride1-2-ifm_dim0-k1-idt0] TAV length delta: 8 +TAV peak volume delta: 56 +TAV length delta: 8 +TAV peak volume delta: 15 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride1-2-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride1-2-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride1-2-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride1-2-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride1-4-ifm_dim0-k0-idt0] TAV length delta: 24 +TAV peak volume delta: 20 +TAV length delta: 
24 +TAV peak volume delta: 28 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride1-4-ifm_dim0-k1-idt0] TAV length delta: 72 +TAV length delta: 72 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride1-4-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride1-4-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride1-4-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride1-4-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride0-2-ifm_dim0-k0-idt0] TAV length delta: 6 +TAV peak volume delta: 7 +TAV length delta: 6 +TAV peak volume delta: 6 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride0-2-ifm_dim0-k1-idt0] TAV length delta: 16 +TAV peak volume delta: 8 +TAV length delta: 16 +TAV peak volume delta: 16 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride0-2-ifm_dim0-k2-idt0] TAV length delta: 8 +TAV peak volume delta: 13 +TAV length delta: 8 +TAV peak volume delta: 8 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride0-2-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride0-2-ifm_dim1-k1-idt0] SKIPPED 
+test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride0-2-ifm_dim1-k2-idt0] TAV length delta: 4 +TAV peak volume delta: 25 +TAV length delta: 4 +TAV peak volume delta: 4 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride0-4-ifm_dim0-k0-idt0] TAV length delta: 10 +TAV peak volume delta: 13 +TAV length delta: 10 +TAV peak volume delta: 10 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride0-4-ifm_dim0-k1-idt0] TAV length delta: 20 +TAV peak volume delta: 14 +TAV length delta: 20 +TAV peak volume delta: 20 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride0-4-ifm_dim0-k2-idt0] TAV length delta: 72 +TAV length delta: 72 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride0-4-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride0-4-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride0-4-ifm_dim1-k2-idt0] TAV length delta: 4 +TAV peak volume delta: 50 +TAV length delta: 4 +TAV peak volume delta: 5 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride1-2-ifm_dim0-k0-idt0] TAV length delta: 50 +TAV length delta: 50 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride1-2-ifm_dim0-k1-idt0] TAV length delta: 24 +TAV peak volume delta: 19 +TAV length delta: 24 +TAV peak 
volume delta: 17 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride1-2-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride1-2-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride1-2-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride1-2-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride1-4-ifm_dim0-k0-idt0] TAV length delta: 74 +TAV length delta: 74 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride1-4-ifm_dim0-k1-idt0] TAV length delta: 132 +TAV length delta: 132 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride1-4-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride1-4-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride1-4-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride1-4-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride0-2-ifm_dim0-k0-idt0] TAV length delta: 30 +TAV peak volume delta: 16 +TAV length delta: 30 +TAV peak volume delta: 30 +PASSED 
+test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride0-2-ifm_dim0-k1-idt0] TAV length delta: 56 +TAV length delta: 56 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride0-2-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride0-2-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride0-2-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride0-2-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride0-4-ifm_dim0-k0-idt0] TAV length delta: 62 +TAV length delta: 62 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride0-4-ifm_dim0-k1-idt0] TAV length delta: 124 +TAV length delta: 124 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride0-4-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride0-4-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride0-4-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride0-4-ifm_dim1-k2-idt0] SKIPPED 
+test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride1-2-ifm_dim0-k0-idt0] TAV peak volume delta: 0 +TAV peak volume delta: 10 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride1-2-ifm_dim0-k1-idt0] TAV length delta: 8 +TAV peak volume delta: 56 +TAV length delta: 8 +TAV peak volume delta: 15 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride1-2-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride1-2-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride1-2-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride1-2-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride1-4-ifm_dim0-k0-idt0] TAV length delta: 24 +TAV peak volume delta: 20 +TAV length delta: 24 +TAV peak volume delta: 28 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride1-4-ifm_dim0-k1-idt0] TAV length delta: 72 +TAV length delta: 72 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride1-4-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride1-4-ifm_dim1-k0-idt0] SKIPPED 
+test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride1-4-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride1-4-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation0-stride0-2-ifm_dim0-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation0-stride0-2-ifm_dim0-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation0-stride0-2-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation0-stride0-2-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation0-stride0-2-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation0-stride0-2-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation0-stride0-4-ifm_dim0-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation0-stride0-4-ifm_dim0-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation0-stride0-4-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation0-stride0-4-ifm_dim1-k0-idt0] SKIPPED 
+test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation0-stride0-4-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation0-stride0-4-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation0-stride1-2-ifm_dim0-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation0-stride1-2-ifm_dim0-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation0-stride1-2-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation0-stride1-2-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation0-stride1-2-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation0-stride1-2-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation0-stride1-4-ifm_dim0-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation0-stride1-4-ifm_dim0-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation0-stride1-4-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation0-stride1-4-ifm_dim1-k0-idt0] SKIPPED 
+test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation0-stride1-4-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation0-stride1-4-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation1-stride0-2-ifm_dim0-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation1-stride0-2-ifm_dim0-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation1-stride0-2-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation1-stride0-2-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation1-stride0-2-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation1-stride0-2-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation1-stride0-4-ifm_dim0-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation1-stride0-4-ifm_dim0-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation1-stride0-4-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation1-stride0-4-ifm_dim1-k0-idt0] SKIPPED 
+test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation1-stride0-4-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation1-stride0-4-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation1-stride1-2-ifm_dim0-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation1-stride1-2-ifm_dim0-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation1-stride1-2-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation1-stride1-2-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation1-stride1-2-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation1-stride1-2-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation1-stride1-4-ifm_dim0-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation1-stride1-4-ifm_dim0-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation1-stride1-4-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation1-stride1-4-ifm_dim1-k0-idt0] SKIPPED 
+test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation1-stride1-4-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-1-dilation1-stride1-4-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation0-stride0-2-ifm_dim0-k0-idt0] TAV length delta: 16 +TAV peak volume delta: 11 +TAV length delta: 16 +TAV peak volume delta: 16 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation0-stride0-2-ifm_dim0-k1-idt0] TAV length delta: 8 +TAV peak volume delta: 8 +TAV length delta: 8 +TAV peak volume delta: 8 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation0-stride0-2-ifm_dim0-k2-idt0] TAV peak volume delta: 11 +TAV peak volume delta: 1 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation0-stride0-2-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation0-stride0-2-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation0-stride0-2-ifm_dim1-k2-idt0] TAV length delta: 260 +TAV length delta: 260 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation0-stride0-4-ifm_dim0-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation0-stride0-4-ifm_dim0-k1-idt0] SKIPPED 
+test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation0-stride0-4-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation0-stride0-4-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation0-stride0-4-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation0-stride0-4-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation0-stride1-2-ifm_dim0-k0-idt0] TAV length delta: 4 +TAV peak volume delta: 11 +TAV length delta: 4 +TAV peak volume delta: 4 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation0-stride1-2-ifm_dim0-k1-idt0] TAV length delta: 20 +TAV peak volume delta: 14 +TAV length delta: 20 +TAV peak volume delta: 20 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation0-stride1-2-ifm_dim0-k2-idt0] FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation0-stride1-2-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation0-stride1-2-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation0-stride1-2-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation0-stride1-4-ifm_dim0-k0-idt0] SKIPPED 
+test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation0-stride1-4-ifm_dim0-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation0-stride1-4-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation0-stride1-4-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation0-stride1-4-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation0-stride1-4-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation1-stride0-2-ifm_dim0-k0-idt0] TAV length delta: 48 +TAV length delta: 48 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation1-stride0-2-ifm_dim0-k1-idt0] TAV length delta: 56 +TAV length delta: 56 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation1-stride0-2-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation1-stride0-2-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation1-stride0-2-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation1-stride0-2-ifm_dim1-k2-idt0] SKIPPED 
+test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation1-stride0-4-ifm_dim0-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation1-stride0-4-ifm_dim0-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation1-stride0-4-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation1-stride0-4-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation1-stride0-4-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation1-stride0-4-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation1-stride1-2-ifm_dim0-k0-idt0] TAV peak volume delta: 0 +TAV peak volume delta: 10 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation1-stride1-2-ifm_dim0-k1-idt0] TAV length delta: 40 +TAV peak volume delta: 4 +TAV length delta: 40 +TAV peak volume delta: 26 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation1-stride1-2-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation1-stride1-2-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation1-stride1-2-ifm_dim1-k1-idt0] SKIPPED 
+test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation1-stride1-2-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation1-stride1-4-ifm_dim0-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation1-stride1-4-ifm_dim0-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation1-stride1-4-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation1-stride1-4-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation1-stride1-4-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation1-stride1-4-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride0-2-ifm_dim0-k0-idt0] TAV length delta: 6 +TAV peak volume delta: 7 +TAV length delta: 6 +TAV peak volume delta: 6 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride0-2-ifm_dim0-k1-idt0] TAV length delta: 16 +TAV peak volume delta: 8 +TAV length delta: 16 +TAV peak volume delta: 16 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride0-2-ifm_dim0-k2-idt0] TAV length delta: 8 +TAV peak volume delta: 13 +TAV length delta: 8 +TAV peak volume delta: 8 +PASSED 
+test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride0-2-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride0-2-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride0-2-ifm_dim1-k2-idt0] TAV length delta: 4 +TAV peak volume delta: 25 +TAV length delta: 4 +TAV peak volume delta: 4 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride0-4-ifm_dim0-k0-idt0] TAV length delta: 10 +TAV peak volume delta: 13 +TAV length delta: 10 +TAV peak volume delta: 10 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride0-4-ifm_dim0-k1-idt0] TAV length delta: 20 +TAV peak volume delta: 14 +TAV length delta: 20 +TAV peak volume delta: 20 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride0-4-ifm_dim0-k2-idt0] TAV length delta: 72 +TAV length delta: 72 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride0-4-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride0-4-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride0-4-ifm_dim1-k2-idt0] TAV length delta: 4 +TAV peak volume delta: 50 +TAV length delta: 4 +TAV peak volume delta: 5 +FAILED 
+test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride1-2-ifm_dim0-k0-idt0] TAV length delta: 50 +TAV length delta: 50 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride1-2-ifm_dim0-k1-idt0] TAV length delta: 24 +TAV peak volume delta: 19 +TAV length delta: 24 +TAV peak volume delta: 17 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride1-2-ifm_dim0-k2-idt0] FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride1-2-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride1-2-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride1-2-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride1-4-ifm_dim0-k0-idt0] TAV length delta: 74 +TAV length delta: 74 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride1-4-ifm_dim0-k1-idt0] TAV length delta: 132 +TAV length delta: 132 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride1-4-ifm_dim0-k2-idt0] FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride1-4-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride1-4-ifm_dim1-k1-idt0] 
SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride1-4-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride0-2-ifm_dim0-k0-idt0] TAV length delta: 30 +TAV peak volume delta: 16 +TAV length delta: 30 +TAV peak volume delta: 30 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride0-2-ifm_dim0-k1-idt0] TAV length delta: 56 +TAV length delta: 56 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride0-2-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride0-2-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride0-2-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride0-2-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride0-4-ifm_dim0-k0-idt0] TAV length delta: 62 +TAV length delta: 62 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride0-4-ifm_dim0-k1-idt0] TAV length delta: 124 +TAV length delta: 124 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride0-4-ifm_dim0-k2-idt0] SKIPPED 
+test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride0-4-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride0-4-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride0-4-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride1-2-ifm_dim0-k0-idt0] TAV peak volume delta: 0 +TAV peak volume delta: 10 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride1-2-ifm_dim0-k1-idt0] TAV length delta: 8 +TAV peak volume delta: 56 +TAV length delta: 8 +TAV peak volume delta: 15 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride1-2-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride1-2-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride1-2-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride1-2-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride1-4-ifm_dim0-k0-idt0] TAV length delta: 24 +TAV peak volume delta: 20 +TAV length delta: 24 +TAV peak volume delta: 28 +PASSED 
+test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride1-4-ifm_dim0-k1-idt0] TAV length delta: 72 +TAV length delta: 72 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride1-4-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride1-4-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride1-4-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride1-4-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride0-2-ifm_dim0-k0-idt0] TAV length delta: 6 +TAV peak volume delta: 7 +TAV length delta: 6 +TAV peak volume delta: 6 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride0-2-ifm_dim0-k1-idt0] TAV length delta: 16 +TAV peak volume delta: 8 +TAV length delta: 16 +TAV peak volume delta: 16 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride0-2-ifm_dim0-k2-idt0] TAV length delta: 8 +TAV peak volume delta: 13 +TAV length delta: 8 +TAV peak volume delta: 8 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride0-2-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride0-2-ifm_dim1-k1-idt0] SKIPPED 
+test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride0-2-ifm_dim1-k2-idt0] TAV length delta: 4 +TAV peak volume delta: 25 +TAV length delta: 4 +TAV peak volume delta: 4 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride0-4-ifm_dim0-k0-idt0] TAV length delta: 10 +TAV peak volume delta: 13 +TAV length delta: 10 +TAV peak volume delta: 10 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride0-4-ifm_dim0-k1-idt0] TAV length delta: 20 +TAV peak volume delta: 14 +TAV length delta: 20 +TAV peak volume delta: 20 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride0-4-ifm_dim0-k2-idt0] TAV length delta: 72 +TAV length delta: 72 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride0-4-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride0-4-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride0-4-ifm_dim1-k2-idt0] TAV length delta: 4 +TAV peak volume delta: 50 +TAV length delta: 4 +TAV peak volume delta: 5 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride1-2-ifm_dim0-k0-idt0] TAV length delta: 50 +TAV length delta: 50 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride1-2-ifm_dim0-k1-idt0] TAV length delta: 24 +TAV peak volume delta: 19 +TAV length delta: 24 +TAV peak 
volume delta: 17 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride1-2-ifm_dim0-k2-idt0] FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride1-2-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride1-2-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride1-2-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride1-4-ifm_dim0-k0-idt0] TAV length delta: 74 +TAV length delta: 74 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride1-4-ifm_dim0-k1-idt0] TAV length delta: 132 +TAV length delta: 132 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride1-4-ifm_dim0-k2-idt0] FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride1-4-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride1-4-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride1-4-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride0-2-ifm_dim0-k0-idt0] TAV length delta: 30 +TAV peak volume delta: 16 +TAV length delta: 30 +TAV peak volume delta: 30 +PASSED 
+test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride0-2-ifm_dim0-k1-idt0] TAV length delta: 56 +TAV length delta: 56 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride0-2-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride0-2-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride0-2-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride0-2-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride0-4-ifm_dim0-k0-idt0] TAV length delta: 62 +TAV length delta: 62 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride0-4-ifm_dim0-k1-idt0] TAV length delta: 124 +TAV length delta: 124 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride0-4-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride0-4-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride0-4-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride0-4-ifm_dim1-k2-idt0] SKIPPED 
+test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride1-2-ifm_dim0-k0-idt0] TAV peak volume delta: 0 +TAV peak volume delta: 10 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride1-2-ifm_dim0-k1-idt0] TAV length delta: 8 +TAV peak volume delta: 56 +TAV length delta: 8 +TAV peak volume delta: 15 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride1-2-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride1-2-ifm_dim1-k0-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride1-2-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride1-2-ifm_dim1-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride1-4-ifm_dim0-k0-idt0] TAV length delta: 24 +TAV peak volume delta: 20 +TAV length delta: 24 +TAV peak volume delta: 28 +PASSED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride1-4-ifm_dim0-k1-idt0] TAV length delta: 72 +TAV length delta: 72 +FAILED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride1-4-ifm_dim0-k2-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride1-4-ifm_dim1-k0-idt0] SKIPPED 
+test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride1-4-ifm_dim1-k1-idt0] SKIPPED +test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride1-4-ifm_dim1-k2-idt0] SKIPPED + +=================================== FAILURES =================================== +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation0-stride0-2-ifm_dim1-k2-idt0] _ + +idt = INT2, k = [1, 5], ifm_dim = [1, 21], ifm_ch = 2, stride = [1, 1] +dilation = [1, 1], simd = 1, dw = 0, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + 
k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + 
ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [1, 5], 2, [1, 21], [1, 17], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation0-stride0-4-ifm_dim1-k2-idt0] _ + +idt = INT2, k = [1, 5], ifm_dim = [1, 21], ifm_ch = 4, stride = [1, 1] +dilation = [1, 1], simd = 1, dw = 0, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == 
stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if 
optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [1, 5], 4, [1, 21], [1, 17], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation0-stride1-4-ifm_dim0-k1-idt0] _ + +idt = INT2, k = [3, 3], ifm_dim = [8, 8], ifm_ch = 4, stride = [2, 2] +dilation = [1, 1], simd = 1, dw = 0, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( 
+ idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + 
inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [3, 3], 4, [8, 8], [3, 3], [2, 2], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride0-2-ifm_dim0-k0-idt0] _ + +idt = INT2, k = [2, 2], ifm_dim = [8, 8], ifm_ch = 2, stride = [1, 1] +dilation = [2, 2], simd = 1, dw = 0, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + 
@pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = 
make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [2, 2], 2, [8, 8], [6, 6], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride0-2-ifm_dim0-k1-idt0] _ + +idt = INT2, k = [3, 3], ifm_dim = [8, 8], ifm_ch = 2, stride = [1, 1] +dilation = [2, 2], simd = 1, dw = 0, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + 
@pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. 
dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, 
max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [3, 3], 2, [8, 8], [4, 4], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride0-4-ifm_dim0-k0-idt0] _ + +idt = INT2, k = [2, 2], ifm_dim = [8, 8], ifm_ch = 4, stride = [1, 1] +dilation = [2, 2], simd = 1, dw = 0, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w 
= ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + 
dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [2, 2], 4, [8, 8], [6, 6], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride0-4-ifm_dim0-k1-idt0] _ + +idt = INT2, k = [3, 3], ifm_dim = [8, 8], ifm_ch = 4, stride = [1, 1] +dilation = [2, 2], simd = 1, dw = 0, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + 
): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + 
if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [3, 3], 4, [8, 8], [4, 4], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride1-4-ifm_dim0-k1-idt0] _ + +idt = INT2, k = [3, 3], ifm_dim = [8, 8], ifm_ch = 4, stride = [2, 2] +dilation = [2, 2], simd = 1, dw = 0, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + 
dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = 
model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [3, 3], 4, [8, 8], [2, 2], [2, 2], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation0-stride0-2-ifm_dim1-k2-idt0] _ + +idt = INT2, k = [1, 5], ifm_dim = [1, 21], ifm_ch = 2, stride = [1, 1] +dilation = [1, 1], simd = 2, dw = 0, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + 
@pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, 
ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [1, 5], 2, [1, 21], [1, 17], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation0-stride0-4-ifm_dim1-k2-idt0] _ + +idt = INT2, k = [1, 5], ifm_dim = [1, 21], ifm_ch = 4, stride = [1, 1] +dilation = [1, 1], simd = 2, dw = 0, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable 
(MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, 
stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! 
+E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [1, 5], 4, [1, 21], [1, 17], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation0-stride1-4-ifm_dim0-k1-idt0] _ + +idt = INT2, k = [3, 3], ifm_dim = [8, 8], ifm_ch = 4, stride = [2, 2] +dilation = [1, 1], simd = 2, dw = 0, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. 
dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert 
tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [3, 3], 4, [8, 8], [3, 3], [2, 2], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride0-2-ifm_dim0-k0-idt0] _ + +idt = INT2, k = [2, 2], ifm_dim = [8, 8], ifm_ch = 2, stride = [1, 1] +dilation = [2, 2], simd = 2, dw = 0, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + 
dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + 
"ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [2, 2], 2, [8, 8], [6, 6], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride0-2-ifm_dim0-k1-idt0] _ + +idt = INT2, k = [3, 3], ifm_dim = [8, 8], ifm_ch = 2, stride = [1, 1] +dilation = [2, 2], simd = 2, dw = 0, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and 
k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) 
+ inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [3, 3], 2, [8, 8], [4, 4], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride0-4-ifm_dim0-k0-idt0] _ + +idt = INT2, k = [2, 2], ifm_dim = [8, 8], ifm_ch = 4, stride = [1, 1] +dilation = [2, 2], simd = 2, dw = 0, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def 
test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + 
inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [2, 2], 4, [8, 8], [6, 6], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride0-4-ifm_dim0-k1-idt0] _ + +idt = INT2, k = [3, 3], ifm_dim = [8, 8], ifm_ch = 4, stride = [1, 1] +dilation = [2, 2], simd = 2, dw = 0, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + 
@pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, 
dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! 
+E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [3, 3], 4, [8, 8], [4, 4], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride1-4-ifm_dim0-k1-idt0] _ + +idt = INT2, k = [3, 3], ifm_dim = [8, 8], ifm_ch = 4, stride = [2, 2] +dilation = [2, 2], simd = 2, dw = 0, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. 
dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert 
tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [3, 3], 4, [8, 8], [2, 2], [2, 2], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride0-4-ifm_dim0-k2-idt0] _ + +idt = INT2, k = [1, 5], ifm_dim = [8, 8], ifm_ch = 4, stride = [1, 1] +dilation = [1, 1], simd = 1, dw = 1, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + 
dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + 
"ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [1, 5], 4, [8, 8], [8, 4], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride0-4-ifm_dim1-k2-idt0] _ + +idt = INT2, k = [1, 5], ifm_dim = [1, 21], ifm_ch = 4, stride = [1, 1] +dilation = [1, 1], simd = 1, dw = 1, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + 
and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", 
parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [1, 5], 4, [1, 21], [1, 17], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride1-2-ifm_dim0-k0-idt0] _ + +idt = INT2, k = [2, 2], ifm_dim = [8, 8], ifm_ch = 2, stride = [2, 2] +dilation = [1, 1], simd = 1, dw = 1, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def 
test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + 
inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [2, 2], 2, [8, 8], [4, 4], [2, 2], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride1-4-ifm_dim0-k0-idt0] _ + +idt = INT2, k = [2, 2], ifm_dim = [8, 8], ifm_ch = 4, stride = [2, 2] +dilation = [1, 1], simd = 1, dw = 1, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + 
@pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, 
dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! 
+E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [2, 2], 4, [8, 8], [4, 4], [2, 2], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride1-4-ifm_dim0-k1-idt0] _ + +idt = INT2, k = [3, 3], ifm_dim = [8, 8], ifm_ch = 4, stride = [2, 2] +dilation = [1, 1], simd = 1, dw = 1, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. 
dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert 
tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [3, 3], 4, [8, 8], [3, 3], [2, 2], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride0-2-ifm_dim0-k1-idt0] _ + +idt = INT2, k = [3, 3], ifm_dim = [8, 8], ifm_ch = 2, stride = [1, 1] +dilation = [2, 2], simd = 1, dw = 1, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + 
dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + 
"ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [3, 3], 2, [8, 8], [4, 4], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride0-4-ifm_dim0-k0-idt0] _ + +idt = INT2, k = [2, 2], ifm_dim = [8, 8], ifm_ch = 4, stride = [1, 1] +dilation = [2, 2], simd = 1, dw = 1, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and 
k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) 
+ inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [2, 2], 4, [8, 8], [6, 6], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride0-4-ifm_dim0-k1-idt0] _ + +idt = INT2, k = [3, 3], ifm_dim = [8, 8], ifm_ch = 4, stride = [1, 1] +dilation = [2, 2], simd = 1, dw = 1, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def 
test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + 
inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [3, 3], 4, [8, 8], [4, 4], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride1-2-ifm_dim0-k1-idt0] _ + +idt = INT2, k = [3, 3], ifm_dim = [8, 8], ifm_ch = 2, stride = [2, 2] +dilation = [2, 2], simd = 1, dw = 1, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + 
@pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, 
dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! 
+E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [3, 3], 2, [8, 8], [2, 2], [2, 2], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride1-4-ifm_dim0-k1-idt0] _ + +idt = INT2, k = [3, 3], ifm_dim = [8, 8], ifm_ch = 4, stride = [2, 2] +dilation = [2, 2], simd = 1, dw = 1, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. 
dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert 
tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [3, 3], 4, [8, 8], [2, 2], [2, 2], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride0-4-ifm_dim0-k2-idt0] _ + +idt = INT2, k = [1, 5], ifm_dim = [8, 8], ifm_ch = 4, stride = [1, 1] +dilation = [1, 1], simd = 2, dw = 1, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + 
dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + 
"ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [1, 5], 4, [8, 8], [8, 4], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride0-4-ifm_dim1-k2-idt0] _ + +idt = INT2, k = [1, 5], ifm_dim = [1, 21], ifm_ch = 4, stride = [1, 1] +dilation = [1, 1], simd = 2, dw = 1, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + 
and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", 
parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [1, 5], 4, [1, 21], [1, 17], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride1-2-ifm_dim0-k0-idt0] _ + +idt = INT2, k = [2, 2], ifm_dim = [8, 8], ifm_ch = 2, stride = [2, 2] +dilation = [1, 1], simd = 2, dw = 1, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def 
test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + 
inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [2, 2], 2, [8, 8], [4, 4], [2, 2], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride1-4-ifm_dim0-k0-idt0] _ + +idt = INT2, k = [2, 2], ifm_dim = [8, 8], ifm_ch = 4, stride = [2, 2] +dilation = [1, 1], simd = 2, dw = 1, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + 
@pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, 
dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! 
+E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [2, 2], 4, [8, 8], [4, 4], [2, 2], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride1-4-ifm_dim0-k1-idt0] _ + +idt = INT2, k = [3, 3], ifm_dim = [8, 8], ifm_ch = 4, stride = [2, 2] +dilation = [1, 1], simd = 2, dw = 1, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. 
dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert 
tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [3, 3], 4, [8, 8], [3, 3], [2, 2], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride0-2-ifm_dim0-k1-idt0] _ + +idt = INT2, k = [3, 3], ifm_dim = [8, 8], ifm_ch = 2, stride = [1, 1] +dilation = [2, 2], simd = 2, dw = 1, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + 
dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + 
"ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [3, 3], 2, [8, 8], [4, 4], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride0-4-ifm_dim0-k0-idt0] _ + +idt = INT2, k = [2, 2], ifm_dim = [8, 8], ifm_ch = 4, stride = [1, 1] +dilation = [2, 2], simd = 2, dw = 1, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and 
k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) 
+ inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [2, 2], 4, [8, 8], [6, 6], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride0-4-ifm_dim0-k1-idt0] _ + +idt = INT2, k = [3, 3], ifm_dim = [8, 8], ifm_ch = 4, stride = [1, 1] +dilation = [2, 2], simd = 2, dw = 1, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def 
test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + 
inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [3, 3], 4, [8, 8], [4, 4], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride1-2-ifm_dim0-k1-idt0] _ + +idt = INT2, k = [3, 3], ifm_dim = [8, 8], ifm_ch = 2, stride = [2, 2] +dilation = [2, 2], simd = 2, dw = 1, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + 
@pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, 
dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! 
+E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [3, 3], 2, [8, 8], [2, 2], [2, 2], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride1-4-ifm_dim0-k1-idt0] _ + +idt = INT2, k = [3, 3], ifm_dim = [8, 8], ifm_ch = 4, stride = [2, 2] +dilation = [2, 2], simd = 2, dw = 1, parallel_window = 0, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. 
dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert 
tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [3, 3], 4, [8, 8], [2, 2], [2, 2], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation0-stride0-2-ifm_dim1-k2-idt0] _ + +idt = INT2, k = [1, 5], ifm_dim = [1, 21], ifm_ch = 2, stride = [1, 1] +dilation = [1, 1], simd = 2, dw = 0, parallel_window = 1, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + 
dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + 
"ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [1, 5], 2, [1, 21], [1, 17], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation0-stride1-2-ifm_dim0-k2-idt0] _ + +idt = INT2, k = [1, 5], ifm_dim = [8, 8], ifm_ch = 2, stride = [2, 2] +dilation = [1, 1], simd = 2, dw = 0, parallel_window = 1, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + 
and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", 
parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" + +test_fpgadataflow_convinputgenerator.py:343: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../src/finn/util/test.py:423: in tree_model_test + node_rtlsim = get_characteristic_fnc( +../../src/finn/util/test.py:264: in get_characteristic_fnc + _codegen_single_node(node, model, part, target_clk_ns) +../../src/finn/transformation/fpgadataflow/prepare_ip.py:54: in _codegen_single_node + inst.code_generation_ipgen(model, fpgapart, clk) +../../src/finn/custom_op/fpgadataflow/rtlbackend.py:88: in code_generation_ipgen + self.generate_hdl(model, fpgapart, clk) +../../src/finn/custom_op/fpgadataflow/rtl/convolutioninputgenerator_rtl.py:798: in generate_hdl + template_path, code_gen_dict = self.prepare_codegen_default() +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = + + def prepare_codegen_default(self): + """Fills code generation dict for the default implementation style by computing + the incremental addressing scheme for the circular buffer.""" + if self.get_nodeattr("dynamic_mode"): + template_select = "/finn-rtllib/swg/swg_template_default_dynamic.sv" + else: + template_select = "/finn-rtllib/swg/swg_template_default.sv" + template_path = os.environ["FINN_ROOT"] + template_select + code_gen_dict = {} + + ifm_ch = self.get_nodeattr("IFMChannels") + k = self.get_nodeattr("ConvKernelDim") + ifm_dim = self.get_nodeattr("IFMDim") + stride = 
self.get_nodeattr("Stride") + dilation = self.get_nodeattr("Dilation") + depthwise = self.get_nodeattr("depthwise") + simd = self.get_nodeattr("SIMD") + + k_h, k_w = k + h, w = ifm_dim + pad = [0, 0, 0, 0] # padding happens in separate padding node for now + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + pad_h = pad[0] + pad[2] + pad_w = pad[1] + pad[3] + out_dim_h = im2col.compute_conv_output_dim(h, k_h, stride_h, pad_h, dilation_h) + out_dim_w = im2col.compute_conv_output_dim(w, k_w, stride_w, pad_w, dilation_w) + mmv_in = 1 + mmv_out = 1 + channel_factor = int(ifm_ch / simd) + + # compute minimal buffer length (assuming it holds 1 complete window) + buffer_min_size = ((k_h - 1) * dilation_h * w + (k_w - 1) * dilation_w + 1) * channel_factor + + buffer_actual_size = self.get_buffer_depth() + code_gen_dict["$BUF_ELEM_TOTAL$"] = [str(buffer_actual_size)] + + # compute some intermediate values, e.g., kernel "width" = k_w incl. dilation + # or cols/rows that are skipped due to imperfect stride<->dim combination + kernel_width = (k_w - 1) * dilation_w + 1 + kernel_height = (k_h - 1) * dilation_h + 1 + skip_columns = w % (kernel_width + (out_dim_w - 1) * stride_w) + skip_rows = h % (kernel_height + (out_dim_h - 1) * stride_h) + + # compute address increment values for 5-loop nest + addr_incr_end_simd = 1 + addr_incr_end_window_elem = (dilation_w - 1) * channel_factor + 1 + addr_incr_end_window_row = ( + ((w - kernel_width) * channel_factor) # remaining line + + ((dilation_h - 1) * w * channel_factor) # skip lines + + 1 # wrap-around of minimally sized buffer + ) + addr_incr_end_window = -buffer_min_size + stride_w * channel_factor + 1 + addr_incr_end_row = ( + -buffer_min_size + + ((skip_columns + kernel_width) * channel_factor) # remaining line + + ((stride_h - 1) * w * channel_factor) # skip lines + + 1 + ) + + # re-use same controller structure -> re-assign address increments + if depthwise: + addr_incr_end_window_elem = dilation_w * 
channel_factor + addr_incr_end_window_row = ( + channel_factor + + (w - kernel_width) * channel_factor + + (dilation_h - 1) * w * channel_factor + ) + addr_incr_end_simd = -buffer_min_size + (channel_factor + 1) + + # sanity check for wrap logic + assert not ( + abs(addr_incr_end_window) > buffer_actual_size + ), "ERROR: W increment > buffer size, try setting parallel_window=1" + assert not ( +> abs(addr_incr_end_row) > buffer_actual_size + ), "ERROR: H increment > buffer size, try setting parallel_window=1" +E AssertionError: ERROR: H increment > buffer size, try setting parallel_window=1 + +../../src/finn/custom_op/fpgadataflow/rtl/convolutioninputgenerator_rtl.py:378: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation1-stride0-2-ifm_dim0-k0-idt0] _ + +idt = INT2, k = [2, 2], ifm_dim = [8, 8], ifm_ch = 2, stride = [1, 1] +dilation = [2, 2], simd = 2, dw = 0, parallel_window = 1, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, 
+ ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": 
+ inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [2, 2], 2, [8, 8], [6, 6], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation1-stride0-2-ifm_dim0-k1-idt0] _ + +idt = INT2, k = [3, 3], ifm_dim = [8, 8], ifm_ch = 2, stride = [1, 1] +dilation = [2, 2], simd = 2, dw = 0, parallel_window = 1, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + 
@pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = 
model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [3, 3], 2, [8, 8], [4, 4], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride0-4-ifm_dim0-k2-idt0] _ + +idt = INT2, k = [1, 5], ifm_dim = [8, 8], ifm_ch = 4, stride = [1, 1] +dilation = [1, 1], simd = 1, dw = 1, parallel_window = 1, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + 
@pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + 
ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! 
+E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [1, 5], 4, [8, 8], [8, 4], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride0-4-ifm_dim1-k2-idt0] _ + +idt = INT2, k = [1, 5], ifm_dim = [1, 21], ifm_ch = 4, stride = [1, 1] +dilation = [1, 1], simd = 1, dw = 1, parallel_window = 1, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. 
dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert 
tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [1, 5], 4, [1, 21], [1, 17], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride1-2-ifm_dim0-k0-idt0] _ + +idt = INT2, k = [2, 2], ifm_dim = [8, 8], ifm_ch = 2, stride = [2, 2] +dilation = [1, 1], simd = 1, dw = 1, parallel_window = 1, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + 
dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + 
"ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [2, 2], 2, [8, 8], [4, 4], [2, 2], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride1-2-ifm_dim0-k2-idt0] _ + +idt = INT2, k = [1, 5], ifm_dim = [8, 8], ifm_ch = 2, stride = [2, 2] +dilation = [1, 1], simd = 1, dw = 1, parallel_window = 1, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and 
k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) 
+ inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" + +test_fpgadataflow_convinputgenerator.py:343: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../src/finn/util/test.py:423: in tree_model_test + node_rtlsim = get_characteristic_fnc( +../../src/finn/util/test.py:264: in get_characteristic_fnc + _codegen_single_node(node, model, part, target_clk_ns) +../../src/finn/transformation/fpgadataflow/prepare_ip.py:54: in _codegen_single_node + inst.code_generation_ipgen(model, fpgapart, clk) +../../src/finn/custom_op/fpgadataflow/rtlbackend.py:88: in code_generation_ipgen + self.generate_hdl(model, fpgapart, clk) +../../src/finn/custom_op/fpgadataflow/rtl/convolutioninputgenerator_rtl.py:798: in generate_hdl + template_path, code_gen_dict = self.prepare_codegen_default() +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = + + def prepare_codegen_default(self): + """Fills code generation dict for the default implementation style by computing + the incremental addressing scheme for the circular buffer.""" + if self.get_nodeattr("dynamic_mode"): + template_select = "/finn-rtllib/swg/swg_template_default_dynamic.sv" + else: + template_select = "/finn-rtllib/swg/swg_template_default.sv" + template_path = os.environ["FINN_ROOT"] + template_select + code_gen_dict = {} + + ifm_ch = self.get_nodeattr("IFMChannels") + k = self.get_nodeattr("ConvKernelDim") + ifm_dim = self.get_nodeattr("IFMDim") + stride = 
self.get_nodeattr("Stride") + dilation = self.get_nodeattr("Dilation") + depthwise = self.get_nodeattr("depthwise") + simd = self.get_nodeattr("SIMD") + + k_h, k_w = k + h, w = ifm_dim + pad = [0, 0, 0, 0] # padding happens in separate padding node for now + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + pad_h = pad[0] + pad[2] + pad_w = pad[1] + pad[3] + out_dim_h = im2col.compute_conv_output_dim(h, k_h, stride_h, pad_h, dilation_h) + out_dim_w = im2col.compute_conv_output_dim(w, k_w, stride_w, pad_w, dilation_w) + mmv_in = 1 + mmv_out = 1 + channel_factor = int(ifm_ch / simd) + + # compute minimal buffer length (assuming it holds 1 complete window) + buffer_min_size = ((k_h - 1) * dilation_h * w + (k_w - 1) * dilation_w + 1) * channel_factor + + buffer_actual_size = self.get_buffer_depth() + code_gen_dict["$BUF_ELEM_TOTAL$"] = [str(buffer_actual_size)] + + # compute some intermediate values, e.g., kernel "width" = k_w incl. dilation + # or cols/rows that are skipped due to imperfect stride<->dim combination + kernel_width = (k_w - 1) * dilation_w + 1 + kernel_height = (k_h - 1) * dilation_h + 1 + skip_columns = w % (kernel_width + (out_dim_w - 1) * stride_w) + skip_rows = h % (kernel_height + (out_dim_h - 1) * stride_h) + + # compute address increment values for 5-loop nest + addr_incr_end_simd = 1 + addr_incr_end_window_elem = (dilation_w - 1) * channel_factor + 1 + addr_incr_end_window_row = ( + ((w - kernel_width) * channel_factor) # remaining line + + ((dilation_h - 1) * w * channel_factor) # skip lines + + 1 # wrap-around of minimally sized buffer + ) + addr_incr_end_window = -buffer_min_size + stride_w * channel_factor + 1 + addr_incr_end_row = ( + -buffer_min_size + + ((skip_columns + kernel_width) * channel_factor) # remaining line + + ((stride_h - 1) * w * channel_factor) # skip lines + + 1 + ) + + # re-use same controller structure -> re-assign address increments + if depthwise: + addr_incr_end_window_elem = dilation_w * 
channel_factor + addr_incr_end_window_row = ( + channel_factor + + (w - kernel_width) * channel_factor + + (dilation_h - 1) * w * channel_factor + ) + addr_incr_end_simd = -buffer_min_size + (channel_factor + 1) + + # sanity check for wrap logic + assert not ( + abs(addr_incr_end_window) > buffer_actual_size + ), "ERROR: W increment > buffer size, try setting parallel_window=1" + assert not ( +> abs(addr_incr_end_row) > buffer_actual_size + ), "ERROR: H increment > buffer size, try setting parallel_window=1" +E AssertionError: ERROR: H increment > buffer size, try setting parallel_window=1 + +../../src/finn/custom_op/fpgadataflow/rtl/convolutioninputgenerator_rtl.py:378: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride1-4-ifm_dim0-k0-idt0] _ + +idt = INT2, k = [2, 2], ifm_dim = [8, 8], ifm_ch = 4, stride = [2, 2] +dilation = [1, 1], simd = 1, dw = 1, parallel_window = 1, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, 
+ ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": 
+ inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [2, 2], 4, [8, 8], [4, 4], [2, 2], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride1-4-ifm_dim0-k1-idt0] _ + +idt = INT2, k = [3, 3], ifm_dim = [8, 8], ifm_ch = 4, stride = [2, 2] +dilation = [1, 1], simd = 1, dw = 1, parallel_window = 1, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + 
@pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = 
model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [3, 3], 4, [8, 8], [3, 3], [2, 2], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride1-4-ifm_dim0-k2-idt0] _ + +idt = INT2, k = [1, 5], ifm_dim = [8, 8], ifm_ch = 4, stride = [2, 2] +dilation = [1, 1], simd = 1, dw = 1, parallel_window = 1, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + 
@pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + 
ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" 
+ +test_fpgadataflow_convinputgenerator.py:343: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../src/finn/util/test.py:423: in tree_model_test + node_rtlsim = get_characteristic_fnc( +../../src/finn/util/test.py:264: in get_characteristic_fnc + _codegen_single_node(node, model, part, target_clk_ns) +../../src/finn/transformation/fpgadataflow/prepare_ip.py:54: in _codegen_single_node + inst.code_generation_ipgen(model, fpgapart, clk) +../../src/finn/custom_op/fpgadataflow/rtlbackend.py:88: in code_generation_ipgen + self.generate_hdl(model, fpgapart, clk) +../../src/finn/custom_op/fpgadataflow/rtl/convolutioninputgenerator_rtl.py:798: in generate_hdl + template_path, code_gen_dict = self.prepare_codegen_default() +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = + + def prepare_codegen_default(self): + """Fills code generation dict for the default implementation style by computing + the incremental addressing scheme for the circular buffer.""" + if self.get_nodeattr("dynamic_mode"): + template_select = "/finn-rtllib/swg/swg_template_default_dynamic.sv" + else: + template_select = "/finn-rtllib/swg/swg_template_default.sv" + template_path = os.environ["FINN_ROOT"] + template_select + code_gen_dict = {} + + ifm_ch = self.get_nodeattr("IFMChannels") + k = self.get_nodeattr("ConvKernelDim") + ifm_dim = self.get_nodeattr("IFMDim") + stride = self.get_nodeattr("Stride") + dilation = self.get_nodeattr("Dilation") + depthwise = self.get_nodeattr("depthwise") + simd = self.get_nodeattr("SIMD") + + k_h, k_w = k + h, w = ifm_dim + pad = [0, 0, 0, 0] # padding happens in separate padding node for now + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + pad_h = pad[0] + pad[2] + pad_w = pad[1] + pad[3] + out_dim_h = im2col.compute_conv_output_dim(h, k_h, stride_h, pad_h, dilation_h) + out_dim_w = im2col.compute_conv_output_dim(w, k_w, stride_w, pad_w, dilation_w) + mmv_in = 1 + mmv_out 
= 1 + channel_factor = int(ifm_ch / simd) + + # compute minimal buffer length (assuming it holds 1 complete window) + buffer_min_size = ((k_h - 1) * dilation_h * w + (k_w - 1) * dilation_w + 1) * channel_factor + + buffer_actual_size = self.get_buffer_depth() + code_gen_dict["$BUF_ELEM_TOTAL$"] = [str(buffer_actual_size)] + + # compute some intermediate values, e.g., kernel "width" = k_w incl. dilation + # or cols/rows that are skipped due to imperfect stride<->dim combination + kernel_width = (k_w - 1) * dilation_w + 1 + kernel_height = (k_h - 1) * dilation_h + 1 + skip_columns = w % (kernel_width + (out_dim_w - 1) * stride_w) + skip_rows = h % (kernel_height + (out_dim_h - 1) * stride_h) + + # compute address increment values for 5-loop nest + addr_incr_end_simd = 1 + addr_incr_end_window_elem = (dilation_w - 1) * channel_factor + 1 + addr_incr_end_window_row = ( + ((w - kernel_width) * channel_factor) # remaining line + + ((dilation_h - 1) * w * channel_factor) # skip lines + + 1 # wrap-around of minimally sized buffer + ) + addr_incr_end_window = -buffer_min_size + stride_w * channel_factor + 1 + addr_incr_end_row = ( + -buffer_min_size + + ((skip_columns + kernel_width) * channel_factor) # remaining line + + ((stride_h - 1) * w * channel_factor) # skip lines + + 1 + ) + + # re-use same controller structure -> re-assign address increments + if depthwise: + addr_incr_end_window_elem = dilation_w * channel_factor + addr_incr_end_window_row = ( + channel_factor + + (w - kernel_width) * channel_factor + + (dilation_h - 1) * w * channel_factor + ) + addr_incr_end_simd = -buffer_min_size + (channel_factor + 1) + + # sanity check for wrap logic + assert not ( + abs(addr_incr_end_window) > buffer_actual_size + ), "ERROR: W increment > buffer size, try setting parallel_window=1" + assert not ( +> abs(addr_incr_end_row) > buffer_actual_size + ), "ERROR: H increment > buffer size, try setting parallel_window=1" +E AssertionError: ERROR: H increment > buffer size, try 
setting parallel_window=1 + +../../src/finn/custom_op/fpgadataflow/rtl/convolutioninputgenerator_rtl.py:378: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride0-2-ifm_dim0-k1-idt0] _ + +idt = INT2, k = [3, 3], ifm_dim = [8, 8], ifm_ch = 2, stride = [1, 1] +dilation = [2, 2], simd = 1, dw = 1, parallel_window = 1, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. 
dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, 
max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [3, 3], 2, [8, 8], [4, 4], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride0-4-ifm_dim0-k0-idt0] _ + +idt = INT2, k = [2, 2], ifm_dim = [8, 8], ifm_ch = 4, stride = [1, 1] +dilation = [2, 2], simd = 1, dw = 1, parallel_window = 1, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w 
= ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + 
dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [2, 2], 4, [8, 8], [6, 6], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride0-4-ifm_dim0-k1-idt0] _ + +idt = INT2, k = [3, 3], ifm_dim = [8, 8], ifm_ch = 4, stride = [1, 1] +dilation = [2, 2], simd = 1, dw = 1, parallel_window = 1, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + 
): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + 
if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [3, 3], 4, [8, 8], [4, 4], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride1-2-ifm_dim0-k1-idt0] _ + +idt = INT2, k = [3, 3], ifm_dim = [8, 8], ifm_ch = 2, stride = [2, 2] +dilation = [2, 2], simd = 1, dw = 1, parallel_window = 1, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + 
dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = 
model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [3, 3], 2, [8, 8], [2, 2], [2, 2], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride1-4-ifm_dim0-k1-idt0] _ + +idt = INT2, k = [3, 3], ifm_dim = [8, 8], ifm_ch = 4, stride = [2, 2] +dilation = [2, 2], simd = 1, dw = 1, parallel_window = 1, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + 
@pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, 
ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [3, 3], 4, [8, 8], [2, 2], [2, 2], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride0-4-ifm_dim0-k2-idt0] _ + +idt = INT2, k = [1, 5], ifm_dim = [8, 8], ifm_ch = 4, stride = [1, 1] +dilation = [1, 1], simd = 2, dw = 1, parallel_window = 1, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable 
(MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, 
stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! 
+E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [1, 5], 4, [8, 8], [8, 4], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride0-4-ifm_dim1-k2-idt0] _ + +idt = INT2, k = [1, 5], ifm_dim = [1, 21], ifm_ch = 4, stride = [1, 1] +dilation = [1, 1], simd = 2, dw = 1, parallel_window = 1, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. 
dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert 
tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [1, 5], 4, [1, 21], [1, 17], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride1-2-ifm_dim0-k0-idt0] _ + +idt = INT2, k = [2, 2], ifm_dim = [8, 8], ifm_ch = 2, stride = [2, 2] +dilation = [1, 1], simd = 2, dw = 1, parallel_window = 1, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + 
dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + 
"ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [2, 2], 2, [8, 8], [4, 4], [2, 2], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride1-2-ifm_dim0-k2-idt0] _ + +idt = INT2, k = [1, 5], ifm_dim = [8, 8], ifm_ch = 2, stride = [2, 2] +dilation = [1, 1], simd = 2, dw = 1, parallel_window = 1, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and 
k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) 
+ inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" + +test_fpgadataflow_convinputgenerator.py:343: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../src/finn/util/test.py:423: in tree_model_test + node_rtlsim = get_characteristic_fnc( +../../src/finn/util/test.py:264: in get_characteristic_fnc + _codegen_single_node(node, model, part, target_clk_ns) +../../src/finn/transformation/fpgadataflow/prepare_ip.py:54: in _codegen_single_node + inst.code_generation_ipgen(model, fpgapart, clk) +../../src/finn/custom_op/fpgadataflow/rtlbackend.py:88: in code_generation_ipgen + self.generate_hdl(model, fpgapart, clk) +../../src/finn/custom_op/fpgadataflow/rtl/convolutioninputgenerator_rtl.py:798: in generate_hdl + template_path, code_gen_dict = self.prepare_codegen_default() +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = + + def prepare_codegen_default(self): + """Fills code generation dict for the default implementation style by computing + the incremental addressing scheme for the circular buffer.""" + if self.get_nodeattr("dynamic_mode"): + template_select = "/finn-rtllib/swg/swg_template_default_dynamic.sv" + else: + template_select = "/finn-rtllib/swg/swg_template_default.sv" + template_path = os.environ["FINN_ROOT"] + template_select + code_gen_dict = {} + + ifm_ch = self.get_nodeattr("IFMChannels") + k = self.get_nodeattr("ConvKernelDim") + ifm_dim = self.get_nodeattr("IFMDim") + stride = 
self.get_nodeattr("Stride") + dilation = self.get_nodeattr("Dilation") + depthwise = self.get_nodeattr("depthwise") + simd = self.get_nodeattr("SIMD") + + k_h, k_w = k + h, w = ifm_dim + pad = [0, 0, 0, 0] # padding happens in separate padding node for now + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + pad_h = pad[0] + pad[2] + pad_w = pad[1] + pad[3] + out_dim_h = im2col.compute_conv_output_dim(h, k_h, stride_h, pad_h, dilation_h) + out_dim_w = im2col.compute_conv_output_dim(w, k_w, stride_w, pad_w, dilation_w) + mmv_in = 1 + mmv_out = 1 + channel_factor = int(ifm_ch / simd) + + # compute minimal buffer length (assuming it holds 1 complete window) + buffer_min_size = ((k_h - 1) * dilation_h * w + (k_w - 1) * dilation_w + 1) * channel_factor + + buffer_actual_size = self.get_buffer_depth() + code_gen_dict["$BUF_ELEM_TOTAL$"] = [str(buffer_actual_size)] + + # compute some intermediate values, e.g., kernel "width" = k_w incl. dilation + # or cols/rows that are skipped due to imperfect stride<->dim combination + kernel_width = (k_w - 1) * dilation_w + 1 + kernel_height = (k_h - 1) * dilation_h + 1 + skip_columns = w % (kernel_width + (out_dim_w - 1) * stride_w) + skip_rows = h % (kernel_height + (out_dim_h - 1) * stride_h) + + # compute address increment values for 5-loop nest + addr_incr_end_simd = 1 + addr_incr_end_window_elem = (dilation_w - 1) * channel_factor + 1 + addr_incr_end_window_row = ( + ((w - kernel_width) * channel_factor) # remaining line + + ((dilation_h - 1) * w * channel_factor) # skip lines + + 1 # wrap-around of minimally sized buffer + ) + addr_incr_end_window = -buffer_min_size + stride_w * channel_factor + 1 + addr_incr_end_row = ( + -buffer_min_size + + ((skip_columns + kernel_width) * channel_factor) # remaining line + + ((stride_h - 1) * w * channel_factor) # skip lines + + 1 + ) + + # re-use same controller structure -> re-assign address increments + if depthwise: + addr_incr_end_window_elem = dilation_w * 
channel_factor + addr_incr_end_window_row = ( + channel_factor + + (w - kernel_width) * channel_factor + + (dilation_h - 1) * w * channel_factor + ) + addr_incr_end_simd = -buffer_min_size + (channel_factor + 1) + + # sanity check for wrap logic + assert not ( + abs(addr_incr_end_window) > buffer_actual_size + ), "ERROR: W increment > buffer size, try setting parallel_window=1" + assert not ( +> abs(addr_incr_end_row) > buffer_actual_size + ), "ERROR: H increment > buffer size, try setting parallel_window=1" +E AssertionError: ERROR: H increment > buffer size, try setting parallel_window=1 + +../../src/finn/custom_op/fpgadataflow/rtl/convolutioninputgenerator_rtl.py:378: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride1-4-ifm_dim0-k0-idt0] _ + +idt = INT2, k = [2, 2], ifm_dim = [8, 8], ifm_ch = 4, stride = [2, 2] +dilation = [1, 1], simd = 2, dw = 1, parallel_window = 1, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, 
+ ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": 
+ inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [2, 2], 4, [8, 8], [4, 4], [2, 2], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride1-4-ifm_dim0-k1-idt0] _ + +idt = INT2, k = [3, 3], ifm_dim = [8, 8], ifm_ch = 4, stride = [2, 2] +dilation = [1, 1], simd = 2, dw = 1, parallel_window = 1, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + 
@pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = 
model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [3, 3], 4, [8, 8], [3, 3], [2, 2], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride1-4-ifm_dim0-k2-idt0] _ + +idt = INT2, k = [1, 5], ifm_dim = [8, 8], ifm_ch = 4, stride = [2, 2] +dilation = [1, 1], simd = 2, dw = 1, parallel_window = 1, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + 
@pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + 
ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" 
+ +test_fpgadataflow_convinputgenerator.py:343: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +../../src/finn/util/test.py:423: in tree_model_test + node_rtlsim = get_characteristic_fnc( +../../src/finn/util/test.py:264: in get_characteristic_fnc + _codegen_single_node(node, model, part, target_clk_ns) +../../src/finn/transformation/fpgadataflow/prepare_ip.py:54: in _codegen_single_node + inst.code_generation_ipgen(model, fpgapart, clk) +../../src/finn/custom_op/fpgadataflow/rtlbackend.py:88: in code_generation_ipgen + self.generate_hdl(model, fpgapart, clk) +../../src/finn/custom_op/fpgadataflow/rtl/convolutioninputgenerator_rtl.py:798: in generate_hdl + template_path, code_gen_dict = self.prepare_codegen_default() +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = + + def prepare_codegen_default(self): + """Fills code generation dict for the default implementation style by computing + the incremental addressing scheme for the circular buffer.""" + if self.get_nodeattr("dynamic_mode"): + template_select = "/finn-rtllib/swg/swg_template_default_dynamic.sv" + else: + template_select = "/finn-rtllib/swg/swg_template_default.sv" + template_path = os.environ["FINN_ROOT"] + template_select + code_gen_dict = {} + + ifm_ch = self.get_nodeattr("IFMChannels") + k = self.get_nodeattr("ConvKernelDim") + ifm_dim = self.get_nodeattr("IFMDim") + stride = self.get_nodeattr("Stride") + dilation = self.get_nodeattr("Dilation") + depthwise = self.get_nodeattr("depthwise") + simd = self.get_nodeattr("SIMD") + + k_h, k_w = k + h, w = ifm_dim + pad = [0, 0, 0, 0] # padding happens in separate padding node for now + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + pad_h = pad[0] + pad[2] + pad_w = pad[1] + pad[3] + out_dim_h = im2col.compute_conv_output_dim(h, k_h, stride_h, pad_h, dilation_h) + out_dim_w = im2col.compute_conv_output_dim(w, k_w, stride_w, pad_w, dilation_w) + mmv_in = 1 + mmv_out 
= 1 + channel_factor = int(ifm_ch / simd) + + # compute minimal buffer length (assuming it holds 1 complete window) + buffer_min_size = ((k_h - 1) * dilation_h * w + (k_w - 1) * dilation_w + 1) * channel_factor + + buffer_actual_size = self.get_buffer_depth() + code_gen_dict["$BUF_ELEM_TOTAL$"] = [str(buffer_actual_size)] + + # compute some intermediate values, e.g., kernel "width" = k_w incl. dilation + # or cols/rows that are skipped due to imperfect stride<->dim combination + kernel_width = (k_w - 1) * dilation_w + 1 + kernel_height = (k_h - 1) * dilation_h + 1 + skip_columns = w % (kernel_width + (out_dim_w - 1) * stride_w) + skip_rows = h % (kernel_height + (out_dim_h - 1) * stride_h) + + # compute address increment values for 5-loop nest + addr_incr_end_simd = 1 + addr_incr_end_window_elem = (dilation_w - 1) * channel_factor + 1 + addr_incr_end_window_row = ( + ((w - kernel_width) * channel_factor) # remaining line + + ((dilation_h - 1) * w * channel_factor) # skip lines + + 1 # wrap-around of minimally sized buffer + ) + addr_incr_end_window = -buffer_min_size + stride_w * channel_factor + 1 + addr_incr_end_row = ( + -buffer_min_size + + ((skip_columns + kernel_width) * channel_factor) # remaining line + + ((stride_h - 1) * w * channel_factor) # skip lines + + 1 + ) + + # re-use same controller structure -> re-assign address increments + if depthwise: + addr_incr_end_window_elem = dilation_w * channel_factor + addr_incr_end_window_row = ( + channel_factor + + (w - kernel_width) * channel_factor + + (dilation_h - 1) * w * channel_factor + ) + addr_incr_end_simd = -buffer_min_size + (channel_factor + 1) + + # sanity check for wrap logic + assert not ( + abs(addr_incr_end_window) > buffer_actual_size + ), "ERROR: W increment > buffer size, try setting parallel_window=1" + assert not ( +> abs(addr_incr_end_row) > buffer_actual_size + ), "ERROR: H increment > buffer size, try setting parallel_window=1" +E AssertionError: ERROR: H increment > buffer size, try 
setting parallel_window=1 + +../../src/finn/custom_op/fpgadataflow/rtl/convolutioninputgenerator_rtl.py:378: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride0-2-ifm_dim0-k1-idt0] _ + +idt = INT2, k = [3, 3], ifm_dim = [8, 8], ifm_ch = 2, stride = [1, 1] +dilation = [2, 2], simd = 2, dw = 1, parallel_window = 1, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. 
dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, 
max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [3, 3], 2, [8, 8], [4, 4], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride0-4-ifm_dim0-k0-idt0] _ + +idt = INT2, k = [2, 2], ifm_dim = [8, 8], ifm_ch = 4, stride = [1, 1] +dilation = [2, 2], simd = 2, dw = 1, parallel_window = 1, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w 
= ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + 
dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [2, 2], 4, [8, 8], [6, 6], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride0-4-ifm_dim0-k1-idt0] _ + +idt = INT2, k = [3, 3], ifm_dim = [8, 8], ifm_ch = 4, stride = [1, 1] +dilation = [2, 2], simd = 2, dw = 1, parallel_window = 1, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + 
): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + 
if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [3, 3], 4, [8, 8], [4, 4], [1, 1], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride1-2-ifm_dim0-k1-idt0] _ + +idt = INT2, k = [3, 3], ifm_dim = [8, 8], ifm_ch = 2, stride = [2, 2] +dilation = [2, 2], simd = 2, dw = 1, parallel_window = 1, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + @pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + 
dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = 
model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [3, 3], 2, [8, 8], [2, 2], [2, 2], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +_ test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride1-4-ifm_dim0-k1-idt0] _ + +idt = INT2, k = [3, 3], ifm_dim = [8, 8], ifm_ch = 4, stride = [2, 2] +dilation = [2, 2], simd = 2, dw = 1, parallel_window = 1, m = 1, flip = False + + @pytest.mark.parametrize("idt", [DataType["INT2"]]) + # kernel size + @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) + # input dimension + @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) + # input channels + @pytest.mark.parametrize("ifm_ch", [2, 4]) + # Stride + @pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) + # Dilation + @pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) + # input channel parallelism ("SIMD") + @pytest.mark.parametrize("simd", [1, 2]) + # depthwise + @pytest.mark.parametrize("dw", [0, 1]) + # parallel_window enable (MMV_out = M*K) + @pytest.mark.parametrize("parallel_window", [0, 1]) + # in/out MMV ("M") + @pytest.mark.parametrize("m", [1]) + # Flip dimensions + @pytest.mark.parametrize("flip", [False]) + 
@pytest.mark.fpgadataflow + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.node_tree_modeling + def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + ): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, 
ofm_dim, stride, dilation, idt, dw) + + + model = model.transform(to_hw.InferConvInpGen()) + + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 40 + +> assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta,0,True + ), "characterized TAV does not match RTLsim'd one!" +E AssertionError: characterized TAV does not match RTLsim'd one! +E assert False +E + where False = tree_model_test(, ('ConvolutionInputGenerator', [3, 3], 4, [8, 8], [2, 2], [2, 2], ...), 'xc7z020clg400-1', 4, 40, 0, True) + +test_fpgadataflow_convinputgenerator.py:343: AssertionError +=============================== warnings summary =============================== +test_fpgadataflow_convinputgenerator.py:257 + /home/lstasytis/finn_prs/finn/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py:257: PytestUnknownMarkWarning: Unknown pytest.mark.node_tree_modeling - is this a typo? You can register custom marks to avoid this warning - for details, see https://docs.pytest.org/en/stable/mark.html + @pytest.mark.node_tree_modeling + +tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py: 135 warnings + /home/lstasytis/finn_prs/finn/deps/qonnx/src/qonnx/core/modelwrapper.py:98: UserWarning: Some old-style domain attributes were automatically converted to new-style, + i.e. domain=finn to domain=qonnx.custom_op. 
+ warnings.warn( + +tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation0-stride1-2-ifm_dim0-k2-idt0] +tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride1-2-ifm_dim0-k2-idt0] +tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride1-4-ifm_dim0-k2-idt0] +tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride1-2-ifm_dim0-k2-idt0] +tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride1-4-ifm_dim0-k2-idt0] + /home/lstasytis/finn_prs/finn/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py:302: DeprecationWarning: In future, it will be an error for 'np.bool_' scalars to be interpreted as an index + adjustments = sorted( + +-- Docs: https://docs.pytest.org/en/stable/warnings.html +=========================== short test summary info ============================ +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation0-stride0-2-ifm_dim1-k2-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation0-stride0-4-ifm_dim1-k2-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation0-stride1-4-ifm_dim0-k1-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride0-2-ifm_dim0-k0-idt0] +FAILED 
test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride0-2-ifm_dim0-k1-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride0-4-ifm_dim0-k0-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride0-4-ifm_dim0-k1-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-1-dilation1-stride1-4-ifm_dim0-k1-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation0-stride0-2-ifm_dim1-k2-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation0-stride0-4-ifm_dim1-k2-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation0-stride1-4-ifm_dim0-k1-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride0-2-ifm_dim0-k0-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride0-2-ifm_dim0-k1-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride0-4-ifm_dim0-k0-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride0-4-ifm_dim0-k1-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-0-2-dilation1-stride1-4-ifm_dim0-k1-idt0] +FAILED 
test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride0-4-ifm_dim0-k2-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride0-4-ifm_dim1-k2-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride1-2-ifm_dim0-k0-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride1-4-ifm_dim0-k0-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation0-stride1-4-ifm_dim0-k1-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride0-2-ifm_dim0-k1-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride0-4-ifm_dim0-k0-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride0-4-ifm_dim0-k1-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride1-2-ifm_dim0-k1-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-1-dilation1-stride1-4-ifm_dim0-k1-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride0-4-ifm_dim0-k2-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride0-4-ifm_dim1-k2-idt0] +FAILED 
test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride1-2-ifm_dim0-k0-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride1-4-ifm_dim0-k0-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation0-stride1-4-ifm_dim0-k1-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride0-2-ifm_dim0-k1-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride0-4-ifm_dim0-k0-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride0-4-ifm_dim0-k1-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride1-2-ifm_dim0-k1-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-0-1-2-dilation1-stride1-4-ifm_dim0-k1-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation0-stride0-2-ifm_dim1-k2-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation0-stride1-2-ifm_dim0-k2-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation1-stride0-2-ifm_dim0-k0-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-0-2-dilation1-stride0-2-ifm_dim0-k1-idt0] +FAILED 
test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride0-4-ifm_dim0-k2-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride0-4-ifm_dim1-k2-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride1-2-ifm_dim0-k0-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride1-2-ifm_dim0-k2-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride1-4-ifm_dim0-k0-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride1-4-ifm_dim0-k1-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation0-stride1-4-ifm_dim0-k2-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride0-2-ifm_dim0-k1-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride0-4-ifm_dim0-k0-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride0-4-ifm_dim0-k1-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride1-2-ifm_dim0-k1-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-1-dilation1-stride1-4-ifm_dim0-k1-idt0] +FAILED 
test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride0-4-ifm_dim0-k2-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride0-4-ifm_dim1-k2-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride1-2-ifm_dim0-k0-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride1-2-ifm_dim0-k2-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride1-4-ifm_dim0-k0-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride1-4-ifm_dim0-k1-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation0-stride1-4-ifm_dim0-k2-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride0-2-ifm_dim0-k1-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride0-4-ifm_dim0-k0-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride0-4-ifm_dim0-k1-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride1-2-ifm_dim0-k1-idt0] +FAILED test_fpgadataflow_convinputgenerator.py::test_fpgadataflow_analytical_characterization_slidingwindow[False-1-1-1-2-dilation1-stride1-4-ifm_dim0-k1-idt0] +=========== 64 failed, 71 passed, 249 skipped, 141 warnings in 7.15s 
=========== diff --git a/tests/fpgadataflow/test_convert_to_hw_pool_batch.py b/tests/fpgadataflow/test_convert_to_hw_pool_batch.py index e155053b8b..4e174fb941 100644 --- a/tests/fpgadataflow/test_convert_to_hw_pool_batch.py +++ b/tests/fpgadataflow/test_convert_to_hw_pool_batch.py @@ -47,6 +47,7 @@ from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers +from finn.util.test import tree_model_test def make_single_maxpool_modelwrapper(k, stride, pad, ifm_ch, ifm_dim, ofm_dim, idt, use_1d=False): @@ -242,3 +243,96 @@ def test_convert_to_hw_pool(idt, odt, pool_config, ifm_ch, pe, op_type, exec_mod exp_cycles_dict = new_model.analysis(exp_cycles_per_layer) exp_cycles = exp_cycles_dict[node.name] assert np.isclose(exp_cycles, cycles_rtlsim, atol=10) + + +# input datatype +@pytest.mark.parametrize("idt", [DataType["UINT4"]]) +# output datatype +@pytest.mark.parametrize("odt", [DataType["UINT4"]]) +# pool configuration: ( k,stride, pad, ifm_dim ) +# @pytest.mark.parametrize("pool_config", [(7, 7, 0, 7), (3, 2, 1, 5)]) +# @pytest.mark.parametrize("pool_config", [(7, 7, 0, 128), (3, 2, 1, 5)]) +@pytest.mark.parametrize("pool_config", [(2, 1, 0, 512)]) +# input channels +@pytest.mark.parametrize("ifm_ch", [32]) +# number of out channel computed in parallel +@pytest.mark.parametrize("pe", [32]) +# pool type +# @pytest.mark.parametrize("op_type", ["QuantAvgPool2d", "MaxPool", "MaxPool1D"]) +@pytest.mark.parametrize("op_type", ["MaxPool1D"]) +@pytest.mark.fpgadataflow +@pytest.mark.slow +@pytest.mark.vivado +@pytest.mark.node_tree_modeling +def test_analytical_characterization_pool(idt, odt, pool_config, ifm_ch, pe, op_type): + k, stride, pad, ifm_dim = pool_config + + if ifm_ch % pe != 0: + pytest.skip("ifm_ch%pe != 0. Skipping") + + if pad != 0 and idt.signed(): + pytest.skip("No support for pal_val != 0. 
Skipping") + + np.random.seed(0) + + part = "xc7z020clg400-1" + + ofm_dim = int(((ifm_dim + 2 * pad - k) / stride) + 1) + + ishape = (1, ifm_ch, ifm_dim, ifm_dim) + use_1d = False + if op_type == "MaxPool1D": + use_1d = True + ishape = (1, ifm_ch, 1, ifm_dim) + op_type = "MaxPool" + + if op_type == "MaxPool": + if idt != odt: + pytest.skip("Skipping Maxpool with idt != odt") + + model = make_single_maxpool_modelwrapper( + k, stride, pad, ifm_ch, ifm_dim, ofm_dim, idt, use_1d + ) + elif op_type == "QuantAvgPool2d": + if pad != 0: + pytest.skip("No padding support for QuantAvgPool2d. Skipping") + + if idt.signed() != odt.signed(): + pytest.skip("Skipping QuantAvgPool2d with idt.signed() != odt.signed()") + model = make_single_quantavpool_modelwrapper(k, stride, ifm_ch, ifm_dim, ofm_dim, idt, odt) + else: + assert False, "{} is not a supported op_type".format(op_type) + + model = model.transform(to_hw.InferPool()) + + # Folding + for n in model.graph.node: + if n.op_type.startswith("Pool"): + inst = getCustomOp(n) + + ishape = inst.get_folded_input_shape() + oshape = inst.get_folded_output_shape() + + inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, ishape) + outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, oshape) + + graph = helper.make_graph(nodes=[n], name="mp_graph", inputs=[inp], outputs=[outp]) + model = qonnx_make_model(graph, producer_name="mp-model") + model = ModelWrapper(model) + model.set_tensor_datatype("inp", idt) + model.set_tensor_datatype("outp", odt) + model = model.transform(InferShapes()) + + inst.set_nodeattr("PE", pe) + model = model.transform(SpecializeLayers(part)) + + node_details = ("Pool", op_type, k, ifm_ch, ifm_dim, ofm_dim, pe, idt) + + target_clk_ns = 4 + + max_allowed_volume_delta = 5000 + max_allowed_length_delta = 5000 + + assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta, max_allowed_length_delta + ), "characterized TAV does not match RTLsim'd one!" 
diff --git a/tests/fpgadataflow/test_fifosizing.py b/tests/fpgadataflow/test_fifosizing.py index 9b36e1c6f7..dd723972cb 100644 --- a/tests/fpgadataflow/test_fifosizing.py +++ b/tests/fpgadataflow/test_fifosizing.py @@ -61,14 +61,38 @@ def fetch_test_model(topology, wbits=2, abits=2): @pytest.mark.slow @pytest.mark.vivado @pytest.mark.fpgadataflow -@pytest.mark.parametrize("method", ["largefifo_rtlsim", "characterize"]) -@pytest.mark.parametrize("topology", ["tfc", "cnv"]) +@pytest.mark.parametrize( + "method", + [ + "analytic_model_based", + "analytic_rtlsim", + "largefifo_rtlsim", + ], +) +@pytest.mark.parametrize( + "topology", + [ + "tfc", + "cnv", + ], +) def test_fifosizing_linear(method, topology): tmp_output_dir = fetch_test_model(topology) + if method == "analytic_model_based": + auto_fifo_strategy = "analytical" + tav_generation_strategy_key = "tree_model" + elif method == "analytic_rtlsim": + auto_fifo_strategy = "analytical" + tav_generation_strategy_key = "rtlsim" + else: + auto_fifo_strategy = "largefifo_rtlsim" + tav_generation_strategy_key = "rtlsim" + cfg = build_cfg.DataflowBuildConfig( output_dir=tmp_output_dir, auto_fifo_depths=True, - auto_fifo_strategy=method, + auto_fifo_strategy=auto_fifo_strategy, + tav_generation_strategy=tav_generation_strategy_key, target_fps=10000 if topology == "tfc" else 1000, synth_clk_period_ns=10.0, board="Pynq-Z1", @@ -100,7 +124,107 @@ def test_fifosizing_linear(method, topology): model0 = ModelWrapper(tmp_output_dir + "/intermediate_models/step_create_stitched_ip.onnx") model1 = ModelWrapper(tmp_output_dir_cmp + "/intermediate_models/step_create_stitched_ip.onnx") + assert len(model0.graph.node) == len(model1.graph.node) + for i in range(len(model0.graph.node)): + node0 = model0.graph.node[i] + node1 = model1.graph.node[i] + assert node0.op_type == node1.op_type + if node0.op_type == "StreamingFIFO": + node0_inst = getCustomOp(node0) + node1_inst = getCustomOp(node1) + assert node0_inst.get_nodeattr("depth") == 
node1_inst.get_nodeattr("depth") + + shutil.rmtree(tmp_output_dir) + shutil.rmtree(tmp_output_dir_cmp) + + +@pytest.mark.slow +@pytest.mark.vivado +@pytest.mark.fpgadataflow +@pytest.mark.parametrize( + "method", + [ + "analytic_model_based", + "analytic_rtlsim", + # "largefifo_rtlsim_python", + # "largefifo_rtlsim_cpp", + ], +) +@pytest.mark.parametrize( + "topology", + [ + "tfc", + # "cnv" + ], +) +def test_fifosizing_fast(method, topology): + force_python_rtlsim = "python" in method + + tmp_output_dir = fetch_test_model(topology) + if method == "analytic_model_based": + auto_fifo_strategy = "analytical" + tav_generation_strategy_key = "tree_model" + elif method == "characterize_rtlsim": + auto_fifo_strategy = "analytical" + tav_generation_strategy_key = "rtlsim" + else: + auto_fifo_strategy = "largefifo_rtlsim" + tav_generation_strategy_key = "rtlsim" + + cfg = build_cfg.DataflowBuildConfig( + output_dir=tmp_output_dir, + auto_fifo_depths=True, + auto_fifo_strategy=auto_fifo_strategy, + tav_generation_strategy=tav_generation_strategy_key, + target_fps=10000 if topology == "tfc" else 1000, + force_python_rtlsim=force_python_rtlsim, + synth_clk_period_ns=10.0, + steps=[ + "step_qonnx_to_finn", + "step_tidy_up", + "step_streamline", + "step_convert_to_hw", + "step_create_dataflow_partition", + "step_specialize_layers", + "step_target_fps_parallelization", + "step_apply_folding_config", + "step_minimize_bit_width", + "step_generate_estimate_reports", + "step_set_fifo_depths", + ], + board="Pynq-Z1", + rtlsim_batch_size=100 if topology == "tfc" else 2, + shell_flow_type=build_cfg.ShellFlowType.VIVADO_ZYNQ, + generate_outputs=[ + build_cfg.DataflowOutputType.ESTIMATE_REPORTS, + ], + ) + build.build_dataflow_cfg(tmp_output_dir + "/model.onnx", cfg) + + # now run the same build using the generated folding and FIFO config + tmp_output_dir_cmp = fetch_test_model(topology) + cfg_cmp = cfg + cfg_cmp.output_dir = tmp_output_dir_cmp + cfg_cmp.auto_fifo_depths = False + 
cfg_cmp.target_fps = None + cfg_cmp.steps = [ + "step_qonnx_to_finn", + "step_tidy_up", + "step_streamline", + "step_convert_to_hw", + "step_create_dataflow_partition", + "step_specialize_layers", + "step_target_fps_parallelization", + "step_apply_folding_config", + "step_minimize_bit_width", + "step_generate_estimate_reports", + "step_set_fifo_depths", + ] + cfg_cmp.folding_config_file = tmp_output_dir + "/final_hw_config.json" + build.build_dataflow_cfg(tmp_output_dir_cmp + "/model.onnx", cfg_cmp) + model0 = ModelWrapper(tmp_output_dir + "/intermediate_models/step_set_fifo_depths.onnx") + model1 = ModelWrapper(tmp_output_dir_cmp + "/intermediate_models/step_set_fifo_depths.onnx") assert len(model0.graph.node) == len(model1.graph.node) for i in range(len(model0.graph.node)): node0 = model0.graph.node[i] diff --git a/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py b/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py index 2ad49ae58b..e0662c0b72 100644 --- a/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py +++ b/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py @@ -47,6 +47,7 @@ from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers +from finn.util.test import tree_model_test def make_modelwrapper(C, pe, idt, odt, pdt, func, vecs): @@ -172,3 +173,48 @@ def test_fpgadataflow_channelwise_ops(idt, act, pdt, nf, ich, func, vecs, exec_m exp_cycles = exp_cycles_dict[node.name] assert np.isclose(exp_cycles, cycles_rtlsim, atol=10) assert exp_cycles != 0 + + +# activation: None or DataType +@pytest.mark.parametrize("act", [DataType["INT8"]]) +# input datatype +@pytest.mark.parametrize("idt", [DataType["INT4"]]) +# param datatype +@pytest.mark.parametrize("pdt", [DataType["INT4"]]) +# folding, -1 is maximum possible +@pytest.mark.parametrize("nf", [-1, 2]) +# number of input 
features +@pytest.mark.parametrize("ich", [16]) +# vecs +@pytest.mark.parametrize("vecs", [[1], [1, 7, 7]]) +# function +@pytest.mark.parametrize("func", ["add"]) +@pytest.mark.fpgadataflow +@pytest.mark.vivado +@pytest.mark.slow +@pytest.mark.node_tree_modeling +def test_fpgadataflow_analytical_characterization_channelwise_ops( + idt, act, pdt, nf, ich, func, vecs +): + if nf == -1: + nf = ich + pe = ich // nf + assert ich % pe == 0 + + # generate param data + C = gen_finn_dt_tensor(pdt, (ich)) + + odt = act + + # create model + model = make_modelwrapper(C, pe, idt, odt, pdt, func, vecs) + node_details = ("ChannelWiseOp", C, pe, idt, odt, pdt, func, "hls") + part = "xc7z020clg400-1" + target_clk_ns = 4 + + max_allowed_volume_delta = 14 + max_allowed_length_delta = 14 + + assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta, max_allowed_length_delta + ), "characterized TAV does not match RTLsim'd one!" diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py index 93860b87ed..dc9b6331ee 100644 --- a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py +++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py @@ -48,6 +48,7 @@ from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers +from finn.util.test import tree_model_test def make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw): @@ -225,3 +226,243 @@ def test_fpgadataflow_slidingwindow( exp_cycles = exp_cycles_dict[node.name] assert np.isclose(exp_cycles, cycles_rtlsim, atol=10, rtol=1.1) assert exp_cycles != 0 + + +# input datatype +@pytest.mark.parametrize("idt", [DataType["INT2"]]) +# kernel size +# @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) 
+@pytest.mark.parametrize("k", [[1, 1], [2, 2]]) +# input dimension +# @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) +@pytest.mark.parametrize("ifm_dim", [[10, 6]]) +# input channels +# @pytest.mark.parametrize("ifm_ch", [2, 4]) +@pytest.mark.parametrize("ifm_ch", [1, 10]) +# Stride +# @pytest.mark.parametrize("stride", [[1, 1]]) +@pytest.mark.parametrize("stride", [[1, 1], [2, 2]]) +# Dilation +# @pytest.mark.parametrize("dilation", [[1, 1]]) +@pytest.mark.parametrize("dilation", [[1, 1], [2, 2]]) +# input channel parallelism ("SIMD") +@pytest.mark.parametrize("simd", [1, 10]) +# depthwise +@pytest.mark.parametrize("dw", [0, 1]) +# parallel_window enable (MMV_out = M*K) +@pytest.mark.parametrize("parallel_window", [0, 1]) +# in/out MMV ("M") +@pytest.mark.parametrize("m", [1]) +# Flip dimensions +@pytest.mark.parametrize("flip", [False]) +@pytest.mark.fpgadataflow +@pytest.mark.slow +@pytest.mark.vivado +@pytest.mark.node_tree_modeling +def test_fpgadataflow_analytical_characterization_slidingwindow( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, +): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. 
dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + model = model.transform(to_hw.InferConvInpGen()) + model = model.transform(SpecializeLayers("xc7z020clg400-1")) + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + + node_details = ( + "ConvolutionInputGenerator", + ifm_dim, + k, + stride, + dilation, + ifm_ch, + simd, + dw, + parallel_window, + idt, + ofm_dim, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 5000 + max_allowed_length_delta = 5000 + + assert tree_model_test( + model, node_details, part, target_clk_ns, 
max_allowed_volume_delta, max_allowed_length_delta + ), "characterized TAV does not match RTLsim'd one!" + + +# input datatype +@pytest.mark.parametrize("idt", [DataType["INT2"]]) +# kernel size +# @pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) +@pytest.mark.parametrize("k", [[7, 7]]) +# input dimension +# @pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) +@pytest.mark.parametrize("ifm_dim", [[7, 7]]) +# input channels +# @pytest.mark.parametrize("ifm_ch", [2, 4]) +@pytest.mark.parametrize("ifm_ch", [1024]) +# Stride +# @pytest.mark.parametrize("stride", [[1, 1]]) +@pytest.mark.parametrize("stride", [[1, 1]]) +# Dilation +# @pytest.mark.parametrize("dilation", [[1, 1]]) +@pytest.mark.parametrize("dilation", [[1, 1]]) +# input channel parallelism ("SIMD") +@pytest.mark.parametrize("simd", [1]) +# depthwise +@pytest.mark.parametrize("dw", [1]) +# parallel_window enable (MMV_out = M*K) +@pytest.mark.parametrize("parallel_window", [0]) +# in/out MMV ("M") +@pytest.mark.parametrize("m", [1]) +# Flip dimensions +@pytest.mark.parametrize("flip", [False]) +@pytest.mark.fpgadataflow +@pytest.mark.slow +@pytest.mark.vivado +@pytest.mark.node_tree_modeling +def test_fpgadataflow_analytical_characterization_slidingwindow_mobilenet( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, +): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. 
dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + model = model.transform(to_hw.InferConvInpGen()) + model = model.transform(SpecializeLayers("xc7z020clg400-1")) + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + + node_details = ( + "ConvolutionInputGenerator", + ifm_dim, + k, + stride, + dilation, + ifm_ch, + simd, + dw, + parallel_window, + idt, + ofm_dim, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 2140 # should change to 20% of peak volume + max_allowed_length_delta = 2140 # should change to 20% of peak volume + + assert 
tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta, max_allowed_length_delta + ), "characterized TAV does not match RTLsim'd one!" diff --git a/tests/fpgadataflow/test_fpgadataflow_downsampler.py b/tests/fpgadataflow/test_fpgadataflow_downsampler.py index ce04af74ed..858271e189 100644 --- a/tests/fpgadataflow/test_fpgadataflow_downsampler.py +++ b/tests/fpgadataflow/test_fpgadataflow_downsampler.py @@ -30,6 +30,7 @@ import numpy as np import onnx.parser as oprs +from onnx import TensorProto, helper from qonnx.core.datatype import DataType from qonnx.core.modelwrapper import ModelWrapper from qonnx.custom_op.general.im2col import compute_conv_output_dim @@ -37,7 +38,7 @@ from qonnx.transformation.general import GiveUniqueNodeNames from qonnx.transformation.infer_shapes import InferShapes from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul -from qonnx.util.basic import gen_finn_dt_tensor +from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer @@ -49,6 +50,7 @@ from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers +from finn.util.test import tree_model_test def build_model(is_1d, in_dim, k, stride, dt_in, dt_w, pad_half=0, flip_1d=False): @@ -160,3 +162,53 @@ def test_fpgadataflow_downsampler(is_1d, flip_1d, exec_mode): exp_cycles = exp_cycles - in_dim assert np.isclose(exp_cycles, cycles_rtlsim, atol=10, rtol=1.1) assert exp_cycles != 0 + + +@pytest.mark.parametrize("is_1d", [True, False]) +@pytest.mark.parametrize("flip_1d", [True, False]) +@pytest.mark.slow +@pytest.mark.vivado +@pytest.mark.fpgadataflow +@pytest.mark.node_tree_modeling +def 
test_fpgadataflow_analytical_characterization_downsampler(is_1d, flip_1d):
+    if flip_1d and not is_1d:
+        pytest.skip("flip_1d only applicable for is_1d")
+    in_dim = 32
+    k = 1
+    stride = 2
+    dt_in = DataType["UINT8"]
+    dt_w = DataType["INT2"]
+    model = build_model(is_1d, in_dim, k, stride, dt_in, dt_w, pad_half=0, flip_1d=flip_1d)
+
+    model = model.transform(to_hw.InferConvInpGen())
+
+    # locate the generated ConvolutionInputGenerator node
+    for n in model.graph.node:
+        if n.op_type.startswith("ConvolutionInputGenerator"):
+            inst = getCustomOp(n)
+
+    ishape = inst.get_normal_input_shape()
+    oshape = inst.get_normal_output_shape()
+
+    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, ishape)
+    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, oshape)
+
+    graph = helper.make_graph(nodes=[n], name="mp_graph", inputs=[inp], outputs=[outp])
+    model = qonnx_make_model(graph, producer_name="mp-model")
+    model = ModelWrapper(model)
+    model.set_tensor_datatype("inp", dt_in)
+    model.set_tensor_datatype("outp", dt_in)
+    model = model.transform(InferShapes())
+
+    node_details = ("Downsampler", is_1d, flip_1d, in_dim, k, stride)
+    part = "xc7z020clg400-1"
+    target_clk_ns = 4
+
+    model = model.transform(SpecializeLayers(part))
+
+    max_allowed_volume_delta = 30
+    max_allowed_length_delta = 30
+
+    assert tree_model_test(
+        model, node_details, part, target_clk_ns, max_allowed_volume_delta, max_allowed_length_delta
+    ), "characterized TAV does not match RTLsim'd one!"
diff --git a/tests/fpgadataflow/test_fpgadataflow_dwc.py b/tests/fpgadataflow/test_fpgadataflow_dwc.py index 6b79a39ed5..a7cf0972a0 100644 --- a/tests/fpgadataflow/test_fpgadataflow_dwc.py +++ b/tests/fpgadataflow/test_fpgadataflow_dwc.py @@ -45,6 +45,7 @@ from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers +from finn.util.test import tree_model_test def make_single_dwc_modelwrapper(shape, inWidth, outWidth, finn_dtype, impl_style): @@ -172,3 +173,40 @@ def test_fpgadataflow_dwc_stitched_rtlsim(config, impl_style): ).all(), """The output values are not the same as the input values anymore.""" assert y.shape == tuple(shape), """The output shape is incorrect.""" + + +@pytest.mark.parametrize( + "config", + [ + ([1, 24], 8, 4, DataType["INT2"]), + ([1, 4], 2, 4, DataType["BIPOLAR"]), + ([1, 4], 4, 2, DataType["INT2"]), + ([1, 2, 8], 4, 4, DataType["INT2"]), + ([1, 2, 8], 8, 16, DataType["INT2"]), + ], +) +@pytest.mark.parametrize("impl_style", ["hls", "rtl"]) +@pytest.mark.fpgadataflow +@pytest.mark.slow +@pytest.mark.vivado +@pytest.mark.node_tree_modeling +def test_fpgadataflow_analytical_characterization_dwc(config, impl_style): + shape, inWidth, outWidth, finn_dtype = config + + part = "xc7z020clg400-1" + model = make_single_dwc_modelwrapper(shape, inWidth, outWidth, finn_dtype, impl_style) + model = model.transform(SpecializeLayers(part)) + # model = model.transform(InferShapes()) + # model = model.transform(SetExecMode(mode)) + + node_details = ("DWC", config, impl_style) + # part = "xc7z020clg400-1" + + target_clk_ns = 4 + + max_allowed_volume_delta = 5 + max_allowed_length_delta = 20 + + assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta, max_allowed_length_delta + ), "characterized TAV does not match RTLsim'd one!" 
diff --git a/tests/fpgadataflow/test_fpgadataflow_fmpadding.py b/tests/fpgadataflow/test_fpgadataflow_fmpadding.py index 1e9474677f..b76d6c5c99 100644 --- a/tests/fpgadataflow/test_fpgadataflow_fmpadding.py +++ b/tests/fpgadataflow/test_fpgadataflow_fmpadding.py @@ -48,6 +48,7 @@ from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers from finn.util.basic import pynq_part_map +from finn.util.test import tree_model_test test_pynq_board = "Pynq-Z1" test_fpga_part = pynq_part_map[test_pynq_board] @@ -158,3 +159,43 @@ def test_fpgadataflow_fmpadding(idim, pad, num_ch, simd, idt, mode): exp_cycles = exp_cycles_dict[node.name] assert np.isclose(exp_cycles, cycles_rtlsim, atol=10) assert exp_cycles != 0 + + +# input image dimension +@pytest.mark.parametrize("idim", [[10, 8]]) +# number of rows and number of cols to add +@pytest.mark.parametrize("pad", [[1, 1, 1, 1], [1, 1, 2, 2], [7, 0, 8, 0]]) +# number of channels +@pytest.mark.parametrize("num_ch", [2, 4]) +# Input parallelism +@pytest.mark.parametrize("simd", [1, 2]) +# FINN input datatype +@pytest.mark.parametrize("idt", [DataType["INT2"]]) +# execution mode +@pytest.mark.parametrize("mode", ["rtlsim"]) +# implementation style +@pytest.mark.parametrize("impl_style", ["rtl", "hls"]) +@pytest.mark.fpgadataflow +@pytest.mark.slow +@pytest.mark.vivado +@pytest.mark.node_tree_modeling +def test_fpgadataflow_analytical_characterization_fmpadding( + idim, pad, num_ch, simd, idt, mode, impl_style +): + if num_ch % simd != 0: + pytest.skip(" num_ch % simd != 0, skipping") + + model = make_single_fmpadding_modelwrapper(idim, pad, num_ch, simd, idt) + model = model.transform(InferShapes()) + model = model.transform(SetExecMode(mode)) + + node_details = ("FMPadding", idim, pad, num_ch, simd, idt, mode, impl_style) + part = "xc7z020clg400-1" + target_clk_ns = 4 + + max_allowed_volume_delta = 2 + max_allowed_length_delta = 2 + + assert 
tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta, max_allowed_length_delta + ), "characterized TAV does not match RTLsim'd one!" diff --git a/tests/fpgadataflow/test_fpgadataflow_labelselect.py b/tests/fpgadataflow/test_fpgadataflow_labelselect.py index 83ab2ddcaf..a55698bed8 100644 --- a/tests/fpgadataflow/test_fpgadataflow_labelselect.py +++ b/tests/fpgadataflow/test_fpgadataflow_labelselect.py @@ -44,7 +44,7 @@ from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers -from finn.util.test import soft_verify_topk +from finn.util.test import soft_verify_topk, tree_model_test def make_labelselect_modelwrapper(labels, pe, k, idt, impl_style): @@ -136,3 +136,40 @@ def test_fpgadataflow_labelselect(idt, labels, fold, k, exec_mode, impl_style): y = oxe.execute_onnx(model, input_dict)["outp"] assert soft_verify_topk(x, y, k), exec_mode + " failed" + + +# which port to test +@pytest.mark.parametrize("idt", [DataType["UINT8"]]) +# labels +@pytest.mark.parametrize("labels", [10, 100]) +# folding +@pytest.mark.parametrize("fold", [1, 10]) +# number of top labels to select +@pytest.mark.parametrize("k", [1, 5]) +# impl style +@pytest.mark.parametrize("impl_style", ["hls"]) +@pytest.mark.fpgadataflow +@pytest.mark.vivado +@pytest.mark.slow +@pytest.mark.node_tree_modeling +def test_fpgadataflow_analytical_characterization_labelselect(idt, labels, fold, k, impl_style): + np.random.seed(0) + if fold == -1: + pe = 1 + else: + pe = labels // fold + assert labels % pe == 0 + + if k == -1: + k = labels + + model = make_labelselect_modelwrapper(labels, pe, k, idt, impl_style) + node_details = ("LabelSelect", idt, labels, fold, k, impl_style) + part = "xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 10 + max_allowed_length_delta = 398 # RTLSIM is inconsistent + + 
assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta, max_allowed_length_delta + ), "characterized TAV does not match RTLsim'd one!" diff --git a/tests/fpgadataflow/test_fpgadataflow_mvau.py b/tests/fpgadataflow/test_fpgadataflow_mvau.py index d079578e72..8144cbde99 100644 --- a/tests/fpgadataflow/test_fpgadataflow_mvau.py +++ b/tests/fpgadataflow/test_fpgadataflow_mvau.py @@ -51,7 +51,6 @@ from finn.core.rtlsim_exec import rtlsim_exec from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP -from finn.transformation.fpgadataflow.derive_characteristic import DeriveCharacteristic from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP from finn.transformation.fpgadataflow.minimize_accumulator_width import ( MinimizeAccumulatorWidth, @@ -67,6 +66,7 @@ from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers from finn.transformation.general import ApplyConfig from finn.util.basic import is_versal +from finn.util.test import tree_model_test def make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T=None, tdt=None): @@ -661,84 +661,6 @@ def read_weights(sim): ).all(), "Output of ONNX model not matching output of stitched-IP RTL model!" 
-# mem_mode: internal_embedded or internal_decoupled -@pytest.mark.parametrize("mem_mode", ["internal_decoupled", "internal_embedded"]) -# activation: None or DataType -@pytest.mark.parametrize("act", [None, DataType["INT4"]]) -# weight datatype -@pytest.mark.parametrize("wdt", [DataType["INT4"]]) -# input datatype -@pytest.mark.parametrize("idt", [DataType["INT4"]]) -# neuron folding, -1 is maximum possible -@pytest.mark.parametrize("nf", [8]) -# synapse folding, -1 is maximum possible -@pytest.mark.parametrize("sf", [8]) -# HLS matrix width (input features) -@pytest.mark.parametrize("mw", [32]) -# HLS matrix height (output features) -@pytest.mark.parametrize("mh", [32]) -# Backend -@pytest.mark.parametrize("preferred_impl_style", ["hls", "rtl"]) -@pytest.mark.fpgadataflow -@pytest.mark.vivado -def test_mvau_fifocharacterize_rtlsim( - mem_mode, idt, wdt, act, nf, sf, mw, mh, preferred_impl_style -): - if preferred_impl_style == "rtl" and (mem_mode == "internal_embedded" or act is not None): - pytest.skip("RTL-MVAU doesn't support const mem mode or embedded activations") - if nf == -1: - nf = mh - if sf == -1: - sf = mw - pe = mh // nf - simd = mw // sf - assert mh % pe == 0 - assert mw % sf == 0 - # generate weights - W = gen_finn_dt_tensor(wdt, (mw, mh)) - - # no activation, produce accumulators - T = None - tdt = None - if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]: - odt = DataType["UINT32"] - else: - odt = DataType["INT32"] - - model = make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T, tdt) - for node in model.graph.node: - # lookup op_type in registry of CustomOps - inst = getCustomOp(node) - inst.set_nodeattr("mem_mode", mem_mode) - inst.set_nodeattr("resType", "auto") - inst.set_nodeattr("preferred_impl_style", preferred_impl_style) - total_fold = nf * sf - exp_total_cycles = int(np.ceil(total_fold * 1.2)) - model = model.transform(SpecializeLayers("xczu7ev-ffvc1156-2-e")) - model = model.transform(MinimizeWeightBitWidth()) - 
model = model.transform(MinimizeAccumulatorWidth()) - model = model.transform(SetExecMode("rtlsim")) - model = model.transform(GiveUniqueNodeNames()) - model = model.transform(PrepareIP("xczu7ev-ffvc1156-2-e", 5)) - model = model.transform(HLSSynthIP()) - model = model.transform(PrepareRTLSim()) - model = model.transform(DeriveCharacteristic(exp_total_cycles)) - node_inst = getCustomOp(model.graph.node[0]) - period_attr = node_inst.get_nodeattr("io_chrc_period") - assert period_attr == exp_total_cycles - chrc_in = node_inst.get_nodeattr("io_chrc_in") - chrc_out = node_inst.get_nodeattr("io_chrc_out") - if mem_mode == "internal_decoupled": - assert chrc_in.shape == (2, 2 * exp_total_cycles) - else: - assert chrc_in.shape == (1, 2 * exp_total_cycles) - assert chrc_out.shape == (1, 2 * exp_total_cycles) - # total number of transactions == 2*SF - assert chrc_in[0, -1] == 2 * sf - # all outputs should be produced within the exp n of cycles - assert chrc_out[0, exp_total_cycles] == nf - - @pytest.mark.parametrize("mh", [18]) @pytest.mark.parametrize("mw", [32]) @pytest.mark.parametrize("pe", [1, 9, 18]) @@ -963,3 +885,69 @@ def test_fpgadataflow_rtl_dynamic_mvau(mh, mw, n_vectors, pe, simd, idt_wdt, par assert ( output_matmul == output_mvau_rtl_stitch ).all(), "Output of ONNX model not matching output of stitched-IP RTL model!" 
+ + +# mem_mode: internal_embedded or internal_decoupled +@pytest.mark.parametrize("mem_mode", ["internal_decoupled", "internal_embedded"]) +# activation: None or DataType +@pytest.mark.parametrize("act", [None]) +# weight datatype +@pytest.mark.parametrize("wdt", [DataType["INT4"]]) +# input datatype +@pytest.mark.parametrize("idt", [DataType["INT4"]]) +# neuron folding, -1 is maximum possible +@pytest.mark.parametrize("nf", [-1, 2, 8]) +# synapse folding, -1 is maximum possible +@pytest.mark.parametrize("sf", [-1, 2, 4]) +# HLS matrix width (input features) +@pytest.mark.parametrize("mw", [32]) +# HLS matrix height (output features) +@pytest.mark.parametrize("mh", [32]) +# Backend +@pytest.mark.parametrize("preferred_impl_style", ["hls", "rtl"]) +@pytest.mark.fpgadataflow +@pytest.mark.vivado +@pytest.mark.node_tree_modeling +def test_fpgadataflow_analytical_characterization_mvau( + mem_mode, idt, wdt, act, nf, sf, mw, mh, preferred_impl_style +): + if preferred_impl_style == "rtl" and (mem_mode == "internal_embedded" or act is not None): + pytest.skip("RTL-MVAU doesn't support const mem mode or embedded activations") + if nf == -1: + nf = mh + if sf == -1: + sf = mw + pe = mh // nf + simd = mw // sf + + assert mh % pe == 0 + assert mw % sf == 0 + # generate weights + W = gen_finn_dt_tensor(wdt, (mw, mh)) + + # no activation, produce accumulators + T = None + tdt = None + if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]: + odt = DataType["UINT32"] + else: + odt = DataType["INT32"] + + model = make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T, tdt) + for node in model.graph.node: + # lookup op_type in registry of CustomOps + inst = getCustomOp(node) + inst.set_nodeattr("mem_mode", mem_mode) + inst.set_nodeattr("numInputVectors", [16]) + inst.set_nodeattr("resType", "auto") + inst.set_nodeattr("preferred_impl_style", preferred_impl_style) + + node_details = ("MVAU", mem_mode, idt, wdt, act, nf, sf, mw, mh, preferred_impl_style) + part = 
"xc7z020clg400-1" + target_clk_ns = 4 + max_allowed_volume_delta = 20 + max_allowed_length_delta = 26 + + assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta, max_allowed_length_delta + ), "characterized TAV does not match RTLsim'd one!" diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding.py b/tests/fpgadataflow/test_fpgadataflow_thresholding.py index d90a080bf2..8152c4139e 100644 --- a/tests/fpgadataflow/test_fpgadataflow_thresholding.py +++ b/tests/fpgadataflow/test_fpgadataflow_thresholding.py @@ -52,6 +52,7 @@ from finn.transformation.fpgadataflow.set_fifo_depths import InsertAndSetFIFODepths from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds +from finn.util.test import tree_model_test test_fpga_part = "xczu3eg-sbva484-1-e" target_clk_ns = 5 @@ -397,3 +398,141 @@ def test_fpgadataflow_thresholding_stitched_ip( assert ( y_expected == y_produced ).all(), "Output of ONNX model not matching output of stitched-IP RTL model!" 
+
+
+@pytest.mark.parametrize("num_input_channels", [6, 16])
+@pytest.mark.parametrize(
+    "num_input_vecs",
+    [
+        [1],
+        [1, 2, 2],
+    ],
+)
+@pytest.mark.parametrize("activation", [DataType["BIPOLAR"]])
+@pytest.mark.parametrize(
+    "idt_tdt_cfg",
+    [
+        (DataType["INT8"], DataType["INT8"]),
+    ],
+)
+@pytest.mark.parametrize("fold", [-1, 1, 2])
+@pytest.mark.parametrize("narrow", [True, False])
+@pytest.mark.parametrize("per_tensor", [True, False])
+@pytest.mark.parametrize("impl_style", ["rtl"])
+@pytest.mark.parametrize("mem_mode", ["internal_embedded", "internal_decoupled"])
+@pytest.mark.fpgadataflow
+@pytest.mark.vivado
+@pytest.mark.slow
+@pytest.mark.node_tree_modeling
+def test_fpgadataflow_analytical_characterization_thresholding(
+    num_input_channels,
+    num_input_vecs,
+    activation,
+    idt_tdt_cfg,
+    fold,
+    narrow,
+    per_tensor,
+    impl_style,
+    mem_mode,
+):
+    # the mem_mode parameter can only be used for the hls thresholding
+    # so the test will only be executed once for impl_style=rtl and once skipped
+    # when the mem_mode is varied. Otherwise, the same test configuration would always
+    # run twice.
+    if impl_style == "rtl" and mem_mode == "internal_decoupled":
+        pytest.skip(
+            "Skip, because test is identical to impl_style=rtl and mem_mode=internal_embedded"
+        )
+    if narrow and activation == DataType["BIPOLAR"]:
+        pytest.skip("Narrow needs to be false with bipolar activation.")
+    input_data_type, threshold_data_type = idt_tdt_cfg
+    num_steps = activation.get_num_possible_values() - 1
+
+    if fold == -1:
+        fold = num_input_channels
+    pe = num_input_channels // fold
+    if num_input_channels % pe != 0:
+        pytest.skip("Invalid folding configuration. 
Skipping test.") + + output_data_type = activation + if activation == DataType["BIPOLAR"]: + activation_bias = 0 + else: + activation_bias = activation.min() + if narrow and activation.signed(): + activation_bias += 1 + + # Generate random thresholds and sort in ascending order + thresholds = generate_random_threshold_values( + threshold_data_type, num_input_channels, num_steps, narrow, per_tensor + ) + + # provide non-decreasing/ascending thresholds + thresholds = sort_thresholds_increasing(thresholds) + + # Make a Multithreshold graph and convert to thresholding binary search node + model = make_single_multithresholding_modelwrapper( + thresholds, + input_data_type, + threshold_data_type, + output_data_type, + activation_bias, + num_input_vecs, + num_input_channels, + ) + + # calculate reference output + x = gen_finn_dt_tensor(input_data_type, tuple(num_input_vecs + [num_input_channels])) + + input_dict = {model.graph.input[0].name: x} + y_expected = oxe.execute_onnx(model, input_dict)[model.graph.output[0].name] + + if output_data_type == DataType["BIPOLAR"]: + # binary to bipolar + y_expected = 2 * y_expected - 1 + + model = model.transform(InferThresholdingLayer()) + + # Transform to the specified implementation style, either the + # RTL or HLS according to test parameters + node = model.get_nodes_by_op_type(model.graph.node[0].op_type)[0] + inst = getCustomOp(node) + inst.set_nodeattr("preferred_impl_style", impl_style) + model = model.transform(SpecializeLayers(test_fpga_part)) + model = model.transform(InferShapes()) + assert model.graph.node[0].op_type == "Thresholding_" + str(impl_style) + + node = model.get_nodes_by_op_type(model.graph.node[0].op_type)[0] + inst = getCustomOp(node) + inst.set_nodeattr("PE", pe) + + if impl_style == "hls": + inst.set_nodeattr("mem_mode", mem_mode) + + node_details = ( + "Thr", + input_data_type, + threshold_data_type, + output_data_type, + activation_bias, + num_input_vecs, + num_input_channels, + pe, + narrow, + 
per_tensor, + activation, + mem_mode, + impl_style, + ) + + max_allowed_volume_delta = 8 + max_allowed_length_delta = 6 + + assert tree_model_test( + model, + node_details, + test_fpga_part, + target_clk_ns, + max_allowed_volume_delta, + max_allowed_length_delta, + ), "characterized TAV does not match RTLsim'd one!" diff --git a/tests/fpgadataflow/test_fpgadataflow_vvau.py b/tests/fpgadataflow/test_fpgadataflow_vvau.py index 80b64d5e4a..cd8e572b79 100644 --- a/tests/fpgadataflow/test_fpgadataflow_vvau.py +++ b/tests/fpgadataflow/test_fpgadataflow_vvau.py @@ -63,6 +63,7 @@ from finn.transformation.fpgadataflow.set_fifo_depths import InsertAndSetFIFODepths from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers from finn.transformation.general import ApplyConfig +from finn.util.test import tree_model_test def _infer_sparse_weight_tensor(W_conv, k_h, k_w, channels): @@ -479,3 +480,87 @@ def test_fpgadataflow_vvau_rtl(kernel_size, in_feature_dim, in_chn, idt, wdt, pa assert ( golden_out == output_vvau_stitched ).all(), "Output of ONNX model not matching output of stitched-IP RTL model!" 
+
+
+# input datatype
+@pytest.mark.parametrize("idt", [DataType["BIPOLAR"]])
+# weight datatype
+@pytest.mark.parametrize("wdt", [DataType["BIPOLAR"]])
+# activation: None or DataType
+@pytest.mark.parametrize("act", [DataType["BIPOLAR"], None])
+# PE
+@pytest.mark.parametrize("pe", [1, 3, 6])
+# SIMD
+@pytest.mark.parametrize("simd", [1, 9])
+# Input image shape
+@pytest.mark.parametrize("dim_h", [10])
+@pytest.mark.parametrize("dim_w", [10, 1])
+# Kernel shape
+@pytest.mark.parametrize("k_h", [3])
+@pytest.mark.parametrize("k_w", [3, 1])
+# Number of input and output channels
+@pytest.mark.parametrize("channels", [3])
+# memory mode
+@pytest.mark.parametrize("mem_mode", ["internal_decoupled", "internal_embedded"])
+@pytest.mark.fpgadataflow
+@pytest.mark.slow
+@pytest.mark.vivado
+@pytest.mark.node_tree_modeling
+def test_fpgadataflow_analytical_characterization_vvau(
+    idt, wdt, act, pe, simd, dim_h, dim_w, k_h, k_w, channels, mem_mode
+):
+    if dim_w == 1 and k_w != 1:
+        pytest.skip("1D image requires 1D kernel, skipping.")
+
+    if channels % pe != 0:
+        pytest.skip("Requirement Channels divisible by PE is violated.")
+
+    if (k_h * k_w) % simd != 0:
+        pytest.skip("Requirement kernel (k_h * k_w) divisible by SIMD is violated.")
+
+    # Generate weights in expected shape for ONNX and HLS node
+    W = gen_finn_dt_tensor(wdt, (channels, 1, k_h, k_w))  # shape: [channels, 1, k, k]
+
+    # Generate inputs in expected format for ONNX and HLS node
+    x = gen_finn_dt_tensor(idt, (1, dim_h, dim_w, k_h * k_w * channels))
+    x_vvau = x.reshape(1, dim_h, dim_w, k_h * k_w, channels // pe, pe)
+    x_vvau = x_vvau.transpose(0, 1, 2, 4, 3, 5)
+    x_vvau = x_vvau.reshape(1, dim_h, dim_w, channels * k_h * k_w)
+
+    if act is None:
+        T = None
+        tdt = None
+        if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]:
+            odt = DataType["UINT32"]
+        else:
+            odt = DataType["INT32"]
+    else:
+        odt = act
+        (min_v, max_v) = _calculate_dot_prod_range(idt, wdt, k_h * k_w)
+        n_steps = 
act.get_num_possible_values() - 1 + T = np.random.randint(min_v, max_v - 1, (channels, n_steps)).astype(np.float32) + T = np.sort(T, axis=1) + if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]: + tdt = DataType["UINT32"] + # bias thresholds to be positive + T = np.ceil((T + (k_h * k_w)) / 2) + assert (T >= 0).all() + else: + tdt = DataType["INT32"] + + model = _make_single_vvau_modelwrapper( + W, pe, simd, k_h, k_w, channels, dim_h, dim_w, wdt, idt, odt, T, tdt, mem_mode + ) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(GiveReadableTensorNames()) + + node_details = ("VVAU", idt, wdt, act, pe, simd, dim_h, dim_w, k_h, k_w, channels, mem_mode) + part = "xc7z020clg400-1" + target_clk_ns = 4 + + max_allowed_volume_delta = 14 + max_allowed_length_delta = 14 + + assert tree_model_test( + model, node_details, part, target_clk_ns, max_allowed_volume_delta, max_allowed_length_delta + ), "characterized TAV does not match RTLsim'd one!"