diff --git a/fetch-repos.sh b/fetch-repos.sh index 8aad454d4f..ef99d38eaf 100755 --- a/fetch-repos.sh +++ b/fetch-repos.sh @@ -32,7 +32,7 @@ QONNX_COMMIT="f5c9819bd00f01f41e70639b8461c8e4b39432f7" FINN_EXP_COMMIT="0724be21111a21f0d81a072fccc1c446e053f851" BREVITAS_COMMIT="aad4d5a293db6f2ec622a92a5d3278e47072453e" CNPY_COMMIT="8c82362372ce600bbd1cf11d64661ab69d38d7de" -HLSLIB_COMMIT="a2cd3e6ce653a03e59af6bcb9fbeaa71618d160e" +HLSLIB_COMMIT="120c46293fdf534415a6a47973a8f712fca6d900" OMX_COMMIT="a5d48f93309b235fdd21556d16e86e6ef5db6e2e" AVNET_BDF_COMMIT="2d49cfc25766f07792c0b314489f21fe916b639b" XIL_BDF_COMMIT="8cf4bb674a919ac34e3d99d8d71a9e60af93d14e" diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py index b74bbf538d..40ee90878e 100644 --- a/src/finn/custom_op/fpgadataflow/__init__.py +++ b/src/finn/custom_op/fpgadataflow/__init__.py @@ -1,5 +1,5 @@ # Copyright (C) 2020-2022, Xilinx, Inc. -# Copyright (C) 2023-2024, Advanced Micro Devices, Inc. +# Copyright (C) 2022-2024, Advanced Micro Devices, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -59,6 +59,7 @@ def register_custom_op(cls): ConvolutionInputGenerator, ) from finn.custom_op.fpgadataflow.crop import Crop +from finn.custom_op.fpgadataflow.deconvolution import Deconvolution from finn.custom_op.fpgadataflow.duplicatestreams import DuplicateStreams from finn.custom_op.fpgadataflow.fmpadding import FMPadding from finn.custom_op.fpgadataflow.fmpadding_pixel import FMPadding_Pixel @@ -96,6 +97,7 @@ def register_custom_op(cls): custom_op["AddStreams"] = AddStreams custom_op["ChannelwiseOp"] = ChannelwiseOp custom_op["ConvolutionInputGenerator"] = ConvolutionInputGenerator +custom_op["Deconvolution"] = Deconvolution custom_op["Crop"] = Crop custom_op["DuplicateStreams"] = DuplicateStreams custom_op["FMPadding"] = FMPadding diff --git a/src/finn/custom_op/fpgadataflow/deconvolution.py b/src/finn/custom_op/fpgadataflow/deconvolution.py new file mode 100644 index 0000000000..ad7a0bda1e --- /dev/null +++ b/src/finn/custom_op/fpgadataflow/deconvolution.py @@ -0,0 +1,173 @@ +# Copyright (C) 2024, Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import warnings +from qonnx.core.datatype import DataType + +from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp + + +class Deconvolution(HWCustomOp): + """Abstraction layer for HW implementation of Deconvolution""" + + def __init__(self, onnx_node, **kwargs): + super().__init__(onnx_node, **kwargs) + + def get_nodeattr_types(self): + my_attrs = { + "KernelDim": ("ints", True, []), # [H, W] = [Y, X] + "IFMChannels": ("i", True, 0), + "OFMChannels": ("i", True, 0), + "IFMDim": ("ints", True, []), # [H, W] = [Y, X] + "PE": ("i", True, 0), + "SIMD": ("i", True, 0), + "Stride": ("ints", True, [1, 1]), # [H, W] = [Y, X] + "Padding": ("ints", True, []), # [H, W] = [Y, X] + # FINN DataTypes for inputs, weights, outputs + "inputDataType": ("s", True, ""), + "weightDataType": ("s", True, ""), + "outputDataType": ("s", True, ""), + } + my_attrs.update(super().get_nodeattr_types()) + return my_attrs + + def get_normal_input_shape(self, ind=0): + if ind == 0: + ifm_dim_h, ifm_dim_w = self.get_nodeattr("IFMDim") + ifm_ch = self.get_nodeattr("IFMChannels") + ishape = (1, ifm_dim_h, ifm_dim_w, ifm_ch) + else: + ifm_ch = self.get_nodeattr("IFMChannels") + ofm_ch = self.get_nodeattr("OFMChannels") + k_h, k_w = self.get_nodeattr("KernelDim") + ishape = (ofm_ch, k_h, k_w, ifm_ch) + return ishape + + def get_folded_input_shape(self, ind=0): + if ind == 0: + ifm_dim_h, ifm_dim_w = self.get_nodeattr("IFMDim") + ifm_ch = self.get_nodeattr("IFMChannels") + simd = self.get_nodeattr("SIMD") + assert ifm_ch % simd == 0, "SIMD must divide IFMChannels" + fold = int(ifm_ch / simd) + folded_ishape = (1, ifm_dim_h, ifm_dim_w, fold, simd) + else: + folded_ishape = self.get_normal_input_shape(ind) + return folded_ishape + + def get_normal_output_shape(self, ind=0): + idim_h, idim_w = self.get_nodeattr("IFMDim") + stride_h, stride_w = self.get_nodeattr("Stride") + k_h, k_w = self.get_nodeattr("KernelDim") + ofm_ch = self.get_nodeattr("OFMChannels") + pad_h, pad_w = self.get_nodeattr("Padding") + odim_h = (idim_h - 1) * stride_h - 2 * pad_h + (k_h - 1) + 1 + odim_w = (idim_w - 1) * stride_w - 2 * pad_w + (k_w - 1) + 1 + oshape = (1, odim_h, odim_w, ofm_ch) + return oshape + + def get_folded_output_shape(self, ind=0): + normal_oshape = self.get_normal_output_shape() + odim_h = normal_oshape[1] + odim_w = normal_oshape[2] + ofm_ch = normal_oshape[3] + pe = self.get_nodeattr("PE") + fold = int(ofm_ch / pe) + folded_oshape = (1, odim_h, odim_w, fold, pe) + return folded_oshape + + def make_shape_compatible_op(self, model): + exp_ishape = self.get_normal_input_shape() + oshape = self.get_normal_output_shape() + ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0])) + assert ishape == exp_ishape, "Unexpected input shape for Deconv." + # implement tensor with correct shape + return super().make_const_shape_op(oshape) + + def infer_node_datatype(self, model): + node = self.onnx_node + idt = model.get_tensor_datatype(node.input[0]) + if idt != self.get_input_datatype(): + warn_str = "inputDataType changing for %s: %s -> %s " % ( + node.name, + str(self.get_input_datatype()), + str(idt), + ) + warnings.warn(warn_str) + self.set_nodeattr("inputDataType", idt.name) + # set output datatype from property + odt = self.get_output_datatype() + model.set_tensor_datatype(node.output[0], odt) + + def verify_node(self): + pass + + def get_input_datatype(self, ind=0): + """Returns FINN DataType of input.""" + return DataType[self.get_nodeattr("inputDataType")] + + def get_weight_datatype(self): + """Returns FINN DataType of weights.""" + return DataType[self.get_nodeattr("weightDataType")] + + def get_output_datatype(self, ind=0): + """Returns FINN DataType of output.""" + return DataType[self.get_nodeattr("outputDataType")] + + def get_instream_width(self, ind=0): + """Returns stream width, input and output stream width are equal for + the sliding window function""" + if ind == 0: + ibits = self.get_input_datatype().bitwidth() + simd = self.get_nodeattr("SIMD") + ifm_ch = self.get_nodeattr("IFMChannels") + assert ifm_ch % simd == 0, "SIMD must divide IFMChannels" + in_width = simd * ibits + else: + in_width = 0 + return in_width + + def get_outstream_width(self, ind=0): + o_bits = self.get_output_datatype().bitwidth() + out_width = o_bits * self.get_nodeattr("PE") + return out_width + + def get_exp_cycles(self): + return 0 + + def bram_estimation(self): + return 0 + + def lut_estimation(self): + return 0 + + def uram_estimation(self): + return 0 + + def execute_node(self, context, graph): + pass diff --git a/src/finn/custom_op/fpgadataflow/hls/__init__.py b/src/finn/custom_op/fpgadataflow/hls/__init__.py index e80a581b57..b75656a758 100644 --- a/src/finn/custom_op/fpgadataflow/hls/__init__.py +++ b/src/finn/custom_op/fpgadataflow/hls/__init__.py @@ -58,6 +58,7 @@ def register_custom_op(cls): from finn.custom_op.fpgadataflow.hls.checksum_hls import CheckSum_hls from finn.custom_op.fpgadataflow.hls.concat_hls import StreamingConcat_hls from finn.custom_op.fpgadataflow.hls.crop_hls import Crop_hls +from finn.custom_op.fpgadataflow.hls.deconvolution_hls import Deconvolution_hls from finn.custom_op.fpgadataflow.hls.duplicatestreams_hls import DuplicateStreams_hls from finn.custom_op.fpgadataflow.hls.fmpadding_pixel_hls import FMPadding_Pixel_hls from finn.custom_op.fpgadataflow.hls.globalaccpool_hls import GlobalAccPool_hls @@ -84,6 +85,7 @@ def register_custom_op(cls): custom_op["AddStreams_hls"] = AddStreams_hls custom_op["ChannelwiseOp_hls"] = ChannelwiseOp_hls custom_op["CheckSum_hls"] = CheckSum_hls +custom_op["Deconvolution_hls"] = Deconvolution_hls custom_op["Crop_hls"] = Crop_hls custom_op["DuplicateStreams_hls"] = DuplicateStreams_hls custom_op["FMPadding_Pixel_hls"] = FMPadding_Pixel_hls diff --git a/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py b/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py new file mode 100644 index 0000000000..d0e90661e3 --- /dev/null +++ b/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py @@ -0,0 +1,213 @@ +# Copyright (C) 2024, Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import numpy as np +from qonnx.util.basic import interleave_matrix_outer_dim_from_partitions + +from finn.custom_op.fpgadataflow.deconvolution import Deconvolution +from finn.custom_op.fpgadataflow.hlsbackend import HLSBackend +from finn.util.data_packing import numpy_to_hls_code + + +class Deconvolution_hls(Deconvolution, HLSBackend): + """Corresponds to finn-hlslib deconv function.""" + + def __init__(self, onnx_node, **kwargs): + super().__init__(onnx_node, **kwargs) + + def get_nodeattr_types(self): + my_attrs = {} + my_attrs.update(Deconvolution.get_nodeattr_types(self)) + my_attrs.update(HLSBackend.get_nodeattr_types(self)) + return my_attrs + + def calc_wmem(self): + """Calculates and returns WMEM.""" + ifm_ch = self.get_nodeattr("IFMChannels") + ofm_ch = self.get_nodeattr("OFMChannels") + kernel_2 = np.prod(self.get_nodeattr("KernelDim")) + pe = self.get_nodeattr("PE") + simd = self.get_nodeattr("SIMD") + assert ofm_ch % pe == 0, "Requirement output channels divisable by PE is violated." + assert ifm_ch % simd == 0, "Requirement input channels divisable by SIMD is violated." + wmem = (ofm_ch / pe) * kernel_2 * (ifm_ch / simd) + return int(wmem) + + def generate_params(self, model, path): + code_gen_dir = path + # weights, if not external + weights = model.get_initializer(self.onnx_node.input[1]) + # save hlslib-compatible weights in params.h + weight_filename = "{}/params.h".format(code_gen_dir) + self.make_weight_file(weights, "hls_header", weight_filename) + + def make_weight_file(self, weights, weight_file_mode, weight_file_name): + """Produce a file containing given weights in appropriate format for this + layer. This file can be used for either synthesis or run-time reconfig + of weights. + + Arguments: + + * weights : numpy array with weights to be put into the file + * weight_file_mode : one of {hls_header, decoupled_verilog_dat, + decoupled_runtime} + * weight_file_name : filename for the weight file to be generated + + """ + # convert weights into hlslib-compatible format + weight_tensor = self.get_hw_compatible_weight_tensor(weights) + export_wdt = self.get_weight_datatype() + if weight_file_mode == "hls_header": + weight_hls_code = numpy_to_hls_code(weight_tensor, export_wdt, "weights", False, True) + # remove framing {} + weight_hls_code = weight_hls_code[1:-2] + ";" + # write weights into C++ header file as dictated by finn-hlslib + f_weights = open(weight_file_name, "w") + f_weights.write( + "static {} const weights[{}][{}][{}] = ".format( + export_wdt.get_hls_datatype_str(), + self.calc_wmem(), + self.get_nodeattr("PE"), + self.get_nodeattr("SIMD"), + ) + ) + f_weights.write(weight_hls_code) + f_weights.close() + + def get_hw_compatible_weight_tensor(self, orig_weight_matrix): + """Convert the original numpy weight matrix orig_weight_matrix into + a form suitable for passing to the hlslib call: + * ensure OCH % PE == 0 and ICH % SIMD == 0 + * interleave rows between PEs + * reshape into (1, PE, WMEM, SIMD) and return + """ + k_h, k_w = self.get_nodeattr("KernelDim") + ifm_ch = self.get_nodeattr("IFMChannels") + ofm_ch = self.get_nodeattr("OFMChannels") + pe = self.get_nodeattr("PE") + simd = self.get_nodeattr("SIMD") + wmem = self.calc_wmem() + assert orig_weight_matrix.shape == ( + ofm_ch, + k_h, + k_w, + ifm_ch, + ), """Weights matrix doesn't + have expected shape (ofm_ch, k_h, k_w, ifm_ch)""" + assert ofm_ch % pe == 0, "Requirement output channels divisable by PE is violated." + assert ifm_ch % simd == 0, "Requirement input channels divisable by SIMD is violated." + # interleave rows between PEs and reshape + # distribute rows between PEs + ret = orig_weight_matrix + ret = ret.reshape(ofm_ch, k_h * k_w * ifm_ch) + ret = interleave_matrix_outer_dim_from_partitions(ret, pe) + # create SIMD as innermost dimension + ret = ret.reshape(1, pe, wmem, simd) + ret = ret.transpose(0, 2, 1, 3) + return ret + + def global_includes(self): + self.code_gen_dict["$GLOBALS$"] = ['#include "deconv.hpp"'] + + def defines(self, var): + ifm_dim = self.get_nodeattr("IFMDim") + self.code_gen_dict["$DEFINES$"] = [ + """constexpr unsigned Kernel = {};\n constexpr unsigned Stride = {};\n + constexpr unsigned Padding = {};\n constexpr unsigned IFMH = {};\n + constexpr unsigned IFMW = {};\n constexpr unsigned ICH = {};\n + constexpr unsigned OCH = {};\n constexpr unsigned SIMD1 = {};\n + constexpr unsigned PE1 = {};""".format( + self.get_nodeattr("KernelDim")[0], + self.get_nodeattr("Stride")[0], + self.get_nodeattr("Padding")[0], + ifm_dim[0], + ifm_dim[1], + self.get_nodeattr("IFMChannels"), + self.get_nodeattr("OFMChannels"), + self.get_nodeattr("SIMD"), + self.get_nodeattr("PE"), + ) + ] + + def docompute(self): + self.code_gen_dict["$DOCOMPUTE$"] = [] + self.code_gen_dict["$DOCOMPUTE$"].append( + """deconv + (weights, in0_V, out0_V);""" + ) + + def blackboxfunction(self): + input_elem_hls_type = self.get_input_datatype().get_hls_datatype_str() + output_elem_hls_type = self.get_output_datatype().get_hls_datatype_str() + simd = self.get_nodeattr("SIMD") + pe = self.get_nodeattr("PE") + in_stream = "hls::stream> &in0_V" % ( + input_elem_hls_type, + simd, + ) + out_stream = "hls::stream> &out0_V" % ( + output_elem_hls_type, + pe, + ) + blackbox_hls = "void %s(%s, %s)" % (self.onnx_node.name, in_stream, out_stream) + self.code_gen_dict["$BLACKBOXFUNCTION$"] = [blackbox_hls] + + def pragmas(self): + self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0_V"] + self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out0_V") + self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE ap_ctrl_none port=return") + + self.code_gen_dict["$PRAGMAS$"].append('#include "params.h"') + # the weight tensor is ap_uint [PE][WMEM] + # partition for parallel access along the PE dimension (dim 1) + # self.code_gen_dict["$PRAGMAS$"].append( + # ("#pragma HLS ARRAY_PARTITION variable=weights.m_weights " "complete dim=1") + # ) + + def execute_node(self, context, graph): + HLSBackend.execute_node(self, context, graph) + + def timeout_value(self): + """Set timeout value for HLS functions defined for one clock cycle""" + simd = self.get_nodeattr("SIMD") + i_ch = self.get_nodeattr("IFMChannels") + k_h, k_w = self.get_nodeattr("KernelDim") + s_h, s_w = self.get_nodeattr("Stride") + i_h, i_w = self.get_nodeattr("IFMDim") + p_h, p_w = self.get_nodeattr("Padding") + if p_w >= k_w - s_w: + padup = 0 + else: + padup = (k_w - p_w - 1) / s_w + crop = s_w * padup - ((k_w - s_w) - p_w) + sf = i_ch / simd + w_eff = padup + i_w + padup + wo_eff = (w_eff - 1) * s_w + k_w + self.code_gen_dict["$TIMEOUT_VALUE$"] = [ + "%s" % (wo_eff * (crop + 1) * ((k_w / s_w) ** 2) * 4 * sf + 50) + ] diff --git a/tests/fpgadataflow/test_fpgadataflow_deconv.py b/tests/fpgadataflow/test_fpgadataflow_deconv.py index 400ed3b6e3..6cd60e07ad 100644 --- a/tests/fpgadataflow/test_fpgadataflow_deconv.py +++ b/tests/fpgadataflow/test_fpgadataflow_deconv.py @@ -63,12 +63,11 @@ target_clk_ns = 10 -def set_up_reference_model(idt, wdt, k, idim, ifm_ch, ofm_ch, stride, padding): +def set_up_reference_model(idt, wdt, odt, k, idim, ifm_ch, ofm_ch, stride, padding): idim_h, idim_w = idim stride_h, stride_w = stride odim_h = (idim_h - 1) * stride_h - 2 * padding + (k - 1) + 1 odim_w = (idim_w - 1) * stride_w - 2 * padding + (k - 1) + 1 - odt = DataType["INT32"] inp = helper.make_tensor_value_info( "inp", @@ -119,6 +118,73 @@ def set_up_reference_model(idt, wdt, k, idim, ifm_ch, ofm_ch, stride, padding): model = model.transform(InferShapes()) + return model, w_tensor + + +def create_deconv_node(idt, wdt, odt, k, idim, ifm_ch, ofm_ch, stride, padding, w_tensor): + idim_h, idim_w = idim + stride_h, stride_w = stride + odim_h = (idim_h - 1) * stride_h - 2 * padding + (k - 1) + 1 + odim_w = (idim_w - 1) * stride_w - 2 * padding + (k - 1) + 1 + + inp = helper.make_tensor_value_info( + "inp", + TensorProto.FLOAT, + [ + 1, + idim_h, + idim_w, + ifm_ch, + ], + ) + outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, odim_h, odim_w, ofm_ch]) + W = helper.make_tensor_value_info("W", TensorProto.FLOAT, [ofm_ch, k, k, ifm_ch]) + + Deconv = helper.make_node( + "Deconvolution_hls", + ["inp", "W"], + ["outp"], + domain="finn.custom_op.fpgadataflow.hls", + backend="fpgadataflow", + KernelDim=[k, k], + IFMChannels=ifm_ch, + OFMChannels=ofm_ch, + IFMDim=idim, + Stride=[stride_h, stride_w], + Padding=[padding, padding], + PE=1, + SIMD=1, + inputDataType=idt.name, + weightDataType=wdt.name, + outputDataType=odt.name, + cpp_interface="hls_vector", + hls_style="freerunning", + ) + + node_list = [Deconv] + value_info = [W] + + graph = helper.make_graph( + nodes=node_list, + name="convtranspose_graph", + inputs=[inp], + outputs=[outp], + value_info=value_info, + ) + + model = qonnx_make_model(graph, producer_name="convtranspose-model") + model = ModelWrapper(model) + + # initialize model + model.set_tensor_datatype("inp", idt) + model.set_tensor_datatype(model.graph.output[0].name, odt) + model.set_tensor_datatype("W", wdt) + + w_tensor = w_tensor.transpose(1, 2, 3, 0) + model.set_initializer("W", w_tensor) + + model = model.transform(InferShapes()) + return model @@ -143,13 +209,16 @@ def set_up_reference_model(idt, wdt, k, idim, ifm_ch, ofm_ch, stride, padding): @pytest.mark.fpgadataflow @pytest.mark.slow @pytest.mark.vivado -def test_fpgadataflow_deconv(idim, stride, ifm_ch, ofm_ch, simd, pe, k, padding, exec_mode): +def test_fpgadataflow_deconv_pixel_pad( + idim, stride, ifm_ch, ofm_ch, simd, pe, k, padding, exec_mode +): idt = wdt = DataType["INT4"] wdt = idt + odt = DataType["INT32"] idim_h, idim_w = idim stride_h, stride_w = stride - ref_model = set_up_reference_model(idt, wdt, k, idim, ifm_ch, ofm_ch, stride, padding) + ref_model = set_up_reference_model(idt, wdt, odt, k, idim, ifm_ch, ofm_ch, stride, padding)[0] odim_h = (idim_h - 1) * stride_h - 2 * padding + (k - 1) + 1 odim_w = (idim_w - 1) * stride_w - 2 * padding + (k - 1) + 1 @@ -208,3 +277,93 @@ def test_fpgadataflow_deconv(idim, stride, ifm_ch, ofm_ch, simd, pe, k, padding, exp_cycles = exp_cycles_dict[node.name] assert np.isclose(exp_cycles, cycles_rtlsim, atol=10) assert exp_cycles != 0 + + +# input image dimension +@pytest.mark.parametrize("idim", [[8, 8]]) +# number of rows and number of cols to add +@pytest.mark.parametrize("stride", [[2, 2]]) +# number of channels +@pytest.mark.parametrize("ifm_ch", [2]) +# number of channels +@pytest.mark.parametrize("ofm_ch", [3]) +# Input parallelism +@pytest.mark.parametrize("simd", [1]) +# PE +@pytest.mark.parametrize("pe", [1]) +# kernel size +@pytest.mark.parametrize("k", [4]) +# padding +@pytest.mark.parametrize("padding", [1]) +# exec mode +@pytest.mark.parametrize("exec_mode", ["cppsim"]) +@pytest.mark.fpgadataflow +@pytest.mark.slow +@pytest.mark.vivado +def test_fpgadataflow_deconv_revd2(idim, stride, ifm_ch, ofm_ch, simd, pe, k, padding, exec_mode): + idt = wdt = DataType["INT8"] + wdt = idt + odt = DataType["INT32"] + idim_h, idim_w = idim + stride_h, stride_w = stride + + ref_model, w_tensor = set_up_reference_model( + idt, wdt, odt, k, idim, ifm_ch, ofm_ch, stride, padding + ) + model = create_deconv_node(idt, wdt, odt, k, idim, ifm_ch, ofm_ch, stride, padding, w_tensor) + + odim_h = (idim_h - 1) * stride_h - 2 * padding + (k - 1) + 1 + odim_w = (idim_w - 1) * stride_w - 2 * padding + (k - 1) + 1 + + input_tensor = gen_finn_dt_tensor(idt, [1, ifm_ch, idim_h, idim_w]) + input_dict = {"inp": input_tensor} + + y_expected = oxe.execute_onnx(ref_model, input_dict)["outp"] + + # model = model.transform(InferShapes()) + # model = model.transform(GiveUniqueNodeNames()) + input_tensor_nhwc = input_tensor.transpose(0, 2, 3, 1) + input_dict_nhwc = {"inp": input_tensor_nhwc} + # y_produced = oxe.execute_onnx(model, input_dict_nhwc)["outp"] + # assert (y_produced == y_expected).all() + + # model = model.transform(SpecializeLayers(test_fpga_part)) + # model = model.transform(MinimizeAccumulatorWidth()) + + for n in model.graph.node: + if n.op_type.startswith("Deconvolution_hls"): + deconv_node = getCustomOp(n) + deconv_node.set_nodeattr("PE", pe) + deconv_node.set_nodeattr("SIMD", simd) + + expected_oshape = (1, odim_h, odim_w, ofm_ch) + # model.save("deconv.onnx") + # cppsim + if exec_mode == "cppsim": + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(PrepareCppSim()) + model = model.transform(CompileCppSim()) + model = model.transform(SetExecMode("cppsim")) + # breakpoint() + + # rtlsim + else: + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(PrepareIP(test_fpga_part, target_clk_ns)) + model = model.transform(HLSSynthIP()) + model = model.transform(PrepareRTLSim()) + model = model.transform(SetExecMode("rtlsim")) + + y_produced = oxe.execute_onnx(model, input_dict_nhwc)["outp"] + assert y_produced.shape == expected_oshape + y_produced = y_produced.transpose(0, 3, 1, 2) + assert (y_produced == y_expected).all() + + if exec_mode == "rtlsim": + node = model.get_nodes_by_op_type("Deconvolution_hls")[0] + inst = getCustomOp(node) + cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim") + exp_cycles_dict = model.analysis(exp_cycles_per_layer) + exp_cycles = exp_cycles_dict[node.name] + assert np.isclose(exp_cycles, cycles_rtlsim, atol=10) + assert exp_cycles != 0