diff --git a/fetch-repos.sh b/fetch-repos.sh index 1473f83b1d..b001ccd6d6 100755 --- a/fetch-repos.sh +++ b/fetch-repos.sh @@ -1,5 +1,6 @@ #!/bin/bash # Copyright (c) 2020-2022, Xilinx, Inc. +# Copyright (C) 2022-2025, Advanced Micro Devices, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/src/finn/transformation/qonnx/fold_quant_weights.py b/src/finn/transformation/qonnx/fold_quant_weights.py index 0f6cbacb82..f1e18a8144 100644 --- a/src/finn/transformation/qonnx/fold_quant_weights.py +++ b/src/finn/transformation/qonnx/fold_quant_weights.py @@ -149,7 +149,8 @@ def apply(self, model): mul_tensor = helper.make_tensor_value_info( model.make_new_valueinfo_name(), TensorProto.FLOAT, - mul_shape, + mul_shape, # Note: This shape is known exactly as + # it is an initializer with known shape ) graph.value_info.append(mul_tensor) model.set_initializer(mul_tensor.name, scale) @@ -164,11 +165,12 @@ def apply(self, model): successor = successor[0] succ_output_name = successor.output[0] - output_shape = model.get_tensor_shape(successor.output[0]) act_mul_tensor = helper.make_tensor_value_info( model.make_new_valueinfo_name(), TensorProto.FLOAT, - output_shape, + None, # Note: Explicitly delete the shape + # annotation to be redone by the next shape + # inference ) graph.value_info.append(act_mul_tensor) successor.output[0] = act_mul_tensor.name @@ -186,19 +188,37 @@ def apply(self, model): div_tensor = helper.make_tensor_value_info( model.make_new_valueinfo_name(), TensorProto.FLOAT, - mul_shape, + None, # Note: Explicitly delete the shape + # annotation to be redone by the next shape + # inference ) graph.value_info.append(div_tensor) model.set_initializer(div_tensor.name, scale) - succ_input_name = successor.input[0] + # Detect which input of the add-like successor is + # fed by the quantizer node to select the other + # branch to insert the scale factor + if successor.input[0] == node_out: + succ_input_name = 
successor.input[1] + else: + succ_input_name = successor.input[0] + act_mul_tensor = helper.make_tensor_value_info( model.make_new_valueinfo_name(), TensorProto.FLOAT, - output_shape, + None, # Note: Explicitly delete the shape + # annotation to be redone by the next shape + # inference ) graph.value_info.append(act_mul_tensor) - successor.input[0] = act_mul_tensor.name + + # Detect which input of the add-like successor is + # fed by the quantizer node to select the other + # branch to insert the scale factor + if successor.input[0] == node_out: + successor.input[1] = act_mul_tensor.name + else: + successor.input[0] = act_mul_tensor.name div_node = helper.make_node( "Div", @@ -210,6 +230,8 @@ def apply(self, model): # remove old node graph.node.remove(n) graph_modified = True + # Note: Running shape inference is necessary as shape + # annotations have been deleted above model = model.transform(InferShapes()) return (model, graph_modified) return (model, graph_modified) diff --git a/src/finn/transformation/qonnx/qonnx_activation_handlers.py b/src/finn/transformation/qonnx/qonnx_activation_handlers.py index 92a9731c2a..1bb6097107 100644 --- a/src/finn/transformation/qonnx/qonnx_activation_handlers.py +++ b/src/finn/transformation/qonnx/qonnx_activation_handlers.py @@ -25,8 +25,8 @@ # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - import numpy as np +import warnings from abc import ABC, abstractmethod from onnx import TensorProto, helper from qonnx.core.modelwrapper import ModelWrapper @@ -70,7 +70,7 @@ def _check_compatibility(self): @abstractmethod def _calculate_act_bias(self): """Calculate the activation bias, - which is introduced as an Add node behind the MultiTrheshold node. + which is introduced as an Add node behind the MultiThreshold node. 
""" raise NotImplementedError() @@ -82,7 +82,7 @@ def _calculate_thresholds(self): @abstractmethod def _calculate_act_scale(self): """Calculate the activation scale, - which is indroduced as a Mul node behind the Add node + which is introduced as a Mul node behind the Add node for the activation bias. """ raise NotImplementedError() @@ -157,7 +157,7 @@ def replace_quant_node(self): # Set scale and bias # If these values are scalar then they can be set as attributes # of the MultiThreshold node, if not they get inserted as adder and mul nodes - # behind the MultiTrheshold nodes. + # behind the MultiThreshold nodes. bias_scalar = adder_bias.shape == (1,) or len(adder_bias.shape) == 0 scale_scalar = mul_scale.shape == (1,) or len(mul_scale.shape) == 0 if scale_scalar and bias_scalar and self._q_node.op_type == "BipolarQuant": @@ -355,7 +355,7 @@ def _calculate_thresholds(self): act_node = self._model.find_direct_predecessors(self._q_node) act_node = act_node[0] if act_node.op_type == "Relu": - # Calculate thersholds, see: https://github.com/Xilinx/brevitas/blob/ + # Calculate thresholds, see: https://github.com/Xilinx/brevitas/blob/ # a5bfd6dc5e030f0047ac1ee47932b60e8e873e17/src/brevitas/export/ # onnx/finn/handler/act.py#L21 num_distinct_values = 2**bit_width @@ -395,11 +395,32 @@ def _calculate_thresholds(self): else: thresholds[c][t] = step / selu_scale + # First try to consider the tensor layout of the output for determining + # the number of output channels + layout = self._model.get_tensor_layout(self._q_node.output[0]) + # If there is a layout annotation, use this to determine the index of + # the channel dimension + if layout is not None and "C" in layout: + # Lookup the index in list + cdim = layout.index("C") + # If no layout has been annotated or there is no channel dimension, fall + # back to the previous default assumption + else: + # Assume the channels to be in axis 1 + cdim = 1 + # Issue a warning to the user, so they are aware of this + warnings.warn( 
+ f"No layout annotations for {self._q_node.output[0]}:" + f" Assuming channel dimension at index {cdim}" + ) + # ToDo: The index 1 needs to be changed to -1 for the channels last format - num_output_channels = self._model.get_tensor_shape(self._q_node.output[0])[1] - final_shape = (num_output_channels, num_thresholds) - if thresholds.shape != final_shape: - thresholds = np.broadcast_to(thresholds, final_shape) + num_output_channels = self._model.get_tensor_shape(self._q_node.output[0])[cdim] + + assert ( + thresholds.shape[0] == 1 or thresholds.shape[0] == num_output_channels + ), """Quant node cannot be converted to MultiThreshold because only + per tensor or per channel quantization supported.""" return thresholds @@ -417,12 +438,12 @@ def _remove_activation_node(self, multi_threshold_node): act_node = self._model.find_direct_predecessors(self._q_node) if act_node is None: raise RuntimeError( - "For handling of Relu activations a predecesor to " "the Quant node must exist." + "For handling of Relu activations a predecessor to " "the Quant node must exist." ) act_node = act_node[0] if act_node.op_type not in self.valid_predecessor_op_types(): raise RuntimeError( - "The predecesor of the Quant node must be Relu or Selu for handling " + "The predecessor of the Quant node must be Relu or Selu for handling " "of activations." 
) @@ -509,7 +530,7 @@ def _calculate_thresholds(self): else: raise RuntimeError("Got an unexpected quantizer node type") - # Calculate thersholds, see: https://github.com/Xilinx/brevitas/ + # Calculate thresholds, see: https://github.com/Xilinx/brevitas/ # blob/a5bfd6dc5e030f0047ac1ee47932b60e8e873e17/src/brevitas/ # export/onnx/finn/handler/act.py#L76 if bit_width == 1.0: @@ -537,8 +558,28 @@ def _calculate_thresholds(self): for t in range(num_thresholds): thresholds[c][t] = min_threshold[c] + step[c] * t - # currently only per tensor or per channel quantization is supported - num_output_channels = self._model.get_tensor_shape(self._q_node.output[0])[1] + # First try to consider the tensor layout of the output for + # determining the number of output channels + layout = self._model.get_tensor_layout(self._q_node.output[0]) + # If there is a layout annotation, use this to determine the index + # of the channel dimension + if layout is not None and "C" in layout: + # Lookup the index in list + cdim = layout.index("C") + # If no layout has been annotated or there is no channel dimension, + # fall back to the previous default assumption + else: + # Assume the channels to be in axis 1 + cdim = 1 + # Issue a warning to the user, so they are aware of this + warnings.warn( + f"No layout annotations for {self._q_node.output[0]}:" + f" Assuming channel dimension at index {cdim}" + ) + + # ToDo: The fallback index 1 needs to be changed to -1 for the channels last format + num_output_channels = self._model.get_tensor_shape(self._q_node.output[0])[cdim] + assert ( thresholds.shape[0] == 1 or thresholds.shape[0] == num_output_channels ), """Quant node cannot be converted to MultiThreshold because only diff --git a/src/finn/transformation/streamline/__init__.py b/src/finn/transformation/streamline/__init__.py index 2e68de698b..39ef87f81c 100644 --- a/src/finn/transformation/streamline/__init__.py +++ b/src/finn/transformation/streamline/__init__.py @@ -76,8 +76,8 @@ def apply(self, 
model): BatchNormToAffine(), ConvertSignToThres(), MoveMulPastMaxPool(), - MoveScalarLinearPastInvariants(), AbsorbSignBiasIntoMultiThreshold(), + MoveScalarLinearPastInvariants(), MoveAddPastMul(), MoveScalarAddPastMatMul(), MoveAddPastConv(), diff --git a/src/finn/transformation/streamline/absorb.py b/src/finn/transformation/streamline/absorb.py index e3e2468bba..33dfd61f75 100644 --- a/src/finn/transformation/streamline/absorb.py +++ b/src/finn/transformation/streamline/absorb.py @@ -29,6 +29,9 @@ import numpy as np import qonnx.core.data_layout as DataLayout import warnings + +# Protobuf onnx graph node type +from onnx import NodeProto # noqa from onnx import helper as oh from qonnx.core.datatype import DataType from qonnx.custom_op.registry import getCustomOp @@ -134,7 +137,7 @@ def apply(self, model): # remove the add node graph.node.remove(n) graph_modified = True - return (model, graph_modified) + return model, graph_modified class AbsorbMulIntoMultiThreshold(Transformation): @@ -215,7 +218,7 @@ def apply(self, model): class Absorb1BitMulIntoMatMul(Transformation): - """Absorb bipolar or binary multiplications into the preciding matrix + """Absorb bipolar or binary multiplications into the preceding matrix multiply.""" def apply(self, model): @@ -224,16 +227,22 @@ def apply(self, model): graph_modified = False for n in graph.node: node_ind += 1 - if n.op_type == "MatMul": + # TODO: Fork-nodes could be handled if the muls are the same in all + # branches, but this is not checked nor rewired at all right now. + if n.op_type == "MatMul" and not model.is_fork_node(n): matmul_weight_name = n.input[1] W = model.get_initializer(matmul_weight_name) Wdt = model.get_tensor_datatype(matmul_weight_name) - assert W is not None, "Initializer for matmul weights is not set." 
+ # Skip matmuls with no initializers + if W is None: + continue consumer = model.find_consumer(n.output[0]) if consumer is not None and consumer.op_type == "Mul": mul_weight_name = consumer.input[1] A = model.get_initializer(mul_weight_name) - assert A is not None, "Initializer for mul weights is not set." + # Skip muls with no initializers + if A is None: + continue is_1bit = model.get_tensor_datatype(mul_weight_name).bitwidth() == 1 if is_1bit: Wnew = A * W @@ -252,7 +261,7 @@ def apply(self, model): class Absorb1BitMulIntoConv(Transformation): - """Absorb bipolar or binary multiplications into the preciding convolution.""" + """Absorb bipolar or binary multiplications into the preceding convolution.""" def apply(self, model): graph = model.graph @@ -260,16 +269,20 @@ def apply(self, model): graph_modified = False for n in graph.node: node_ind += 1 - if n.op_type == "Conv": + if n.op_type == "Conv" and not model.is_fork_node(n): conv_weight_name = n.input[1] W = model.get_initializer(conv_weight_name) Wdt = model.get_tensor_datatype(conv_weight_name) - assert W is not None, "Initializer for conv weights is not set." + # Skip convs with no initializers + if W is None: + continue consumer = model.find_consumer(n.output[0]) if consumer is not None and consumer.op_type == "Mul": mul_weight_name = consumer.input[1] A = model.get_initializer(mul_weight_name) - assert A is not None, "Initializer for mul weights is not set." 
+ # Skip muls with no initializers + if A is None: + continue is_1bit = model.get_tensor_datatype(mul_weight_name).bitwidth() == 1 is_scalar = np.prod(A.shape) == 1 actual_ndims = len(tuple(filter(lambda x: x > 1, A.shape))) diff --git a/src/finn/transformation/streamline/reorder.py b/src/finn/transformation/streamline/reorder.py index 9a7e9d0723..cc6634f480 100644 --- a/src/finn/transformation/streamline/reorder.py +++ b/src/finn/transformation/streamline/reorder.py @@ -105,36 +105,40 @@ class MoveScalarMulPastMatMul(Transformation): """Move scalar mul operations past matmul operations. We want to have muls next to each other such that they can be collapsed into a single mul.""" + # Applies the transform to a whole model graph def apply(self, model): + # Get the model graph out of the model wrapper object graph = model.graph - node_ind = 0 + # Keep track of whether the graph has been modified graph_modified = False - for n in graph.node: - node_ind += 1 + # Iterate all nodes in the graph keeping track of the index + for node_ind, n in enumerate(graph.node): if n.op_type == "Mul" and not model.is_fork_node(n) and not model.is_join_node(n): consumer = model.find_consumer(n.output[0]) - if ( - consumer is not None - and consumer.op_type == "MatMul" - and not model.is_join_node(consumer) - ): + if consumer is not None and consumer.op_type == "MatMul": mul_weight_name = n.input[1] - matmul_weight_name = consumer.input[1] A = model.get_initializer(mul_weight_name) - W = model.get_initializer(matmul_weight_name) - if (A is None) or (W is None): - warnings.warn("MatMul or Mul params are not constant, skipping") - continue start_name = n.input[0] middle_name = n.output[0] end_name = consumer.output[0] mm_out_shape = model.get_tensor_shape(end_name) + # check which input mul node is connected to build the right node connectivity + if n.output[0] == consumer.input[0]: + new_matmul_inps = [start_name, consumer.input[1]] + elif n.output[0] == consumer.input[1]: + 
new_matmul_inps = [consumer.input[0], start_name] + else: + raise Exception( + """Invalid pattern detected, + output of mul is not connected to any of the consumer's inputs.""" + ) + if all(x == 1 for x in A.shape): # if the mul is scalar, we can simply swap the order of ops # make and insert new nodes new_matmul = oh.make_node( "MatMul", - [start_name, matmul_weight_name], + new_matmul_inps, [middle_name], name=consumer.name, ) @@ -152,7 +156,7 @@ def apply(self, model): graph.node.remove(consumer) graph_modified = True model = model.transform(InferShapes()) - return (model, graph_modified) + return model, graph_modified class MoveScalarAddPastMatMul(Transformation): @@ -606,6 +610,17 @@ class MoveScalarLinearPastInvariants(Transformation): GlobalAveragePool """ + # Op-types of currently supported invariants + SUPPORTED_INVARIANTS = { + "GlobalAveragePool", + "Reshape", + "Transpose", + "Flatten", + "Slice", + "Squeeze", + "Unsqueeze", + } + def apply(self, model): graph = model.graph node_ind = 0 @@ -618,13 +633,7 @@ def apply(self, model): # Extract mode and scales and input shape mode = get_by_name(n.attribute, "mode").s.decode("ascii") is_nearest_neighbor_resample = mode == "nearest" - if ( - n.op_type == "GlobalAveragePool" - or n.op_type == "Reshape" - or n.op_type == "Transpose" - or n.op_type == "Flatten" - or is_nearest_neighbor_resample - ): + if n.op_type in self.SUPPORTED_INVARIANTS or is_nearest_neighbor_resample: in0 = n.input[0] if in0 is None: continue @@ -634,6 +643,16 @@ def apply(self, model): continue if prod0.op_type in ["Mul", "Add", "Div"]: + # Cannot handle fork-nodes, try MoveLinearPastFork first + if model.is_fork_node(prod0): + warnings.warn( + f"{self.__class__.__name__}:" + f" Skipping near match: {prod0.name} is a fork-node," + f" try MoveLinearPastFork first" + ) + # Skip transforming this node as moving this would lead + # to messed up or detached graph + continue # check if second input of producer is an initializer init0 = 
model.get_initializer(prod0.input[1]) # if either initializer is None, skip diff --git a/tests/transformation/streamline/test_move_scalar_past_matmul.py b/tests/transformation/streamline/test_move_scalar_past_matmul.py index e4f4357fff..515e9b9462 100644 --- a/tests/transformation/streamline/test_move_scalar_past_matmul.py +++ b/tests/transformation/streamline/test_move_scalar_past_matmul.py @@ -72,6 +72,43 @@ def test_move_scalar_mul_past_matmul(): assert new_model.graph.node[0].output[0] == new_model.graph.node[1].input[0] +@pytest.mark.streamline +def test_move_scalar_mul_past_join_matmul(): + top_in1 = oh.make_tensor_value_info("top_in1", TensorProto.FLOAT, [1, 2]) + top_in2 = oh.make_tensor_value_info("top_in2", TensorProto.FLOAT, [2, 1]) + mul1_param = oh.make_tensor_value_info("mul1_param", TensorProto.FLOAT, [1, 1]) + mul2_param = oh.make_tensor_value_info("mul2_param", TensorProto.FLOAT, [1, 1]) + top_out = oh.make_tensor_value_info("top_out", TensorProto.FLOAT, [1, 1]) + modelproto = qonnx_make_model( + oh.make_graph( + name="test", + inputs=[top_in1, top_in2], + outputs=[top_out], + value_info=[mul1_param, mul2_param], + nodes=[ + oh.make_node("Mul", ["top_in1", "mul1_param"], ["middle1"]), + oh.make_node("Mul", ["top_in2", "mul2_param"], ["middle2"]), + oh.make_node("MatMul", ["middle1", "middle2"], ["top_out"]), + ], + ) + ) + model = ModelWrapper(modelproto) + model = model.transform(InferShapes()) + model.set_initializer("mul1_param", np.asarray([[3]], dtype=np.float32)) + model.set_initializer("mul2_param", np.asarray([[3]], dtype=np.float32)) + new_model = model.transform(MoveScalarMulPastMatMul()) + inp_dict = { + "top_in1": np.asarray([[-1.0, 1.0]], dtype=np.float32), + "top_in2": np.asarray([[1.0], [-1.0]], dtype=np.float32), + } + assert ox.compare_execution(model, new_model, inp_dict) + assert new_model.graph.node[0].op_type == "MatMul" + assert new_model.graph.node[1].op_type == "Mul" + assert new_model.graph.node[2].op_type == "Mul" + assert 
new_model.graph.node[0].output[0] == new_model.graph.node[1].input[0] + assert new_model.graph.node[1].output[0] == new_model.graph.node[2].input[0] + + @pytest.mark.streamline def test_move_scalar_add_past_matmul(): top_in = oh.make_tensor_value_info("top_in", TensorProto.FLOAT, [1, 2])