eki-project · bwintermann · Sep 1, 2025 · Sep 3, 2025 · Sep 3, 2025 · Sep 4, 2025
diff --git a/.gitignore b/.gitignore
@@ -54,6 +54,11 @@ tags
 poetry.lock
 *.code-workspace
 .env
+settings.yaml
+settings.yml
+deps/
+FINN_TMP
+FINN_IP_CACHE
 
 # Package files
 *.egg

diff --git a/README.md b/README.md
@@ -34,7 +34,7 @@ FINN+ incorporates all upstream FINN development while adding significant enhanc
 ### Developer Experience
 
 - **Better Diagnostics** - Improved logging and error handling throughout the framework
-- **Type Safety** - Comprehensive type hinting and checking for better code quality
+- **IP Caching** - IP Caching between builds for faster design iteration
 - **YAML Configuration** - Alternative YAML-based build configuration system
 - **Simplified Setup** - Containerless installation and setup process
 

diff --git a/src/finn/builder/build_dataflow.py b/src/finn/builder/build_dataflow.py
@@ -54,6 +54,7 @@
 
 from finn.builder.build_dataflow_config import DataflowBuildConfig, default_build_dataflow_steps
 from finn.builder.build_dataflow_steps import build_dataflow_step_lookup
+from finn.transformation.fpgadataflow.ip_cache import CACHE_IP_DEFINITIONS
 from finn.util.exception import (
     FINNConfigurationError,
     FINNDataflowError,
@@ -350,6 +351,18 @@ def build_dataflow_cfg(model_filename, cfg: DataflowBuildConfig):
     print(f"Final outputs will be generated in {cfg.output_dir}")
     print(f"Build log is at {logfile}")
 
+    # Report IP cache usage; in verbose mode also list the per-operator use/ignore entries
+    if cfg.use_ip_caching:
+        log.info("IP Caching enabled.")
+        if cfg.verbose:
+            log.info("Caching enabled for operators: ")
+            for k, v in CACHE_IP_DEFINITIONS.items():
+                log.info(f"Operator: {k}:")
+                if "use" in v.keys():
+                    log.info("\tuse: " + ", ".join(v["use"]))
+                if "ignore" in v.keys():
+                    log.info("\tignore: " + ", ".join(v["ignore"]))
+
     # Setup done, start build flow
     try:
         # If start_step is specified, override the input model

diff --git a/src/finn/builder/build_dataflow_config.py b/src/finn/builder/build_dataflow_config.py
@@ -156,8 +156,7 @@ class VerificationStepType(str, Enum):
     "step_minimize_bit_width",
     "step_generate_estimate_reports",
     "step_set_fifo_depths",
-    "step_hw_codegen",
-    "step_hw_ipgen",
+    "step_ip_generation",
     "step_create_stitched_ip",
     "step_measure_rtlsim_performance",
     "step_out_of_context_synthesis",
@@ -334,6 +333,25 @@ class DataflowBuildConfig(DataClassJSONMixin, DataClassYAMLMixin):
     #: If not specified it will default to synth_clk_period_ns
     hls_clk_period_ns: Optional[float] = None
 
+    #: Use an IP Cache to re-use code-gen (PrepareIP) and HLS (HLSSynthIP)
+    #: artifacts from previous runs to speed up the build process.
+    use_ip_caching: bool = True
+
+    #: (Only relevant if use_ip_caching is enabled)
+    #: Hash function to be used when caching the IP cores.
+    ip_cache_hashfunction: str = "sha256"
+
+    #: (Only relevant if use_ip_caching is enabled)
+    #: Whether the value of _resolve_hls_clk_period() is used as part of
+    #: the cached key. Can be turned off for more cache hits, but
+    #: then delivers an IP with an outdated constraints file. This
+    #: might affect OOC Synthesis and other parts of the design, use
+    #: at your own risk.
+    cache_hls_clk_period: bool = True
+
+    #: The same as `cache_hls_clk_period`, but for the passed FPGA part.
+    cache_fpgapart: bool = True
+
     #: (Optional, only relevant when shell_flow_type = VITIS_ALVEO)
     #: Which Vitis platform will be used, e.g. "xilinx_u250_xdma_201830_2".
     #: If not specified but "board" is specified, will use the FINN

diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py
@@ -85,6 +85,7 @@
 from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
 from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
 from finn.transformation.fpgadataflow.insert_tlastmarker import InsertTLastMarker
+from finn.transformation.fpgadataflow.ip_cache import CachedIPGen
 from finn.transformation.fpgadataflow.make_driver import (
     MakeCPPDriver,
     MakePYNQDriverInstrumentation,
@@ -117,7 +118,7 @@
 from finn.transformation.streamline.reorder import MakeMaxPoolNHWC
 from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds
 from finn.util.basic import get_liveness_threshold_cycles, get_rtlsim_trace_depth
-from finn.util.exception import FINNUserError
+from finn.util.exception import FINNConfigurationError, FINNUserError
 from finn.util.logging import log
 from finn.util.test import execute_parent
 
@@ -521,6 +522,49 @@ def step_minimize_bit_width(model: ModelWrapper, cfg: DataflowBuildConfig):
     return model
 
 
+def _make_hls_estimate_report(model: ModelWrapper, cfg: DataflowBuildConfig) -> None:
+    """Write the HLS resource estimation report for the given model.
+
+    Runs the hls_synth_res_estimation analysis on the model, adds a "total"
+    entry computed by aggregate_dict_keys over the result and dumps everything
+    as JSON to <cfg.output_dir>/report/estimate_layer_resources_hls.json.
+    The report directory is created if it does not exist yet.
+    """
+    report_dir = cfg.output_dir + "/report"
+    os.makedirs(report_dir, exist_ok=True)
+    layer_estimates = model.analysis(hls_synth_res_estimation)
+    layer_estimates["total"] = aggregate_dict_keys(layer_estimates)
+    report_path = report_dir + "/estimate_layer_resources_hls.json"
+    with open(report_path, "w") as report_file:
+        json.dump(layer_estimates, report_file, indent=2)
+
+
+def step_ip_generation(model: ModelWrapper, cfg: DataflowBuildConfig) -> ModelWrapper:
+    """Run the IP generation flow formerly split across step_hw_codegen and step_hw_ipgen.
+
+    If cfg.use_ip_caching is set, the model is transformed with CachedIPGen
+    (called with include_prepare_ip=True); otherwise PrepareIP and HLSSynthIP
+    are applied directly. Afterwards ReplaceVerilogRelPaths is applied, the HLS
+    resource estimate report is written and, if NODE_BY_NODE_RTLSIM is among
+    the resolved verification steps, the model is prepared for rtlsim and verified.
+
+    Raises FINNConfigurationError if caching is enabled but the HLS clock
+    period resolves to None.
+    """
+    if not cfg.use_ip_caching:
+        # Uncached flow: run PrepareIP and HLSSynthIP directly
+        model = model.transform(PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period()))
+        model = model.transform(HLSSynthIP())
+    else:
+        clk = cfg._resolve_hls_clk_period()
+        if clk is None:
+            # TODO: Change into a logging error instead of an exception?
+            raise FINNConfigurationError(
+                "Please specify synth_clk_period_ns in your build "
+                "config (and optionally hls_clk_period_ns) before "
+                "generating IPs!"
+            )
+        cached_ipgen = CachedIPGen(
+            cfg.ip_cache_hashfunction,
+            include_prepare_ip=True,
+            cache_clock=cfg.cache_hls_clk_period,
+            fpgapart=cfg._resolve_fpga_part(),
+            clk=clk,
+            cache_fpgapart=cfg.cache_fpgapart,
+        )
+        model = model.transform(cached_ipgen)
+    model = model.transform(ReplaceVerilogRelPaths())
+    _make_hls_estimate_report(model, cfg)
+
+    # Node-by-node RTL simulation is only set up when requested in the config
+    if VerificationStepType.NODE_BY_NODE_RTLSIM not in cfg._resolve_verification_steps():
+        return model
+    model = model.transform(PrepareRTLSim())
+    model = model.transform(SetExecMode("rtlsim"))
+    verify_step(model, cfg, "node_by_node_rtlsim", need_parent=True)
+    return model
+
+
 def step_hw_codegen(model: ModelWrapper, cfg: DataflowBuildConfig):
     """Generate Vitis HLS code to prepare HLSBackend nodes for IP generation.
     And fills RTL templates for RTLBackend nodes."""
@@ -533,15 +577,36 @@ def step_hw_ipgen(model: ModelWrapper, cfg: DataflowBuildConfig):
     """Run Vitis HLS synthesis on generated code for HLSBackend nodes,
     in order to generate IP blocks. For RTL nodes this step does not do anything."""
 
-    model = model.transform(HLSSynthIP())
-    model = model.transform(ReplaceVerilogRelPaths())
-    report_dir = cfg.output_dir + "/report"
-    os.makedirs(report_dir, exist_ok=True)
-    estimate_layer_resources_hls = model.analysis(hls_synth_res_estimation)
-    estimate_layer_resources_hls["total"] = aggregate_dict_keys(estimate_layer_resources_hls)
-    with open(report_dir + "/estimate_layer_resources_hls.json", "w") as f:
-        json.dump(estimate_layer_resources_hls, f, indent=2)
+    if cfg.use_ip_caching:
+        log.info("Using IP cache to fetch generated IPs...")
+        clk = cfg._resolve_hls_clk_period()
+        if clk is None and cfg.cache_hls_clk_period:
+            log.critical(
+                "No HLS/general synthesis clock period was specified, but required for "
+                "caching (cfg.cache_hls_clk_period). Skipping caching for safety. "
+                "Executing just HLSSynthIP()..."
+            )
+            model = model.transform(HLSSynthIP())
+        else:
+            # If clk is None but we don't use it anyway, give it some placeholder value
+            if clk is None:
+                clk = 0
+            model = model.transform(
+                CachedIPGen(
+                    cfg.ip_cache_hashfunction,
+                    cache_clock=cfg.cache_hls_clk_period,
+                    include_prepare_ip=False,
+                    fpgapart=cfg._resolve_fpga_part(),
+                    clk=clk,
+                    cache_fpgapart=cfg.cache_fpgapart,
+                )
+            )
+    else:
+        log.info("Generating all IPs from scratch...")
+        model = model.transform(HLSSynthIP())
 
+    model = model.transform(ReplaceVerilogRelPaths())
+    _make_hls_estimate_report(model, cfg)
     if VerificationStepType.NODE_BY_NODE_RTLSIM in cfg._resolve_verification_steps():
         model = model.transform(PrepareRTLSim())
         model = model.transform(SetExecMode("rtlsim"))
@@ -1059,6 +1124,7 @@ def step_deployment_package(model: ModelWrapper, cfg: DataflowBuildConfig):
     "step_apply_folding_config": step_apply_folding_config,
     "step_minimize_bit_width": step_minimize_bit_width,
     "step_generate_estimate_reports": step_generate_estimate_reports,
+    "step_ip_generation": step_ip_generation,
     "step_hw_codegen": step_hw_codegen,
     "step_hw_ipgen": step_hw_ipgen,
     "step_set_fifo_depths": step_set_fifo_depths,

diff --git a/src/finn/custom_op/fpgadataflow/hls/__init__.py b/src/finn/custom_op/fpgadataflow/hls/__init__.py
@@ -28,6 +28,7 @@
 
 from finn.custom_op.fpgadataflow.hlsbackend import HLSBackend
 from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp
+from finn.transformation.fpgadataflow.ip_cache import cache_ip
 
 # Dictionary of HLSBackend implementations
 custom_op = dict()
@@ -117,3 +118,8 @@ def register_custom_op(cls):
 custom_op["SplitMultiHeads_hls"] = SplitMultiHeads_hls
 custom_op["MergeMultiHeads_hls"] = MergeMultiHeads_hls
 custom_op["ReplicateStream_hls"] = ReplicateStream_hls
+
+# Wrap every registered op that is an HWCustomOp subclass with the cache_ip decorator
+for key in custom_op:
+    if not issubclass(custom_op[key], HWCustomOp):
+        continue
+    custom_op[key] = cache_ip(attributes=None)(custom_op[key])
diff --git a/src/finn/custom_op/fpgadataflow/rtl/__init__.py b/src/finn/custom_op/fpgadataflow/rtl/__init__.py
@@ -26,6 +26,7 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp
 from finn.custom_op.fpgadataflow.rtl.convolutioninputgenerator_rtl import (
     ConvolutionInputGenerator_rtl,
 )
@@ -37,6 +38,7 @@
 from finn.custom_op.fpgadataflow.rtl.streamingfifo_rtl import StreamingFIFO_rtl
 from finn.custom_op.fpgadataflow.rtl.thresholding_rtl import Thresholding_rtl
 from finn.custom_op.fpgadataflow.rtl.vectorvectoractivation_rtl import VVAU_rtl
+from finn.transformation.fpgadataflow.ip_cache import cache_ip
 
 custom_op = dict()
 
@@ -49,3 +51,8 @@
 custom_op["MVAU_rtl"] = MVAU_rtl
 custom_op["VVAU_rtl"] = VVAU_rtl
 custom_op["Thresholding_rtl"] = Thresholding_rtl
+
+# Wrap every registered op that is an HWCustomOp subclass with the cache_ip decorator
+for key in custom_op:
+    if not issubclass(custom_op[key], HWCustomOp):
+        continue
+    custom_op[key] = cache_ip(attributes=None)(custom_op[key])
diff --git a/src/finn/interface/interface_utils.py b/src/finn/interface/interface_utils.py
@@ -133,6 +133,27 @@ def resolve_deps_path(deps: Path | None, settings: dict) -> Path | None:
     return None
 
 
+def resolve_cache_path(cache: Path | None, settings: dict) -> Path:
+    """Resolve the path to the IP cache. Always returns a Path.
+
+    Resolution order is:
+    Command Line Argument -> Environment var -> Settings -> Default (finn-plus/FINN_IP_CACHE)
+
+    A command line path is returned unchanged. A relative path taken from the
+    FINN_IP_CACHE environment variable or settings entry is placed below the
+    directory four levels above this file.
+    """
+    if cache is not None:
+        return cache
+    base_dir = Path(__file__).parent.parent.parent.parent
+    # The environment takes precedence over the settings file
+    for source in (os.environ, settings):
+        if "FINN_IP_CACHE" not in source:
+            continue
+        candidate = Path(source["FINN_IP_CACHE"])
+        return candidate if candidate.is_absolute() else base_dir / candidate
+    return base_dir / "FINN_IP_CACHE"
+
+
 def resolve_num_workers(num: int, settings: dict) -> int:
     """Resolve the number of workers to use. Uses 75% of cores available as default fallback"""
     if num > -1: