From 94c9904bdcf3ee22d330da1e19f81e3463f2ed07 Mon Sep 17 00:00:00 2001 From: Brad Dwyer Date: Wed, 25 Mar 2026 19:56:08 -0700 Subject: [PATCH 1/9] Add Introspection of Locally Cached Models --- inference/core/cache/air_gapped.py | 296 +++++++++++ .../core/interfaces/http/builder/routes.py | 85 +++- .../interfaces/http/handlers/workflows.py | 114 ++++- inference/core/interfaces/http/http_api.py | 7 +- .../models/foundation/anthropic_claude/v1.py | 5 + .../models/foundation/anthropic_claude/v2.py | 5 + .../models/foundation/anthropic_claude/v3.py | 5 + .../core_steps/models/foundation/clip/v1.py | 15 + .../models/foundation/clip_comparison/v1.py | 15 + .../models/foundation/clip_comparison/v2.py | 15 + .../models/foundation/cog_vlm/v1.py | 4 + .../models/foundation/depth_estimation/v1.py | 9 + .../models/foundation/easy_ocr/v1.py | 13 + .../models/foundation/florence2/v1.py | 8 + .../models/foundation/florence2/v2.py | 8 + .../core_steps/models/foundation/gaze/v1.py | 5 + .../models/foundation/google_gemini/v1.py | 4 + .../models/foundation/google_gemini/v2.py | 4 + .../models/foundation/google_gemini/v3.py | 4 + .../models/foundation/google_vision_ocr/v1.py | 5 + .../models/foundation/llama_vision/v1.py | 4 + .../core_steps/models/foundation/lmm/v1.py | 5 + .../models/foundation/lmm_classifier/v1.py | 5 + .../models/foundation/moondream2/v1.py | 5 + .../core_steps/models/foundation/ocr/v1.py | 5 + .../core_steps/models/foundation/openai/v1.py | 5 + .../core_steps/models/foundation/openai/v2.py | 5 + .../core_steps/models/foundation/openai/v3.py | 5 + .../core_steps/models/foundation/openai/v4.py | 5 + .../foundation/perception_encoder/v1.py | 9 + .../core_steps/models/foundation/qwen/v1.py | 5 + .../models/foundation/qwen3_5vl/v1.py | 5 + .../models/foundation/qwen3vl/v1.py | 5 + .../models/foundation/seg_preview/v1.py | 5 + .../models/foundation/segment_anything2/v1.py | 10 + .../models/foundation/segment_anything3/v1.py | 5 + .../models/foundation/segment_anything3/v2.py | 5 + .../models/foundation/segment_anything3/v3.py | 5 + .../foundation/segment_anything3_3d/v1.py | 5 + .../models/foundation/smolvlm/v1.py | 5 + .../foundation/stability_ai/image_gen/v1.py | 5 + .../foundation/stability_ai/inpainting/v1.py | 5 + .../foundation/stability_ai/outpainting/v1.py | 5 + .../models/foundation/yolo_world/v1.py | 14 + .../roboflow/instance_segmentation/v1.py | 5 + .../roboflow/instance_segmentation/v2.py | 5 + .../models/roboflow/keypoint_detection/v1.py | 5 + .../models/roboflow/keypoint_detection/v2.py | 5 + .../roboflow/multi_class_classification/v1.py | 5 + .../roboflow/multi_class_classification/v2.py | 5 + .../roboflow/multi_label_classification/v1.py | 5 + .../roboflow/multi_label_classification/v2.py | 5 + .../models/roboflow/object_detection/v1.py | 5 + .../models/roboflow/object_detection/v2.py | 5 + .../roboflow/semantic_segmentation/v1.py | 4 + .../sinks/roboflow/custom_metadata/v1.py | 5 + .../sinks/roboflow/dataset_upload/v1.py | 5 + .../sinks/roboflow/dataset_upload/v2.py | 5 + .../v1.py | 4 + .../core_steps/sinks/slack/notification/v1.py | 4 + .../core_steps/sinks/twilio/sms/v1.py | 4 + .../core_steps/sinks/twilio/sms/v2.py | 5 + .../workflows/core_steps/sinks/webhook/v1.py | 5 + tests/unit/__init__.py | 0 tests/unit/core/__init__.py | 0 tests/unit/core/cache/__init__.py | 0 tests/unit/core/cache/test_air_gapped.py | 299 +++++++++++ tests/unit/core/interfaces/__init__.py | 0 tests/unit/core/interfaces/http/__init__.py | 0 .../http/test_blocks_describe_airgapped.py | 464 ++++++++++++++++++ .../core/workflows/test_air_gapped_blocks.py | 117 +++++ 71 files changed, 1730 insertions(+), 4 deletions(-) create mode 100644 inference/core/cache/air_gapped.py create mode 100644 tests/unit/__init__.py create mode 100644 tests/unit/core/__init__.py create mode 100644 tests/unit/core/cache/__init__.py create mode 100644 tests/unit/core/cache/test_air_gapped.py create mode 100644 tests/unit/core/interfaces/__init__.py create mode 100644 tests/unit/core/interfaces/http/__init__.py create mode 100644 tests/unit/core/interfaces/http/test_blocks_describe_airgapped.py create mode 100644 tests/unit/core/workflows/test_air_gapped_blocks.py diff --git a/inference/core/cache/air_gapped.py b/inference/core/cache/air_gapped.py new file mode 100644 index 0000000000..5128eca110 --- /dev/null +++ b/inference/core/cache/air_gapped.py @@ -0,0 +1,296 @@ +"""Utilities for discovering models and foundation-model weights in the local cache. + +Used by the air-gapped workflow builder to enumerate what is available for +offline workflow construction. +""" + +import hashlib +import json +import logging +import os +import re +from typing import Any, Dict, List, Optional + +from inference.core.cache.model_artifacts import are_all_files_cached, get_cache_dir +from inference.core.env import MODEL_CACHE_DIR +from inference.core.roboflow_api import MODEL_TYPE_KEY, PROJECT_TASK_TYPE_KEY + +logger = logging.getLogger(__name__) + +# Directories directly under MODEL_CACHE_DIR that are not model trees. +_SKIP_TOP_LEVEL = {"workflow", "_file_locks"} + + +def _slugify_model_id(model_id: str) -> str: + """Reproduce the slug used by inference-models for cache directory names. + + Must stay in sync with + ``inference_models.models.auto_loaders.core.slugify_model_id_to_os_safe_format``. + """ + slug = re.sub(r"[^A-Za-z0-9_-]+", "-", model_id) + slug = re.sub(r"[_-]{2,}", "-", slug) + if not slug: + slug = "special-char-only-model-id" + if len(slug) > 48: + slug = slug[:48] + digest = hashlib.blake2s(model_id.encode("utf-8"), digest_size=4).hexdigest() + return f"{slug}-{digest}" + + +def _is_model_cached(model_id: str) -> bool: + """Check if *model_id* has cached artifacts in either cache layout. + + Layout 1 (traditional): ``MODEL_CACHE_DIR/{model_id}/`` with files inside. + Layout 2 (inference-models): ``MODEL_CACHE_DIR/models-cache/{slug}/`` with + sub-directories containing model files. + """ + # Traditional layout + traditional_path = os.path.join(MODEL_CACHE_DIR, model_id) + if os.path.isdir(traditional_path) and os.listdir(traditional_path): + return True + + # inference-models layout + slug = _slugify_model_id(model_id) + models_cache_path = os.path.join(MODEL_CACHE_DIR, "models-cache", slug) + if os.path.isdir(models_cache_path) and os.listdir(models_cache_path): + return True + + return False + + +def _load_blocks() -> list: + """Load workflow blocks, isolating the heavy import for testability.""" + from inference.core.workflows.execution_engine.introspection.blocks_loader import ( + load_workflow_blocks, + ) + + return load_workflow_blocks() + + +def scan_cached_models(cache_dir: str) -> List[Dict[str, Any]]: + """Walk *cache_dir* looking for ``model_type.json`` marker files. + + Each marker is written by the model registry when a model is first + downloaded. The file contains at least ``project_task_type`` and + ``model_type`` keys. + + Returns a list of dicts with the following shape:: + + { + "model_id": "workspace/project/3", + "name": "workspace/project/3", + "task_type": "object-detection", + "model_architecture": "yolov8n", + "is_foundation": False, + } + """ + results: List[Dict[str, Any]] = [] + if not os.path.isdir(cache_dir): + return results + + for root, dirs, files in os.walk(cache_dir): + # Prune top-level directories we know are not model trees. + rel = os.path.relpath(root, cache_dir) + if rel == ".": + dirs[:] = [d for d in dirs if d not in _SKIP_TOP_LEVEL] + continue + + if "model_type.json" not in files: + continue + + model_type_path = os.path.join(root, "model_type.json") + try: + with open(model_type_path, "r") as fh: + metadata = json.load(fh) + except (json.JSONDecodeError, OSError) as exc: + logger.warning( + "Skipping unreadable model_type.json at %s: %s", + model_type_path, + exc, + ) + continue + + if not isinstance(metadata, dict): + continue + + # Support both traditional keys and inference-models metadata keys. + task_type = metadata.get(PROJECT_TASK_TYPE_KEY) or metadata.get( + "taskType", "" + ) + model_architecture = metadata.get(MODEL_TYPE_KEY) or metadata.get( + "modelArchitecture", "" + ) + + if not task_type: + continue + + model_id = os.path.relpath(root, cache_dir) + # Normalise path separators on Windows. + model_id = model_id.replace(os.sep, "/") + + results.append( + { + "model_id": model_id, + "name": model_id, + "task_type": task_type, + "model_architecture": model_architecture, + "is_foundation": False, + } + ) + + return results + + +def get_cached_foundation_models( + blocks: Optional[list] = None, +) -> List[Dict[str, Any]]: + """Return metadata for workflow blocks whose required weights are cached. + + Each block whose manifest class exposes a ``get_required_cache_artifacts`` + classmethod is inspected. If every artifact it declares is already present + in the local cache the block is included in the result list. + + Blocks that do not expose the classmethod are silently skipped. + + Args: + blocks: Optional pre-loaded list of block specifications. When + *None* (the default) the blocks are loaded via the engine's + block loader. + """ + results: List[Dict[str, Any]] = [] + if blocks is None: + try: + blocks = _load_blocks() + except Exception: + logger.debug( + "Could not load workflow blocks for foundation model scan", + exc_info=True, + ) + return results + + for block in blocks: + manifest_cls = block.manifest_class + if not hasattr(manifest_cls, "get_required_cache_artifacts"): + continue + + try: + artifacts_spec = manifest_cls.get_required_cache_artifacts() + except Exception: + logger.debug( + "Error calling get_required_cache_artifacts on %s", + block.identifier, + exc_info=True, + ) + continue + + # artifacts_spec can be: + # - list of model_id strings (new format): block is cached if ANY + # variant directory exists and contains files + # - dict with {"model_id": ..., "files": [...]} (legacy format) + if isinstance(artifacts_spec, list): + cached = any(_is_model_cached(mid) for mid in artifacts_spec) + if not cached: + continue + model_id = artifacts_spec[0] if artifacts_spec else "" + elif isinstance(artifacts_spec, dict): + model_id = artifacts_spec.get("model_id") + required_files = artifacts_spec.get("files", []) + if not model_id or not required_files: + continue + if not are_all_files_cached(files=required_files, model_id=model_id): + continue + else: + continue + + # Derive name from the block's manifest schema (json_schema_extra) + # rather than requiring it in the artifacts dict. + block_name = model_id + try: + schema = manifest_cls.model_json_schema() + block_name = schema.get("name", model_id) + except Exception: + pass + + # Use the block type identifier from the manifest's type field. + block_type_id = _get_block_type_identifier(block) + + results.append( + { + "model_id": model_id, + "name": block_name, + "task_type": "", + "model_architecture": "", + "is_foundation": True, + "block_type": block_type_id, + } + ) + + return results + + +def get_task_type_to_block_mapping( + blocks: Optional[list] = None, +) -> Dict[str, List[str]]: + """Build a reverse mapping from task_type to compatible block type identifiers. + + Uses ``get_compatible_task_types()`` classmethod on block manifests when + available. Blocks that do not expose the classmethod are skipped. + + Args: + blocks: Optional pre-loaded list of block specifications. When + *None* (the default) the blocks are loaded via the engine's + block loader. + """ + mapping: Dict[str, List[str]] = {} + if blocks is None: + try: + blocks = _load_blocks() + except Exception: + logger.debug( + "Could not load workflow blocks for task-type mapping", + exc_info=True, + ) + return mapping + + for block in blocks: + manifest_cls = block.manifest_class + if not hasattr(manifest_cls, "get_compatible_task_types"): + continue + + try: + task_types = manifest_cls.get_compatible_task_types() + except Exception: + logger.debug( + "Error calling get_compatible_task_types on %s", + block.identifier, + exc_info=True, + ) + continue + + if not isinstance(task_types, (list, tuple, set)): + continue + + # Derive the manifest type identifier + # (e.g. "roboflow_core/roboflow_object_detection_model@v2") + # from the block schema. + block_type_id = _get_block_type_identifier(block) + + for tt in task_types: + mapping.setdefault(tt, []).append(block_type_id) + + return mapping + + +def _get_block_type_identifier(block) -> str: + """Extract the canonical ``type`` identifier from a block specification.""" + try: + schema = block.manifest_class.model_json_schema() + type_prop = schema.get("properties", {}).get("type", {}) + # The type field is typically a const or enum with one value. + if "const" in type_prop: + return type_prop["const"] + if "enum" in type_prop and type_prop["enum"]: + return type_prop["enum"][0] + except Exception: + pass + return block.identifier diff --git a/inference/core/interfaces/http/builder/routes.py b/inference/core/interfaces/http/builder/routes.py index bf6bf9c90b..0d384974cf 100644 --- a/inference/core/interfaces/http/builder/routes.py +++ b/inference/core/interfaces/http/builder/routes.py @@ -2,14 +2,20 @@ import logging import os import re +import time from hashlib import sha256 from pathlib import Path -from typing import Any, Dict +from typing import Any, Dict, List, Optional, Tuple from fastapi import APIRouter, Body, Depends, Header, HTTPException, status from starlette.responses import HTMLResponse, JSONResponse, RedirectResponse, Response from starlette.status import HTTP_201_CREATED, HTTP_400_BAD_REQUEST, HTTP_404_NOT_FOUND +from inference.core.cache.air_gapped import ( + get_cached_foundation_models, + get_task_type_to_block_mapping, + scan_cached_models, +) from inference.core.env import BUILDER_ORIGIN, MODEL_CACHE_DIR from inference.core.interfaces.http.error_handlers import with_route_exceptions_async @@ -132,6 +138,83 @@ async def get_all_workflows(): ) +# ---------------------------------------------------------------- +# Models cache for /build/api/models (TTL-based) +# IMPORTANT: This route MUST be defined BEFORE /api/{workflow_id} +# otherwise FastAPI will match "models" as a workflow_id. +# ---------------------------------------------------------------- +_models_cache: Optional[Tuple[float, List[Dict[str, Any]]]] = None +_MODELS_CACHE_TTL = 30.0 # seconds + + +@router.get("/api/models", dependencies=[Depends(verify_csrf_token)]) +@with_route_exceptions_async +async def get_cached_models(): + """Return all models available in the local cache. + + Combines user-trained models discovered via ``model_type.json`` markers + with foundation-model blocks whose weights are fully cached. + Results are cached for 30 seconds to avoid repeated filesystem scans. + """ + global _models_cache # noqa: PLW0603 + + now = time.time() + if _models_cache is not None: + cached_at, cached_result = _models_cache + if now - cached_at < _MODELS_CACHE_TTL: + return Response( + content=json.dumps({"models": cached_result}), + media_type="application/json", + status_code=200, + ) + + # Build reverse alias map: canonical_id → [alias1, alias2, ...] + from inference.models.aliases import REGISTERED_ALIASES + + reverse_aliases: Dict[str, List[str]] = {} + for alias, canonical in REGISTERED_ALIASES.items(): + reverse_aliases.setdefault(canonical, []).append(alias) + + # Scan the filesystem for cached models. + user_models = scan_cached_models(MODEL_CACHE_DIR) + foundation_models = get_cached_foundation_models() + + # De-duplicate by model_id (foundation models take precedence). + seen: Dict[str, Dict[str, Any]] = {} + for m in user_models: + seen[m["model_id"]] = m + for m in foundation_models: + seen[m["model_id"]] = m + + # Enrich each model with compatible block types and aliases. + task_to_blocks = get_task_type_to_block_mapping() + models = [] + for m in seen.values(): + entry = dict(m) + entry.setdefault( + "compatible_block_types", + task_to_blocks.get(m.get("task_type", ""), []), + ) + # Add known aliases for this model + model_id = m.get("model_id", "") + aliases = reverse_aliases.get(model_id, []) + entry["aliases"] = aliases + # Use the shortest alias as display name if available + if aliases and (entry.get("name") == model_id or not entry.get("name")): + entry["name"] = min(aliases, key=len) + # Remove internal-only keys if present. + entry.pop("block_type", None) + models.append(entry) + + _models_cache = (now, models) + + return Response( + content=json.dumps({"models": models}), + media_type="application/json", + status_code=200, + ) + + @router.get("/api/{workflow_id}", dependencies=[Depends(verify_csrf_token)]) @with_route_exceptions_async async def get_workflow(workflow_id: str): diff --git a/inference/core/interfaces/http/handlers/workflows.py b/inference/core/interfaces/http/handlers/workflows.py index f0035eb53a..cbec35c73a 100644 --- a/inference/core/interfaces/http/handlers/workflows.py +++ b/inference/core/interfaces/http/handlers/workflows.py @@ -1,8 +1,13 @@ # TODO - for everyone: start migrating other handlers to bring relief to http_api.py -from typing import Dict, List, Optional, Set, Union +import copy +import logging +import os +from typing import Any, Dict, List, Optional, Set, Union from packaging.specifiers import SpecifierSet +from inference.core.cache.model_artifacts import are_all_files_cached +from inference.core.env import ENABLE_BUILDER, MODEL_CACHE_DIR from inference.core.entities.responses.workflows import ( DescribeInterfaceResponse, ExternalBlockPropertyPrimitiveDefinition, @@ -41,11 +46,14 @@ discover_kinds_typing_hints, ) +logger = logging.getLogger(__name__) + def handle_describe_workflows_blocks_request( dynamic_blocks_definitions: Optional[List[DynamicBlockDefinition]] = None, requested_execution_engine_version: Optional[str] = None, api_key: Optional[str] = None, + air_gapped: bool = False, ) -> WorkflowsBlocksDescription: if dynamic_blocks_definitions is None: dynamic_blocks_definitions = [] @@ -91,7 +99,7 @@ def handle_describe_workflows_blocks_request( operators_descriptions=uql_operators_descriptions, ) ) - return WorkflowsBlocksDescription( + result = WorkflowsBlocksDescription( blocks=blocks_description.blocks, declared_kinds=blocks_description.declared_kinds, kinds_connections=kinds_connections, @@ -99,6 +107,108 @@ def handle_describe_workflows_blocks_request( universal_query_language_description=universal_query_language_description, dynamic_block_definition_schema=DynamicBlockDefinition.schema(), ) + if air_gapped and ENABLE_BUILDER: + result = enrich_with_air_gapped_info(result) + return result + + +def enrich_with_air_gapped_info( + result: WorkflowsBlocksDescription, +) -> WorkflowsBlocksDescription: + """Post-process block descriptions to include air-gapped availability info. + + Deep-copies block schemas before mutating so the LRU-cached objects are + not modified. + """ + enriched_blocks = [] + for block in result.blocks: + manifest_cls = block.manifest_class + air_gapped_info = _get_air_gapped_info_for_block(manifest_cls) + # Deep-copy the schema dict to avoid mutating the cached object + enriched_schema = copy.deepcopy(block.block_schema) + if "json_schema_extra" not in enriched_schema: + enriched_schema["json_schema_extra"] = {} + enriched_schema["json_schema_extra"]["air_gapped_info"] = air_gapped_info + enriched_blocks.append( + block.model_copy(update={"block_schema": enriched_schema}) + ) + return result.model_copy(update={"blocks": enriched_blocks}) + + +def _get_air_gapped_info_for_block( + manifest_cls: Any, +) -> Dict[str, Any]: + """Determine air-gapped availability for a single block manifest class.""" + # 1. Explicit air-gapped availability declaration (e.g. cloud-only blocks) + if hasattr(manifest_cls, "get_air_gapped_availability"): + try: + info = manifest_cls.get_air_gapped_availability() + if isinstance(info, dict): + result: Dict[str, Any] = dict(info) + _add_compatible_task_types(manifest_cls, result) + return result + except Exception: + logger.debug( + "Error calling get_air_gapped_availability on %s", + getattr(manifest_cls, "__name__", str(manifest_cls)), + exc_info=True, + ) + + # 2. Foundation model blocks with cache artifact requirements + if hasattr(manifest_cls, "get_required_cache_artifacts"): + try: + artifacts_spec = manifest_cls.get_required_cache_artifacts() + if isinstance(artifacts_spec, list): + from inference.core.cache.air_gapped import _is_model_cached + + cached = any(_is_model_cached(mid) for mid in artifacts_spec) + model_id = artifacts_spec[0] if artifacts_spec else "" + result = { + "available": cached, + "reason": None if cached else "missing_cache_artifacts", + "model_id": model_id, + } + _add_compatible_task_types(manifest_cls, result) + return result + elif isinstance(artifacts_spec, dict): + model_id = artifacts_spec.get("model_id") + required_files = artifacts_spec.get("files", []) + if model_id and required_files: + cached = are_all_files_cached( + files=required_files, model_id=model_id + ) + result = { + "available": cached, + "reason": None if cached else "missing_cache_artifacts", + "model_id": model_id, + } + _add_compatible_task_types(manifest_cls, result) + return result + except Exception: + logger.debug( + "Error checking cache artifacts for %s", + getattr(manifest_cls, "__name__", str(manifest_cls)), + exc_info=True, + ) + + # 3. Default: block is available (pure logic blocks, local-network blocks, etc.) + result: Dict[str, Any] = {"available": True} + _add_compatible_task_types(manifest_cls, result) + return result + + +def _add_compatible_task_types( + manifest_cls: Any, + info: Dict[str, Any], +) -> None: + """If the manifest exposes compatible task types, add them to the info dict.""" + if hasattr(manifest_cls, "get_compatible_task_types"): + try: + task_types = manifest_cls.get_compatible_task_types() + if isinstance(task_types, (list, tuple, set)): + info["compatible_task_types"] = list(task_types) + except Exception: + pass def handle_describe_workflows_interface( diff --git a/inference/core/interfaces/http/http_api.py b/inference/core/interfaces/http/http_api.py index 48693a76a1..e9a9089092 100644 --- a/inference/core/interfaces/http/http_api.py +++ b/inference/core/interfaces/http/http_api.py @@ -1656,8 +1656,11 @@ def get_execution_engine_versions() -> ExecutionEngineVersions: @with_route_exceptions def describe_workflows_blocks( request: Request, + air_gapped: bool = Query(False), ) -> Union[WorkflowsBlocksDescription, Response]: - result = handle_describe_workflows_blocks_request() + result = handle_describe_workflows_blocks_request( + air_gapped=air_gapped, + ) return gzip_response_if_requested(request=request, response=result) @app.post( @@ -1674,6 +1677,7 @@ def describe_workflows_blocks( def describe_workflows_blocks( request: Request, request_payload: Optional[DescribeBlocksRequest] = None, + air_gapped: bool = Query(False), ) -> Union[WorkflowsBlocksDescription, Response]: # TODO: get rid of async: https://github.com/roboflow/inference/issues/569 dynamic_blocks_definitions = None @@ -1693,6 +1697,7 @@ def describe_workflows_blocks( dynamic_blocks_definitions=dynamic_blocks_definitions, requested_execution_engine_version=requested_execution_engine_version, api_key=api_key, + air_gapped=air_gapped, ) return gzip_response_if_requested(request=request, response=result) diff --git a/inference/core/workflows/core_steps/models/foundation/anthropic_claude/v1.py b/inference/core/workflows/core_steps/models/foundation/anthropic_claude/v1.py index 447b10af5c..f51dc62841 100644 --- a/inference/core/workflows/core_steps/models/foundation/anthropic_claude/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/anthropic_claude/v1.py @@ -225,6 +225,11 @@ def validate(self) -> "BlockManifest": ) return self + + @classmethod + def get_air_gapped_availability(cls) -> dict: + return {"available": False, "reason": "requires_internet"} + @classmethod def get_parameters_accepting_batches(cls) -> List[str]: return ["images"] diff --git a/inference/core/workflows/core_steps/models/foundation/anthropic_claude/v2.py b/inference/core/workflows/core_steps/models/foundation/anthropic_claude/v2.py index 51387e9702..86110458e8 100644 --- a/inference/core/workflows/core_steps/models/foundation/anthropic_claude/v2.py +++ b/inference/core/workflows/core_steps/models/foundation/anthropic_claude/v2.py @@ -304,6 +304,11 @@ def validate(self) -> "BlockManifest": ) return self + + @classmethod + def get_air_gapped_availability(cls) -> dict: + return {"available": False, "reason": "requires_internet"} + @classmethod def get_parameters_accepting_batches(cls) -> List[str]: return ["images"] diff --git a/inference/core/workflows/core_steps/models/foundation/anthropic_claude/v3.py b/inference/core/workflows/core_steps/models/foundation/anthropic_claude/v3.py index e50625f8ed..b33c674291 100644 --- a/inference/core/workflows/core_steps/models/foundation/anthropic_claude/v3.py +++ b/inference/core/workflows/core_steps/models/foundation/anthropic_claude/v3.py @@ -321,6 +321,11 @@ def validate(self) -> "BlockManifest": ) return self + + @classmethod + def get_air_gapped_availability(cls) -> dict: + return {"available": False, "reason": "requires_internet"} + @classmethod def get_parameters_accepting_batches(cls) -> List[str]: return ["images"] diff --git a/inference/core/workflows/core_steps/models/foundation/clip/v1.py b/inference/core/workflows/core_steps/models/foundation/clip/v1.py index 96cc9c4650..9fff7ac8b2 100644 --- a/inference/core/workflows/core_steps/models/foundation/clip/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/clip/v1.py @@ -93,6 +93,21 @@ def describe_outputs(cls) -> List[OutputDefinition]: def get_execution_engine_compatibility(cls) -> Optional[str]: return ">=1.3.0,<2.0.0" + @classmethod + def get_required_cache_artifacts(cls) -> list: + """Return list of model_id variants that can satisfy this block.""" + return [ + "clip/RN101", + "clip/RN50", + "clip/RN50x16", + "clip/RN50x4", + "clip/RN50x64", + "clip/ViT-B-16", + "clip/ViT-B-32", + "clip/ViT-L-14-336px", + "clip/ViT-L-14", + ] + text_cache = LRUCache() diff --git a/inference/core/workflows/core_steps/models/foundation/clip_comparison/v1.py b/inference/core/workflows/core_steps/models/foundation/clip_comparison/v1.py index fe7631e2de..9669ec2df5 100644 --- a/inference/core/workflows/core_steps/models/foundation/clip_comparison/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/clip_comparison/v1.py @@ -99,6 +99,21 @@ def describe_outputs(cls) -> List[OutputDefinition]: def get_execution_engine_compatibility(cls) -> Optional[str]: return ">=1.3.0,<2.0.0" + @classmethod + def get_required_cache_artifacts(cls) -> list: + """Return list of model_id variants that can satisfy this block.""" + return [ + "clip/RN101", + "clip/RN50", + "clip/RN50x16", + "clip/RN50x4", + "clip/RN50x64", + "clip/ViT-B-16", + "clip/ViT-B-32", + "clip/ViT-L-14-336px", + "clip/ViT-L-14", + ] + class ClipComparisonBlockV1(WorkflowBlock): diff --git a/inference/core/workflows/core_steps/models/foundation/clip_comparison/v2.py b/inference/core/workflows/core_steps/models/foundation/clip_comparison/v2.py index ae66fe27db..d61b7da23c 100644 --- a/inference/core/workflows/core_steps/models/foundation/clip_comparison/v2.py +++ b/inference/core/workflows/core_steps/models/foundation/clip_comparison/v2.py @@ -123,6 +123,21 @@ def describe_outputs(cls) -> List[OutputDefinition]: def get_execution_engine_compatibility(cls) -> Optional[str]: return ">=1.3.0,<2.0.0" + @classmethod + def get_required_cache_artifacts(cls) -> list: + """Return list of model_id variants that can satisfy this block.""" + return [ + "clip/RN101", + "clip/RN50", + "clip/RN50x16", + "clip/RN50x4", + "clip/RN50x64", + "clip/ViT-B-16", + "clip/ViT-B-32", + "clip/ViT-L-14-336px", + "clip/ViT-L-14", + ] + class ClipComparisonBlockV2(WorkflowBlock): diff --git a/inference/core/workflows/core_steps/models/foundation/cog_vlm/v1.py b/inference/core/workflows/core_steps/models/foundation/cog_vlm/v1.py index aa6411bdb8..00212d55b7 100644 --- a/inference/core/workflows/core_steps/models/foundation/cog_vlm/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/cog_vlm/v1.py @@ -92,6 +92,10 @@ class BlockManifest(WorkflowBlockManifest): ], ) + @classmethod + def get_air_gapped_availability(cls) -> dict: + return {"available": False, "reason": "requires_internet"} + @classmethod def get_parameters_accepting_batches(cls) -> List[str]: return ["images"] diff --git a/inference/core/workflows/core_steps/models/foundation/depth_estimation/v1.py b/inference/core/workflows/core_steps/models/foundation/depth_estimation/v1.py index ae095e16c4..e61177a624 100644 --- a/inference/core/workflows/core_steps/models/foundation/depth_estimation/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/depth_estimation/v1.py @@ -114,6 +114,15 @@ def get_parameters_accepting_batches(cls) -> List[str]: def get_execution_engine_compatibility(cls) -> Optional[str]: return ">=1.3.0,<2.0.0" + @classmethod + def get_required_cache_artifacts(cls) -> list: + """Return list of model_id variants that can satisfy this block.""" + return [ + "depth-anything-v2/small", + "depth-anything-v3/small", + "depth-anything-v3/base", + ] + class DepthEstimationBlockV1(WorkflowBlock): def __init__( diff --git a/inference/core/workflows/core_steps/models/foundation/easy_ocr/v1.py b/inference/core/workflows/core_steps/models/foundation/easy_ocr/v1.py index 9d605b4503..8a0678d0ce 100644 --- a/inference/core/workflows/core_steps/models/foundation/easy_ocr/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/easy_ocr/v1.py @@ -144,6 +144,19 @@ def describe_outputs(cls) -> List[OutputDefinition]: def get_execution_engine_compatibility(cls) -> Optional[str]: return ">=1.3.0,<2.0.0" + @classmethod + def get_required_cache_artifacts(cls) -> list: + """Return list of model_id variants that can satisfy this block.""" + return [ + "easy_ocr/english_g2", + "easy_ocr/japanese_g2", + "easy_ocr/kannada_g2", + "easy_ocr/korean_g2", + "easy_ocr/latin_g2", + "easy_ocr/telugu_g2", + "easy_ocr/zh_sim_g2", + ] + class EasyOCRBlockV1(WorkflowBlock): def __init__( diff --git a/inference/core/workflows/core_steps/models/foundation/florence2/v1.py b/inference/core/workflows/core_steps/models/foundation/florence2/v1.py index 073a93400c..e0f5aec6c0 100644 --- a/inference/core/workflows/core_steps/models/foundation/florence2/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/florence2/v1.py @@ -289,6 +289,14 @@ def get_execution_engine_compatibility(cls) -> Optional[str]: class BlockManifest(BaseManifest): + @classmethod + def get_required_cache_artifacts(cls) -> list: + """Return list of model_id variants that can satisfy this block.""" + return [ + "florence-pretrains/3", + "florence-pretrains/4", + ] + model_config = ConfigDict( json_schema_extra={ "name": "Florence-2 Model", diff --git a/inference/core/workflows/core_steps/models/foundation/florence2/v2.py b/inference/core/workflows/core_steps/models/foundation/florence2/v2.py index 2da55cc116..29f0c3a74b 100644 --- a/inference/core/workflows/core_steps/models/foundation/florence2/v2.py +++ b/inference/core/workflows/core_steps/models/foundation/florence2/v2.py @@ -22,6 +22,14 @@ class V2BlockManifest(BaseManifest): + @classmethod + def get_required_cache_artifacts(cls) -> list: + """Return list of model_id variants that can satisfy this block.""" + return [ + "florence-pretrains/3", + "florence-pretrains/4", + ] + type: Literal["roboflow_core/florence_2@v2"] model_id: Union[WorkflowParameterSelector(kind=[ROBOFLOW_MODEL_ID_KIND]), str] = ( Field( diff --git a/inference/core/workflows/core_steps/models/foundation/gaze/v1.py b/inference/core/workflows/core_steps/models/foundation/gaze/v1.py index 5cd9a88fcf..11f1f7431e 100644 --- a/inference/core/workflows/core_steps/models/foundation/gaze/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/gaze/v1.py @@ -182,6 +182,11 @@ def describe_outputs(cls) -> List[OutputDefinition]: def get_execution_engine_compatibility(cls) -> Optional[str]: return ">=1.3.0,<2.0.0" + @classmethod + def get_required_cache_artifacts(cls) -> list: + """Return list of model_id variants that can satisfy this block.""" + return ["gaze/L2CS"] + class GazeBlockV1(WorkflowBlock): def __init__( diff --git a/inference/core/workflows/core_steps/models/foundation/google_gemini/v1.py b/inference/core/workflows/core_steps/models/foundation/google_gemini/v1.py index ece8a28a26..2ad15c76d5 100644 --- a/inference/core/workflows/core_steps/models/foundation/google_gemini/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/google_gemini/v1.py @@ -213,6 +213,10 @@ class BlockManifest(WorkflowBlockManifest): "Please restrict if you hit Google Gemini API limits.", ) + @classmethod + def get_air_gapped_availability(cls) -> dict: + return {"available": False, "reason": "requires_internet"} + @field_validator("model_version", mode="before") @classmethod def validate_model_version(cls, value): diff --git a/inference/core/workflows/core_steps/models/foundation/google_gemini/v2.py b/inference/core/workflows/core_steps/models/foundation/google_gemini/v2.py index 4e5245a3e0..f858ef8394 100644 --- a/inference/core/workflows/core_steps/models/foundation/google_gemini/v2.py +++ b/inference/core/workflows/core_steps/models/foundation/google_gemini/v2.py @@ -287,6 +287,10 @@ class BlockManifest(WorkflowBlockManifest): "Please restrict if you hit Google Gemini API limits.", ) + @classmethod + def get_air_gapped_availability(cls) -> dict: + return {"available": False, "reason": "requires_internet"} + @field_validator("model_version", mode="before") @classmethod def validate_model_version(cls, value): diff --git a/inference/core/workflows/core_steps/models/foundation/google_gemini/v3.py b/inference/core/workflows/core_steps/models/foundation/google_gemini/v3.py index 0b3ee3b21a..00fd474c53 100644 --- a/inference/core/workflows/core_steps/models/foundation/google_gemini/v3.py +++ b/inference/core/workflows/core_steps/models/foundation/google_gemini/v3.py @@ -333,6 +333,10 @@ class BlockManifest(WorkflowBlockManifest): "Please restrict if you hit Google Gemini API limits.", ) + @classmethod + def get_air_gapped_availability(cls) -> dict: + return {"available": False, "reason": "requires_internet"} + @field_validator("model_version", mode="before") @classmethod def validate_model_version(cls, value): diff --git a/inference/core/workflows/core_steps/models/foundation/google_vision_ocr/v1.py b/inference/core/workflows/core_steps/models/foundation/google_vision_ocr/v1.py index cb77aa79a9..4bb31bb72a 100644 --- a/inference/core/workflows/core_steps/models/foundation/google_vision_ocr/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/google_vision_ocr/v1.py @@ -98,6 +98,11 @@ class BlockManifest(WorkflowBlockManifest): examples=[["en", "fr"], ["de"]], ) + + @classmethod + def get_air_gapped_availability(cls) -> dict: + return {"available": False, "reason": "requires_internet"} + @classmethod def describe_outputs(cls) -> List[OutputDefinition]: return [ diff --git a/inference/core/workflows/core_steps/models/foundation/llama_vision/v1.py b/inference/core/workflows/core_steps/models/foundation/llama_vision/v1.py index 17beeca390..d40e91f09e 100644 --- a/inference/core/workflows/core_steps/models/foundation/llama_vision/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/llama_vision/v1.py @@ -262,6 +262,10 @@ def validate(self) -> "BlockManifest": ) return self + @classmethod + def get_air_gapped_availability(cls) -> dict: + return {"available": False, "reason": "requires_internet"} + @field_validator("temperature") @classmethod def validate_temperature(cls, value: Union[str, float]) -> Union[str, float]: diff --git a/inference/core/workflows/core_steps/models/foundation/lmm/v1.py b/inference/core/workflows/core_steps/models/foundation/lmm/v1.py index d066e30b3d..72202d61d3 100644 --- a/inference/core/workflows/core_steps/models/foundation/lmm/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/lmm/v1.py @@ -130,6 +130,11 @@ class BlockManifest(WorkflowBlockManifest): examples=[{"count": "number of cats in the picture"}, "$inputs.json_output"], ) + + @classmethod + def get_air_gapped_availability(cls) -> dict: + return {"available": False, "reason": "requires_internet"} + @classmethod def get_parameters_accepting_batches(cls) -> List[str]: return ["images"] diff --git a/inference/core/workflows/core_steps/models/foundation/lmm_classifier/v1.py b/inference/core/workflows/core_steps/models/foundation/lmm_classifier/v1.py index bc7be55e96..232d7df4b1 100644 --- a/inference/core/workflows/core_steps/models/foundation/lmm_classifier/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/lmm_classifier/v1.py @@ -96,6 +96,11 @@ class BlockManifest(WorkflowBlockManifest): ) ) + + @classmethod + def get_air_gapped_availability(cls) -> dict: + return {"available": False, "reason": "requires_internet"} + @classmethod def get_parameters_accepting_batches(cls) -> List[str]: return ["images"] diff --git a/inference/core/workflows/core_steps/models/foundation/moondream2/v1.py b/inference/core/workflows/core_steps/models/foundation/moondream2/v1.py index 71b368330d..58f0c41881 100644 --- a/inference/core/workflows/core_steps/models/foundation/moondream2/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/moondream2/v1.py @@ -106,6 +106,11 @@ def get_parameters_accepting_batches(cls) -> List[str]: def get_execution_engine_compatibility(cls) -> Optional[str]: return ">=1.3.0,<2.0.0" + @classmethod + def get_required_cache_artifacts(cls) -> list: + """Return list of model_id variants that can satisfy this block.""" + return ["moondream2/moondream2_2b_jul24"] + class Moondream2BlockV1(WorkflowBlock): def __init__( diff --git a/inference/core/workflows/core_steps/models/foundation/ocr/v1.py b/inference/core/workflows/core_steps/models/foundation/ocr/v1.py index 9815c27df8..bda750d0b2 100644 --- a/inference/core/workflows/core_steps/models/foundation/ocr/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/ocr/v1.py @@ -106,6 +106,11 @@ def describe_outputs(cls) -> List[OutputDefinition]: def get_execution_engine_compatibility(cls) -> Optional[str]: return ">=1.3.0,<2.0.0" + @classmethod + def get_required_cache_artifacts(cls) -> list: + """Return list of model_id variants that can satisfy this block.""" + return ["doctr/default"] + class OCRModelBlockV1(WorkflowBlock): # TODO: we need data model for OCR predictions diff --git a/inference/core/workflows/core_steps/models/foundation/openai/v1.py b/inference/core/workflows/core_steps/models/foundation/openai/v1.py index 2c43a34fcd..320d4ec7df 100644 --- a/inference/core/workflows/core_steps/models/foundation/openai/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/openai/v1.py @@ -121,6 +121,11 @@ class BlockManifest(WorkflowBlockManifest): examples=[450], ) + + @classmethod + def get_air_gapped_availability(cls) -> dict: + return {"available": False, "reason": "requires_internet"} + @classmethod def get_parameters_accepting_batches(cls) -> List[str]: return ["images"] diff --git a/inference/core/workflows/core_steps/models/foundation/openai/v2.py b/inference/core/workflows/core_steps/models/foundation/openai/v2.py index d32953e912..c5c30f5973 100644 --- a/inference/core/workflows/core_steps/models/foundation/openai/v2.py +++ b/inference/core/workflows/core_steps/models/foundation/openai/v2.py @@ -222,6 +222,11 @@ def validate(self) -> "BlockManifest": ) return self + + @classmethod + def get_air_gapped_availability(cls) -> dict: + return {"available": False, "reason": "requires_internet"} + @classmethod def get_parameters_accepting_batches(cls) -> List[str]: return ["images"] diff --git a/inference/core/workflows/core_steps/models/foundation/openai/v3.py b/inference/core/workflows/core_steps/models/foundation/openai/v3.py index c16cd9b139..a7d01bef43 100644 --- a/inference/core/workflows/core_steps/models/foundation/openai/v3.py +++ b/inference/core/workflows/core_steps/models/foundation/openai/v3.py @@ -232,6 +232,11 @@ def validate(self) -> "BlockManifest": ) return self + + @classmethod + def get_air_gapped_availability(cls) -> dict: + return {"available": False, "reason": "requires_internet"} + @classmethod def get_parameters_accepting_batches(cls) -> List[str]: return ["images"] diff --git a/inference/core/workflows/core_steps/models/foundation/openai/v4.py b/inference/core/workflows/core_steps/models/foundation/openai/v4.py index 9ed040b7fb..c18ac07230 100644 --- a/inference/core/workflows/core_steps/models/foundation/openai/v4.py +++ b/inference/core/workflows/core_steps/models/foundation/openai/v4.py @@ -330,6 +330,11 @@ def validate(self) -> "BlockManifest": ) return self + + @classmethod + def get_air_gapped_availability(cls) -> dict: + return {"available": False, "reason": "requires_internet"} + @classmethod def get_parameters_accepting_batches(cls) -> List[str]: return ["images"] diff --git a/inference/core/workflows/core_steps/models/foundation/perception_encoder/v1.py b/inference/core/workflows/core_steps/models/foundation/perception_encoder/v1.py index c9455f0a33..289fcb21c5 100644 --- a/inference/core/workflows/core_steps/models/foundation/perception_encoder/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/perception_encoder/v1.py @@ -85,6 +85,15 @@ def describe_outputs(cls) -> List[OutputDefinition]: def get_execution_engine_compatibility(cls) -> Optional[str]: return ">=1.3.0,<2.0.0" + @classmethod + def get_required_cache_artifacts(cls) -> list: + """Return list of model_id variants that can satisfy this block.""" + return [ + "perception_encoder/PE-Core-B16-224", + "perception_encoder/PE-Core-L14-336", + "perception_encoder/PE-Core-G14-448", + ] + text_cache = LRUCache() diff --git a/inference/core/workflows/core_steps/models/foundation/qwen/v1.py b/inference/core/workflows/core_steps/models/foundation/qwen/v1.py index e183216960..02beba7155 100644 --- a/inference/core/workflows/core_steps/models/foundation/qwen/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/qwen/v1.py @@ -107,6 +107,11 @@ def get_parameters_accepting_batches(cls) -> List[str]: def get_execution_engine_compatibility(cls) -> Optional[str]: return ">=1.3.0,<2.0.0" + @classmethod + def get_required_cache_artifacts(cls) -> list: + """Return list of model_id variants that can satisfy this block.""" + return ["qwen-pretrains/1"] + ########################################################################## # Qwen2.5-VL Workflow Block diff --git a/inference/core/workflows/core_steps/models/foundation/qwen3_5vl/v1.py b/inference/core/workflows/core_steps/models/foundation/qwen3_5vl/v1.py index 4b0aa5d4c1..b4a7f4e8dd 100644 --- a/inference/core/workflows/core_steps/models/foundation/qwen3_5vl/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/qwen3_5vl/v1.py @@ -130,6 +130,11 @@ def get_parameters_accepting_batches(cls) -> List[str]: def get_execution_engine_compatibility(cls) -> Optional[str]: return ">=1.3.0,<2.0.0" + @classmethod + def get_required_cache_artifacts(cls) -> list: + """Return list of model_id variants that can satisfy this block.""" + return ["qwen3_5-0.8b", "qwen3_5-2b"] + ########################################################################## # Qwen3.5-VL Workflow Block diff --git a/inference/core/workflows/core_steps/models/foundation/qwen3vl/v1.py b/inference/core/workflows/core_steps/models/foundation/qwen3vl/v1.py index c4d210b628..bc6c3b0213 100644 --- a/inference/core/workflows/core_steps/models/foundation/qwen3vl/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/qwen3vl/v1.py @@ -104,6 +104,11 @@ def get_parameters_accepting_batches(cls) -> List[str]: def get_execution_engine_compatibility(cls) -> Optional[str]: return ">=1.3.0,<2.0.0" + @classmethod + def get_required_cache_artifacts(cls) -> list: + """Return list of model_id variants that can satisfy this block.""" + return ["qwen-pretrains/2"] + ########################################################################## # Qwen3-VL Workflow Block diff --git a/inference/core/workflows/core_steps/models/foundation/seg_preview/v1.py b/inference/core/workflows/core_steps/models/foundation/seg_preview/v1.py index 1a4412fe9c..07eb0b1400 100644 --- a/inference/core/workflows/core_steps/models/foundation/seg_preview/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/seg_preview/v1.py @@ -105,6 +105,11 @@ def describe_outputs(cls) -> List[OutputDefinition]: def get_execution_engine_compatibility(cls) -> Optional[str]: return ">=1.3.0,<2.0.0" + @classmethod + def get_air_gapped_availability(cls) -> dict: + """This block requires internet access to the remote inference proxy.""" + return {"available": False, "reason": "requires_internet"} + class SegPreviewBlockV1(WorkflowBlock): diff --git a/inference/core/workflows/core_steps/models/foundation/segment_anything2/v1.py b/inference/core/workflows/core_steps/models/foundation/segment_anything2/v1.py index f4c6da4fc3..459671bbd8 100644 --- a/inference/core/workflows/core_steps/models/foundation/segment_anything2/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/segment_anything2/v1.py @@ -144,6 +144,16 @@ def describe_outputs(cls) -> List[OutputDefinition]: def get_execution_engine_compatibility(cls) -> Optional[str]: return ">=1.3.0,<2.0.0" + @classmethod + def get_required_cache_artifacts(cls) -> list: + """Return list of model_id variants that can satisfy this block.""" + return [ + "sam2/hiera_large", + "sam2/hiera_small", + "sam2/hiera_tiny", + "sam2/hiera_b_plus", + ] + class SegmentAnything2BlockV1(WorkflowBlock): diff --git a/inference/core/workflows/core_steps/models/foundation/segment_anything3/v1.py b/inference/core/workflows/core_steps/models/foundation/segment_anything3/v1.py index 17f93e6143..848077b592 100644 --- a/inference/core/workflows/core_steps/models/foundation/segment_anything3/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/segment_anything3/v1.py @@ -137,6 +137,11 @@ def describe_outputs(cls) -> List[OutputDefinition]: def get_execution_engine_compatibility(cls) -> Optional[str]: return ">=1.3.0,<2.0.0" + @classmethod + def get_required_cache_artifacts(cls) -> list: + """Return list of model_id variants that can satisfy this block.""" + return ["sam3/sam3_final"] + class SegmentAnything3BlockV1(WorkflowBlock): diff --git a/inference/core/workflows/core_steps/models/foundation/segment_anything3/v2.py b/inference/core/workflows/core_steps/models/foundation/segment_anything3/v2.py index 242fd2ef8c..8a50827cb9 100644 --- a/inference/core/workflows/core_steps/models/foundation/segment_anything3/v2.py +++ b/inference/core/workflows/core_steps/models/foundation/segment_anything3/v2.py @@ -188,6 +188,11 @@ def describe_outputs(cls) -> List[OutputDefinition]: def get_execution_engine_compatibility(cls) -> Optional[str]: return ">=1.3.0,<2.0.0" + @classmethod + def get_required_cache_artifacts(cls) -> list: + """Return list of model_id variants that can satisfy this block.""" + return ["sam3/sam3_final"] + class SegmentAnything3BlockV2(WorkflowBlock): diff --git a/inference/core/workflows/core_steps/models/foundation/segment_anything3/v3.py b/inference/core/workflows/core_steps/models/foundation/segment_anything3/v3.py index 9563726ce8..fb10e12759 100644 --- a/inference/core/workflows/core_steps/models/foundation/segment_anything3/v3.py +++ b/inference/core/workflows/core_steps/models/foundation/segment_anything3/v3.py @@ -206,6 +206,11 @@ def describe_outputs(cls) -> List[OutputDefinition]: def get_execution_engine_compatibility(cls) -> Optional[str]: return ">=1.3.0,<2.0.0" + @classmethod + def get_required_cache_artifacts(cls) -> list: + """Return list of model_id variants that can satisfy this block.""" + return ["sam3/sam3_final"] + class SegmentAnything3BlockV3(WorkflowBlock): diff --git a/inference/core/workflows/core_steps/models/foundation/segment_anything3_3d/v1.py b/inference/core/workflows/core_steps/models/foundation/segment_anything3_3d/v1.py index 93a4cc0690..a098cc4e92 100644 --- a/inference/core/workflows/core_steps/models/foundation/segment_anything3_3d/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/segment_anything3_3d/v1.py @@ -73,6 +73,11 @@ class BlockManifest(WorkflowBlockManifest): examples=["$steps.sam2.predictions", "$steps.detections.mask_polygon"], ) + + @classmethod + def get_air_gapped_availability(cls) -> dict: + return {"available": False, "reason": "requires_internet"} + @classmethod def get_parameters_accepting_batches(cls) -> List[str]: return ["images", "mask_input"] diff --git a/inference/core/workflows/core_steps/models/foundation/smolvlm/v1.py b/inference/core/workflows/core_steps/models/foundation/smolvlm/v1.py index cbb1569b75..55e95a9d75 100644 --- a/inference/core/workflows/core_steps/models/foundation/smolvlm/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/smolvlm/v1.py @@ -93,6 +93,11 @@ def get_parameters_accepting_batches(cls) -> List[str]: def get_execution_engine_compatibility(cls) -> Optional[str]: return ">=1.3.0,<2.0.0" + @classmethod + def get_required_cache_artifacts(cls) -> list: + """Return list of model_id variants that can satisfy this block.""" + return ["smolvlm2/smolvlm-2.2b-instruct"] + class SmolVLM2BlockV1(WorkflowBlock): def __init__( diff --git a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py index f496f2c41e..cf075cb7c8 100644 --- a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py @@ -112,6 +112,11 @@ class BlockManifest(WorkflowBlockManifest): private=True, ) + + @classmethod + def get_air_gapped_availability(cls) -> dict: + return {"available": False, "reason": "requires_internet"} + @classmethod def describe_outputs(cls) -> List[OutputDefinition]: return [ diff --git a/inference/core/workflows/core_steps/models/foundation/stability_ai/inpainting/v1.py b/inference/core/workflows/core_steps/models/foundation/stability_ai/inpainting/v1.py index 742fae5c18..ec41316b5a 100644 --- a/inference/core/workflows/core_steps/models/foundation/stability_ai/inpainting/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/stability_ai/inpainting/v1.py @@ -147,6 +147,11 @@ class BlockManifest(WorkflowBlockManifest): examples=[200], ) + + @classmethod + def get_air_gapped_availability(cls) -> dict: + return {"available": False, "reason": "requires_internet"} + @classmethod def describe_outputs(cls) -> List[OutputDefinition]: return [ diff --git a/inference/core/workflows/core_steps/models/foundation/stability_ai/outpainting/v1.py b/inference/core/workflows/core_steps/models/foundation/stability_ai/outpainting/v1.py index 5358d1c019..9a952dbd71 100644 --- a/inference/core/workflows/core_steps/models/foundation/stability_ai/outpainting/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/stability_ai/outpainting/v1.py @@ -180,6 +180,11 @@ class BlockManifest(WorkflowBlockManifest): private=True, ) + + @classmethod + def get_air_gapped_availability(cls) -> dict: + return {"available": False, "reason": "requires_internet"} + @classmethod def describe_outputs(cls) -> List[OutputDefinition]: return [ diff --git a/inference/core/workflows/core_steps/models/foundation/yolo_world/v1.py b/inference/core/workflows/core_steps/models/foundation/yolo_world/v1.py index e16e470632..2cb3e4a599 100644 --- a/inference/core/workflows/core_steps/models/foundation/yolo_world/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/yolo_world/v1.py @@ -119,6 +119,20 @@ def describe_outputs(cls) -> List[OutputDefinition]: def get_execution_engine_compatibility(cls) -> Optional[str]: return ">=1.3.0,<2.0.0" + @classmethod + def get_required_cache_artifacts(cls) -> list: + """Return list of model_id variants that can satisfy this block.""" + return [ + "yolo_world/v2-s", + "yolo_world/v2-m", + "yolo_world/v2-l", + "yolo_world/v2-x", + "yolo_world/s", + "yolo_world/m", + "yolo_world/l", + "yolo_world/x", + ] + class YoloWorldModelBlockV1(WorkflowBlock): diff --git a/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v1.py b/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v1.py index be0b3c69b6..79f9f625e8 100644 --- a/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v1.py +++ b/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v1.py @@ -154,6 +154,11 @@ class BlockManifest(WorkflowBlockManifest): examples=["my_project", "$inputs.al_target_project"], ) + + @classmethod + def get_compatible_task_types(cls) -> list: + return ["instance-segmentation"] + @classmethod def get_parameters_accepting_batches(cls) -> List[str]: return ["images"] diff --git a/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v2.py b/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v2.py index bac58d1886..97c6c93323 100644 --- a/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v2.py +++ b/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v2.py @@ -151,6 +151,11 @@ class BlockManifest(WorkflowBlockManifest): examples=["my_project", "$inputs.al_target_project"], ) + + @classmethod + def get_compatible_task_types(cls) -> list: + return ["instance-segmentation"] + @classmethod def get_parameters_accepting_batches(cls) -> List[str]: return ["images"] diff --git a/inference/core/workflows/core_steps/models/roboflow/keypoint_detection/v1.py b/inference/core/workflows/core_steps/models/roboflow/keypoint_detection/v1.py index ee3eb1f9b2..49f82b9c9a 100644 --- a/inference/core/workflows/core_steps/models/roboflow/keypoint_detection/v1.py +++ b/inference/core/workflows/core_steps/models/roboflow/keypoint_detection/v1.py @@ -146,6 +146,11 @@ class BlockManifest(WorkflowBlockManifest): examples=["my_project", "$inputs.al_target_project"], ) + + @classmethod + def get_compatible_task_types(cls) -> list: + return ["keypoint-detection"] + @classmethod def get_parameters_accepting_batches(cls) -> List[str]: return ["images"] diff --git a/inference/core/workflows/core_steps/models/roboflow/keypoint_detection/v2.py b/inference/core/workflows/core_steps/models/roboflow/keypoint_detection/v2.py index 237d08a7eb..c0d2f82a12 100644 --- a/inference/core/workflows/core_steps/models/roboflow/keypoint_detection/v2.py +++ b/inference/core/workflows/core_steps/models/roboflow/keypoint_detection/v2.py @@ -142,6 +142,11 @@ class BlockManifest(WorkflowBlockManifest): examples=["my_project", "$inputs.al_target_project"], ) + + @classmethod + def get_compatible_task_types(cls) -> list: + return ["keypoint-detection"] + @classmethod def get_parameters_accepting_batches(cls) -> List[str]: return ["images"] diff --git a/inference/core/workflows/core_steps/models/roboflow/multi_class_classification/v1.py b/inference/core/workflows/core_steps/models/roboflow/multi_class_classification/v1.py index f462369e05..aa9521e5b4 100644 --- a/inference/core/workflows/core_steps/models/roboflow/multi_class_classification/v1.py +++ b/inference/core/workflows/core_steps/models/roboflow/multi_class_classification/v1.py @@ -95,6 +95,11 @@ class BlockManifest(WorkflowBlockManifest): examples=["my_project", "$inputs.al_target_project"], ) + + @classmethod + def get_compatible_task_types(cls) -> list: + return ["classification"] + @classmethod def get_parameters_accepting_batches(cls) -> List[str]: return ["images"] diff --git a/inference/core/workflows/core_steps/models/roboflow/multi_class_classification/v2.py b/inference/core/workflows/core_steps/models/roboflow/multi_class_classification/v2.py index 0a9324c295..d272f41611 100644 --- a/inference/core/workflows/core_steps/models/roboflow/multi_class_classification/v2.py +++ b/inference/core/workflows/core_steps/models/roboflow/multi_class_classification/v2.py @@ -98,6 +98,11 @@ class BlockManifest(WorkflowBlockManifest): examples=["my_project", "$inputs.al_target_project"], ) + + @classmethod + def get_compatible_task_types(cls) -> list: + return ["classification"] + @classmethod def get_parameters_accepting_batches(cls) -> List[str]: return ["images"] diff --git a/inference/core/workflows/core_steps/models/roboflow/multi_label_classification/v1.py b/inference/core/workflows/core_steps/models/roboflow/multi_label_classification/v1.py index 9525eef1d1..4d19aa5fe5 100644 --- a/inference/core/workflows/core_steps/models/roboflow/multi_label_classification/v1.py +++ b/inference/core/workflows/core_steps/models/roboflow/multi_label_classification/v1.py @@ -101,6 +101,11 @@ class BlockManifest(WorkflowBlockManifest): examples=["my_project", "$inputs.al_target_project"], ) + + @classmethod + def get_compatible_task_types(cls) -> list: + return ["multi-label-classification"] + @classmethod def get_parameters_accepting_batches(cls) -> List[str]: return ["images"] diff --git a/inference/core/workflows/core_steps/models/roboflow/multi_label_classification/v2.py b/inference/core/workflows/core_steps/models/roboflow/multi_label_classification/v2.py index 0c78af89c9..8fd4893d5d 100644 --- a/inference/core/workflows/core_steps/models/roboflow/multi_label_classification/v2.py +++ b/inference/core/workflows/core_steps/models/roboflow/multi_label_classification/v2.py @@ -97,6 +97,11 @@ class BlockManifest(WorkflowBlockManifest): examples=["my_project", "$inputs.al_target_project"], ) + + @classmethod + def get_compatible_task_types(cls) -> list: + return ["multi-label-classification"] + @classmethod def get_parameters_accepting_batches(cls) -> List[str]: return ["images"] diff --git a/inference/core/workflows/core_steps/models/roboflow/object_detection/v1.py b/inference/core/workflows/core_steps/models/roboflow/object_detection/v1.py index ebf88edb55..96f087a13f 100644 --- a/inference/core/workflows/core_steps/models/roboflow/object_detection/v1.py +++ b/inference/core/workflows/core_steps/models/roboflow/object_detection/v1.py @@ -136,6 +136,11 @@ class BlockManifest(WorkflowBlockManifest): examples=["my_project", "$inputs.al_target_project"], ) + + @classmethod + def get_compatible_task_types(cls) -> list: + return ["object-detection"] + @classmethod def get_parameters_accepting_batches(cls) -> List[str]: return ["images"] diff --git a/inference/core/workflows/core_steps/models/roboflow/object_detection/v2.py b/inference/core/workflows/core_steps/models/roboflow/object_detection/v2.py index eaa50fbd9a..3cd0beec49 100644 --- a/inference/core/workflows/core_steps/models/roboflow/object_detection/v2.py +++ b/inference/core/workflows/core_steps/models/roboflow/object_detection/v2.py @@ -132,6 +132,11 @@ class BlockManifest(WorkflowBlockManifest): examples=["my_project", "$inputs.al_target_project"], ) + + @classmethod + def get_compatible_task_types(cls) -> list: + return ["object-detection"] + @classmethod def get_parameters_accepting_batches(cls) -> List[str]: return ["images"] diff --git a/inference/core/workflows/core_steps/models/roboflow/semantic_segmentation/v1.py b/inference/core/workflows/core_steps/models/roboflow/semantic_segmentation/v1.py index 881caa8f66..a0e62e99e2 100644 --- a/inference/core/workflows/core_steps/models/roboflow/semantic_segmentation/v1.py +++ b/inference/core/workflows/core_steps/models/roboflow/semantic_segmentation/v1.py @@ -84,6 +84,10 @@ class BlockManifest(WorkflowBlockManifest): images: Selector(kind=[IMAGE_KIND]) = ImageInputField model_id: Union[Selector(kind=[ROBOFLOW_MODEL_ID_KIND]), str] = RoboflowModelField + @classmethod + def get_compatible_task_types(cls) -> list: + return ["semantic-segmentation"] + @classmethod def get_parameters_accepting_batches(cls) -> List[str]: return ["images"] diff --git a/inference/core/workflows/core_steps/sinks/roboflow/custom_metadata/v1.py b/inference/core/workflows/core_steps/sinks/roboflow/custom_metadata/v1.py index 265be2e6b8..7b3760522e 100644 --- a/inference/core/workflows/core_steps/sinks/roboflow/custom_metadata/v1.py +++ b/inference/core/workflows/core_steps/sinks/roboflow/custom_metadata/v1.py @@ -160,6 +160,11 @@ class BlockManifest(WorkflowBlockManifest): examples=[True, False], ) + + @classmethod + def get_air_gapped_availability(cls) -> dict: + return {"available": False, "reason": "requires_internet"} + @classmethod def describe_outputs(cls) -> List[OutputDefinition]: return [ diff --git a/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v1.py b/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v1.py index 498057a893..b09c22f28c 100644 --- a/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v1.py +++ b/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v1.py @@ -244,6 +244,11 @@ class BlockManifest(WorkflowBlockManifest): examples=["serial_12345", "camera1_frame_001", "$inputs.filename"], ) + + @classmethod + def get_air_gapped_availability(cls) -> dict: + return {"available": False, "reason": "requires_internet"} + @classmethod def get_parameters_accepting_batches(cls) -> List[str]: return ["images", "predictions", "image_name"] diff --git a/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v2.py b/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v2.py index fc16a4ede3..99a497605a 100644 --- a/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v2.py +++ b/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v2.py @@ -233,6 +233,11 @@ class BlockManifest(WorkflowBlockManifest): examples=[{"camera_id": "cam_01", "location": "$inputs.location"}, {}], ) + + @classmethod + def get_air_gapped_availability(cls) -> dict: + return {"available": False, "reason": "requires_internet"} + @classmethod def get_parameters_accepting_batches(cls) -> List[str]: return ["images", "predictions", "image_name"] diff --git a/inference/core/workflows/core_steps/sinks/roboflow/model_monitoring_inference_aggregator/v1.py b/inference/core/workflows/core_steps/sinks/roboflow/model_monitoring_inference_aggregator/v1.py index de957f3217..63f2ec2ba7 100644 --- a/inference/core/workflows/core_steps/sinks/roboflow/model_monitoring_inference_aggregator/v1.py +++ b/inference/core/workflows/core_steps/sinks/roboflow/model_monitoring_inference_aggregator/v1.py @@ -189,6 +189,10 @@ class BlockManifest(WorkflowBlockManifest): examples=[True, False], ) + @classmethod + def get_air_gapped_availability(cls) -> dict: + return {"available": False, "reason": "requires_internet"} + @field_validator("frequency") @classmethod def ensure_frequency_is_correct(cls, value: Any) -> Any: diff --git a/inference/core/workflows/core_steps/sinks/slack/notification/v1.py b/inference/core/workflows/core_steps/sinks/slack/notification/v1.py index 80709ebe1f..9d00a8ef69 100644 --- a/inference/core/workflows/core_steps/sinks/slack/notification/v1.py +++ b/inference/core/workflows/core_steps/sinks/slack/notification/v1.py @@ -186,6 +186,10 @@ class BlockManifest(WorkflowBlockManifest): json_schema_extra={"hidden": True}, ) + @classmethod + def get_air_gapped_availability(cls) -> dict: + return {"available": False, "reason": "requires_internet"} + @field_validator("cooldown_seconds") @classmethod def ensure_cooldown_seconds_within_bounds(cls, value: Any) -> dict: diff --git a/inference/core/workflows/core_steps/sinks/twilio/sms/v1.py b/inference/core/workflows/core_steps/sinks/twilio/sms/v1.py index 04ba0fd435..9a45ec0661 100644 --- a/inference/core/workflows/core_steps/sinks/twilio/sms/v1.py +++ b/inference/core/workflows/core_steps/sinks/twilio/sms/v1.py @@ -211,6 +211,10 @@ class BlockManifest(WorkflowBlockManifest): }, ) + @classmethod + def get_air_gapped_availability(cls) -> dict: + return {"available": False, "reason": "requires_internet"} + @field_validator("cooldown_seconds") @classmethod def ensure_cooldown_seconds_within_bounds(cls, value: Any) -> dict: diff --git a/inference/core/workflows/core_steps/sinks/twilio/sms/v2.py b/inference/core/workflows/core_steps/sinks/twilio/sms/v2.py index 26c3539881..9d0c57fd3d 100644 --- a/inference/core/workflows/core_steps/sinks/twilio/sms/v2.py +++ b/inference/core/workflows/core_steps/sinks/twilio/sms/v2.py @@ -308,6 +308,11 @@ class BlockManifest(WorkflowBlockManifest): }, ) + + @classmethod + def get_air_gapped_availability(cls) -> dict: + return {"available": False, "reason": "requires_internet"} + @classmethod def describe_outputs(cls) -> List[OutputDefinition]: return [ diff --git a/inference/core/workflows/core_steps/sinks/webhook/v1.py b/inference/core/workflows/core_steps/sinks/webhook/v1.py index 19c2a047f5..0343f6f43a 100644 --- a/inference/core/workflows/core_steps/sinks/webhook/v1.py +++ b/inference/core/workflows/core_steps/sinks/webhook/v1.py @@ -340,6 +340,11 @@ class BlockManifest(WorkflowBlockManifest): examples=["$inputs.cooldown_seconds", 10], ) + + @classmethod + def get_air_gapped_availability(cls) -> dict: + return {"available": False, "reason": "requires_internet"} + @classmethod def describe_outputs(cls) -> List[OutputDefinition]: return [ diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/unit/core/__init__.py b/tests/unit/core/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/unit/core/cache/__init__.py b/tests/unit/core/cache/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/unit/core/cache/test_air_gapped.py b/tests/unit/core/cache/test_air_gapped.py new file mode 100644 index 0000000000..fb583eb047 --- /dev/null +++ b/tests/unit/core/cache/test_air_gapped.py @@ -0,0 +1,299 @@ +"""Tests for inference.core.cache.air_gapped scanning utilities.""" + +import json +import os +from dataclasses import dataclass +from typing import Any, Dict, List +from unittest.mock import MagicMock, patch + +import pytest + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _write_model_type_json( + cache_dir: str, + model_id: str, + metadata: dict, +) -> None: + """Write a ``model_type.json`` marker inside *cache_dir/model_id*.""" + model_dir = os.path.join(cache_dir, model_id) + os.makedirs(model_dir, exist_ok=True) + with open(os.path.join(model_dir, "model_type.json"), "w") as fh: + json.dump(metadata, fh) + + +def _make_block_spec( + identifier: str, + manifest_cls: type, +): + """Build a minimal BlockSpecification-like object.""" + + @dataclass + class _FakeBlockSpec: + block_source: str + identifier: str + block_class: Any + manifest_class: Any + + return _FakeBlockSpec( + block_source="test", + identifier=identifier, + block_class=MagicMock(), + manifest_class=manifest_cls, + ) + + +# --------------------------------------------------------------------------- +# scan_cached_models +# --------------------------------------------------------------------------- + + +class TestScanTraditionalCache: + """model_type.json written with ``project_task_type`` / ``model_type`` keys.""" + + def test_scan_traditional_cache(self, tmp_path): + from inference.core.cache.air_gapped import scan_cached_models + + cache = str(tmp_path) + _write_model_type_json( + cache, + "my-workspace/my-project/3", + {"project_task_type": "object-detection", "model_type": "yolov8n"}, + ) + + result = scan_cached_models(cache) + + assert len(result) == 1 + m = result[0] + assert m["model_id"] == "my-workspace/my-project/3" + assert m["task_type"] == "object-detection" + assert m["model_architecture"] == "yolov8n" + assert m["is_foundation"] is False + + +class TestScanInferenceModelsCache: + """model_type.json written with ``taskType`` / ``modelArchitecture`` keys.""" + + def test_scan_inference_models_cache(self, tmp_path): + from inference.core.cache.air_gapped import scan_cached_models + + cache = str(tmp_path) + _write_model_type_json( + cache, + "coco/1", + {"taskType": "instance-segmentation", "modelArchitecture": "yolact"}, + ) + + result = scan_cached_models(cache) + + assert len(result) == 1 + m = result[0] + assert m["model_id"] == "coco/1" + assert m["task_type"] == "instance-segmentation" + assert m["model_architecture"] == "yolact" + assert m["is_foundation"] is False + + +class TestSkipNonModelDirs: + """Ensure ``workflow/`` and ``_file_locks/`` are not traversed.""" + + def test_skip_non_model_dirs(self, tmp_path): + from inference.core.cache.air_gapped import scan_cached_models + + cache = str(tmp_path) + + # These should be skipped. + _write_model_type_json( + cache, + "workflow/some-workflow", + {"project_task_type": "object-detection", "model_type": "yolov8n"}, + ) + _write_model_type_json( + cache, + "_file_locks/lock-dir", + {"project_task_type": "object-detection", "model_type": "yolov8n"}, + ) + + # This should be found. + _write_model_type_json( + cache, + "real-workspace/real-project/1", + {"project_task_type": "classification", "model_type": "vit"}, + ) + + result = scan_cached_models(cache) + + assert len(result) == 1 + assert result[0]["model_id"] == "real-workspace/real-project/1" + + +# --------------------------------------------------------------------------- +# Foundation model detection +# --------------------------------------------------------------------------- + + +class TestFoundationModelDetection: + """Block with ``get_required_cache_artifacts()`` whose files exist.""" + + def test_foundation_model_detected_when_cached(self, tmp_path): + from inference.core.cache.air_gapped import get_cached_foundation_models + + cache = str(tmp_path) + model_id = "foundation/clip" + + # Create the required files in the cache. + model_dir = os.path.join(cache, model_id) + os.makedirs(model_dir, exist_ok=True) + for fname in ["weights.pt", "config.json"]: + open(os.path.join(model_dir, fname), "w").close() + + class FakeManifest: + """Minimal manifest: only model_id and files are required. + + Name is derived from model_json_schema() metadata. + """ + + @classmethod + def get_required_cache_artifacts(cls) -> Dict[str, Any]: + return { + "model_id": "foundation/clip", + "files": ["weights.pt", "config.json"], + } + + @classmethod + def model_json_schema(cls) -> dict: + return {"name": "CLIP"} + + block = _make_block_spec("roboflow_core/clip@v1", FakeManifest) + + with patch( + "inference.core.cache.air_gapped.MODEL_CACHE_DIR", + cache, + ), patch( + "inference.core.cache.model_artifacts.MODEL_CACHE_DIR", + cache, + ): + result = get_cached_foundation_models(blocks=[block]) + + assert len(result) == 1 + m = result[0] + assert m["model_id"] == "foundation/clip" + assert m["is_foundation"] is True + assert m["name"] == "CLIP" + + +class TestFoundationModelMissing: + """Block with ``get_required_cache_artifacts()`` whose files do NOT exist.""" + + def test_foundation_model_not_detected_when_missing(self, tmp_path): + from inference.core.cache.air_gapped import get_cached_foundation_models + + cache = str(tmp_path) + + class FakeManifest: + @classmethod + def get_required_cache_artifacts(cls) -> Dict[str, Any]: + return { + "model_id": "foundation/sam", + "files": ["encoder.pt", "decoder.pt"], + } + + @classmethod + def model_json_schema(cls) -> dict: + return {"name": "SAM"} + + block = _make_block_spec("roboflow_core/sam@v1", FakeManifest) + + with patch( + "inference.core.cache.air_gapped.MODEL_CACHE_DIR", + cache, + ), patch( + "inference.core.cache.model_artifacts.MODEL_CACHE_DIR", + cache, + ): + result = get_cached_foundation_models(blocks=[block]) + + assert len(result) == 0 + + +class TestFoundationModelListFormat: + """Block with ``get_required_cache_artifacts()`` returning a list of model_ids.""" + + def test_detected_when_any_variant_cached(self, tmp_path): + from inference.core.cache.air_gapped import get_cached_foundation_models + + cache = str(tmp_path) + + # Create a cache directory for one of the variants with a file. + variant_dir = os.path.join(cache, "clip", "ViT-B-32") + os.makedirs(variant_dir, exist_ok=True) + open(os.path.join(variant_dir, "visual.onnx"), "w").close() + + class FakeManifest: + @classmethod + def get_required_cache_artifacts(cls) -> List[str]: + return ["clip/RN50", "clip/ViT-B-32", "clip/ViT-L-14"] + + @classmethod + def model_json_schema(cls) -> dict: + return {"name": "CLIP"} + + block = _make_block_spec("roboflow_core/clip@v1", FakeManifest) + + with patch("inference.core.cache.air_gapped.MODEL_CACHE_DIR", cache): + result = get_cached_foundation_models(blocks=[block]) + + assert len(result) == 1 + m = result[0] + assert m["is_foundation"] is True + assert m["name"] == "CLIP" + + def test_not_detected_when_no_variant_cached(self, tmp_path): + from inference.core.cache.air_gapped import get_cached_foundation_models + + cache = str(tmp_path) + + class FakeManifest: + @classmethod + def get_required_cache_artifacts(cls) -> List[str]: + return ["clip/RN50", "clip/ViT-B-32"] + + @classmethod + def model_json_schema(cls) -> dict: + return {"name": "CLIP"} + + block = _make_block_spec("roboflow_core/clip@v1", FakeManifest) + + with patch("inference.core.cache.air_gapped.MODEL_CACHE_DIR", cache): + result = get_cached_foundation_models(blocks=[block]) + + assert len(result) == 0 + + def test_not_detected_when_variant_dir_empty(self, tmp_path): + from inference.core.cache.air_gapped import get_cached_foundation_models + + cache = str(tmp_path) + + # Create a cache directory but with no files in it. + variant_dir = os.path.join(cache, "clip", "ViT-B-32") + os.makedirs(variant_dir, exist_ok=True) + + class FakeManifest: + @classmethod + def get_required_cache_artifacts(cls) -> List[str]: + return ["clip/ViT-B-32"] + + @classmethod + def model_json_schema(cls) -> dict: + return {"name": "CLIP"} + + block = _make_block_spec("roboflow_core/clip@v1", FakeManifest) + + with patch("inference.core.cache.air_gapped.MODEL_CACHE_DIR", cache): + result = get_cached_foundation_models(blocks=[block]) + + assert len(result) == 0 diff --git a/tests/unit/core/interfaces/__init__.py b/tests/unit/core/interfaces/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/unit/core/interfaces/http/__init__.py b/tests/unit/core/interfaces/http/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/unit/core/interfaces/http/test_blocks_describe_airgapped.py b/tests/unit/core/interfaces/http/test_blocks_describe_airgapped.py new file mode 100644 index 0000000000..16a6ec53c2 --- /dev/null +++ b/tests/unit/core/interfaces/http/test_blocks_describe_airgapped.py @@ -0,0 +1,464 @@ +"""Tests for air-gapped enrichment in /workflows/blocks/describe.""" + +import copy +import importlib +import sys +from typing import Any, Dict, List, Optional, Type +from unittest.mock import MagicMock, patch + +import pytest +from pydantic import BaseModel, Field + +from inference.core.workflows.execution_engine.entities.base import OutputDefinition +from inference.core.workflows.execution_engine.introspection.entities import ( + BlockDescription, +) +from inference.core.workflows.prototypes.block import WorkflowBlock + + +# --------------------------------------------------------------------------- +# Helpers: stub manifest classes +# --------------------------------------------------------------------------- + + +class _BaseManifest(BaseModel): + type: str = "stub" + + @classmethod + def get_execution_engine_compatibility(cls) -> Optional[str]: + return None + + @classmethod + def get_input_dimensionality_offsets(cls) -> Dict[str, int]: + return {} + + @classmethod + def get_dimensionality_reference_property(cls) -> Optional[str]: + return None + + @classmethod + def get_output_dimensionality_offset(cls) -> int: + return 0 + + @classmethod + def describe_outputs(cls) -> List[OutputDefinition]: + return [] + + +class PlainBlockManifest(_BaseManifest): + """A block with no air-gapped classmethods -- should default to available.""" + + type: str = "test/plain_block@v1" + + +class CloudOnlyManifest(_BaseManifest): + """Simulates an OpenAI-style block that requires internet.""" + + type: str = "test/cloud_only@v1" + + +# Attach the classmethod manually to avoid Pydantic treating it as a validator. +@classmethod # type: ignore[misc] +def _cloud_only_air_gapped(cls) -> Dict[str, Any]: + return {"available": False, "reason": "requires_internet"} + + +CloudOnlyManifest.get_air_gapped_availability = _cloud_only_air_gapped + + +class FoundationModelManifest(_BaseManifest): + """Simulates a foundation model block with cache artifacts.""" + + type: str = "test/foundation_model@v1" + + +@classmethod # type: ignore[misc] +def _foundation_cache_artifacts(cls) -> Dict[str, Any]: + return { + "model_id": "yolov8n-640", + "files": ["weights.pt", "config.json"], + } + + +@classmethod # type: ignore[misc] +def _foundation_task_types(cls) -> List[str]: + return ["object-detection", "instance-segmentation"] + + +FoundationModelManifest.get_required_cache_artifacts = _foundation_cache_artifacts +FoundationModelManifest.get_compatible_task_types = _foundation_task_types + + +class ListFormatFoundationManifest(_BaseManifest): + """Simulates a foundation model block with list-format cache artifacts.""" + + type: str = "test/list_foundation@v1" + + +@classmethod # type: ignore[misc] +def _list_cache_artifacts(cls) -> List[str]: + return ["clip/RN50", "clip/ViT-B-32", "clip/ViT-L-14"] + + +@classmethod # type: ignore[misc] +def _list_task_types(cls) -> List[str]: + return ["embedding"] + + +ListFormatFoundationManifest.get_required_cache_artifacts = _list_cache_artifacts +ListFormatFoundationManifest.get_compatible_task_types = _list_task_types + + +class LocalNetworkManifest(_BaseManifest): + """Simulates a local-network block (ONVIF, S3, etc.) -- should be available.""" + + type: str = "test/local_network@v1" + + +class _StubBlock(WorkflowBlock): + @classmethod + def get_manifest(cls): + return PlainBlockManifest + + async def run(self, *args, **kwargs): + pass + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +def _make_block_description( + manifest_cls: Type[BaseModel], + block_type: str = "test/stub@v1", +) -> BlockDescription: + schema = manifest_cls.model_json_schema() + return BlockDescription( + manifest_class=manifest_cls, + block_class=_StubBlock, + block_schema=schema, + outputs_manifest=[], + block_source="test", + fully_qualified_block_class_name=f"test.{manifest_cls.__name__}", + human_friendly_block_name=manifest_cls.__name__, + manifest_type_identifier=block_type, + manifest_type_identifier_aliases=[], + execution_engine_compatibility=None, + input_dimensionality_offsets={}, + dimensionality_reference_property=None, + output_dimensionality_offset=0, + ) + + +def _make_result(blocks: List[BlockDescription]) -> "WorkflowsBlocksDescription": + from inference.core.entities.responses.workflows import ( + UniversalQueryLanguageDescription, + WorkflowsBlocksDescription, + ) + + return WorkflowsBlocksDescription( + blocks=blocks, + declared_kinds=[], + kinds_connections={}, + primitives_connections=[], + universal_query_language_description=UniversalQueryLanguageDescription( + operations_description=[], + operators_descriptions=[], + ), + dynamic_block_definition_schema={}, + ) + + +def _import_enrichment(): + """Import only the enrichment helpers, avoiding the full block-loading chain.""" + # We need the module to be importable without triggering + # inference.core.workflows.execution_engine.core (which loads all blocks). + # The enrichment function itself does not depend on that import chain, + # so we can import it directly once the module is loaded. + import inference.core.interfaces.http.handlers.workflows as mod + + return mod.enrich_with_air_gapped_info, mod._get_air_gapped_info_for_block + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +class TestGetAirGappedInfoForBlock: + """Unit tests for _get_air_gapped_info_for_block (no full import chain needed).""" + + def test_plain_block_defaults_to_available(self): + from inference.core.interfaces.http.handlers.workflows import ( + _get_air_gapped_info_for_block, + ) + + info = _get_air_gapped_info_for_block(PlainBlockManifest) + assert info["available"] is True + + def test_cloud_only_block_requires_internet(self): + from inference.core.interfaces.http.handlers.workflows import ( + _get_air_gapped_info_for_block, + ) + + info = _get_air_gapped_info_for_block(CloudOnlyManifest) + assert info["available"] is False + assert info["reason"] == "requires_internet" + + def test_local_network_block_is_available(self): + from inference.core.interfaces.http.handlers.workflows import ( + _get_air_gapped_info_for_block, + ) + + info = _get_air_gapped_info_for_block(LocalNetworkManifest) + assert info["available"] is True + + @patch( + "inference.core.interfaces.http.handlers.workflows.are_all_files_cached", + return_value=True, + ) + def test_foundation_model_cached(self, mock_cache): + from inference.core.interfaces.http.handlers.workflows import ( + _get_air_gapped_info_for_block, + ) + + info = _get_air_gapped_info_for_block(FoundationModelManifest) + assert info["available"] is True + assert info["model_id"] == "yolov8n-640" + assert "object-detection" in info["compatible_task_types"] + mock_cache.assert_called_once() + + @patch( + "inference.core.interfaces.http.handlers.workflows.are_all_files_cached", + return_value=False, + ) + def test_foundation_model_not_cached(self, mock_cache): + from inference.core.interfaces.http.handlers.workflows import ( + _get_air_gapped_info_for_block, + ) + + info = _get_air_gapped_info_for_block(FoundationModelManifest) + assert info["available"] is False + assert info["reason"] == "missing_cache_artifacts" + assert info["model_id"] == "yolov8n-640" + + +class TestEnrichWithAirGappedInfo: + def test_air_gapped_info_added_when_flag_set(self): + """When enrich_with_air_gapped_info is called, blocks get air_gapped_info.""" + from inference.core.interfaces.http.handlers.workflows import ( + enrich_with_air_gapped_info, + ) + + blocks = [_make_block_description(PlainBlockManifest, "test/plain_block@v1")] + result = _make_result(blocks) + + enriched = enrich_with_air_gapped_info(result) + + for block in enriched.blocks: + assert "json_schema_extra" in block.block_schema + info = block.block_schema["json_schema_extra"]["air_gapped_info"] + assert "available" in info + + def test_air_gapped_info_absent_when_flag_not_set(self): + """Without calling enrich, blocks should NOT have air_gapped_info.""" + blocks = [_make_block_description(PlainBlockManifest, "test/plain_block@v1")] + result = _make_result(blocks) + + for block in result.blocks: + extra = block.block_schema.get("json_schema_extra", {}) + assert "air_gapped_info" not in extra + + def test_cloud_only_blocks_marked_requires_internet(self): + """Blocks with get_air_gapped_availability returning requires_internet + should be marked as unavailable.""" + from inference.core.interfaces.http.handlers.workflows import ( + enrich_with_air_gapped_info, + ) + + blocks = [ + _make_block_description(CloudOnlyManifest, "test/cloud_only@v1"), + ] + result = _make_result(blocks) + + enriched = enrich_with_air_gapped_info(result) + + info = enriched.blocks[0].block_schema["json_schema_extra"]["air_gapped_info"] + assert info["available"] is False + assert info["reason"] == "requires_internet" + + def test_local_network_blocks_not_marked_unavailable(self): + """Local network blocks with no special classmethods + should default to available=True.""" + from inference.core.interfaces.http.handlers.workflows import ( + enrich_with_air_gapped_info, + ) + + blocks = [ + _make_block_description(LocalNetworkManifest, "test/local_network@v1"), + ] + result = _make_result(blocks) + + enriched = enrich_with_air_gapped_info(result) + + info = enriched.blocks[0].block_schema["json_schema_extra"]["air_gapped_info"] + assert info["available"] is True + + @patch( + "inference.core.interfaces.http.handlers.workflows.ENABLE_BUILDER", + False, + ) + def test_air_gapped_ignored_without_enable_builder(self): + """When ENABLE_BUILDER is False, air_gapped=True should be silently ignored + in handle_describe_workflows_blocks_request.""" + from inference.core.interfaces.http.handlers.workflows import ( + handle_describe_workflows_blocks_request, + ) + + with patch( + "inference.core.interfaces.http.handlers.workflows.describe_available_blocks" + ) as mock_describe, patch( + "inference.core.interfaces.http.handlers.workflows.discover_blocks_connections" + ) as mock_connections, patch( + "inference.core.interfaces.http.handlers.workflows.compile_dynamic_blocks", + return_value=[], + ), patch( + "inference.core.interfaces.http.handlers.workflows.prepare_operations_descriptions", + return_value=[], + ), patch( + "inference.core.interfaces.http.handlers.workflows.prepare_operators_descriptions", + return_value=[], + ): + blocks = [ + _make_block_description(PlainBlockManifest, "test/plain_block@v1") + ] + from inference.core.workflows.execution_engine.introspection.entities import ( + BlocksDescription, + ) + + mock_describe.return_value = BlocksDescription( + blocks=blocks, declared_kinds=[] + ) + mock_connections.return_value = MagicMock( + kinds_connections={}, primitives_connections=[] + ) + + result = handle_describe_workflows_blocks_request(air_gapped=True) + + for block in result.blocks: + extra = block.block_schema.get("json_schema_extra", {}) + assert "air_gapped_info" not in extra + + def test_enrichment_does_not_mutate_original(self): + """enrich_with_air_gapped_info must not mutate the original result object.""" + from inference.core.interfaces.http.handlers.workflows import ( + enrich_with_air_gapped_info, + ) + + blocks = [_make_block_description(PlainBlockManifest, "test/plain_block@v1")] + result = _make_result(blocks) + original_schema = copy.deepcopy(result.blocks[0].block_schema) + + enrich_with_air_gapped_info(result) + + # The original result's block_schema should be untouched + assert result.blocks[0].block_schema == original_schema + + @patch( + "inference.core.interfaces.http.handlers.workflows.are_all_files_cached", + return_value=True, + ) + def test_foundation_model_cached_shows_available(self, mock_cache): + """Foundation model blocks with all artifacts cached should show available=True.""" + from inference.core.interfaces.http.handlers.workflows import ( + enrich_with_air_gapped_info, + ) + + blocks = [ + _make_block_description( + FoundationModelManifest, "test/foundation_model@v1" + ), + ] + result = _make_result(blocks) + + enriched = enrich_with_air_gapped_info(result) + + info = enriched.blocks[0].block_schema["json_schema_extra"]["air_gapped_info"] + assert info["available"] is True + assert info["model_id"] == "yolov8n-640" + assert "compatible_task_types" in info + assert "object-detection" in info["compatible_task_types"] + + @patch( + "inference.core.interfaces.http.handlers.workflows.are_all_files_cached", + return_value=False, + ) + def test_foundation_model_not_cached_shows_unavailable(self, mock_cache): + """Foundation model blocks with missing artifacts should show available=False.""" + from inference.core.interfaces.http.handlers.workflows import ( + enrich_with_air_gapped_info, + ) + + blocks = [ + _make_block_description( + FoundationModelManifest, "test/foundation_model@v1" + ), + ] + result = _make_result(blocks) + + enriched = enrich_with_air_gapped_info(result) + + info = enriched.blocks[0].block_schema["json_schema_extra"]["air_gapped_info"] + assert info["available"] is False + assert info["reason"] == "missing_cache_artifacts" + + +class TestListFormatFoundationModel: + """Tests for blocks using the new list-format get_required_cache_artifacts.""" + + def test_list_format_cached_variant(self, tmp_path): + """List-format block with a cached variant directory should be available.""" + import os + + from inference.core.interfaces.http.handlers.workflows import ( + _get_air_gapped_info_for_block, + ) + + cache = str(tmp_path) + variant_dir = os.path.join(cache, "clip", "ViT-B-32") + os.makedirs(variant_dir, exist_ok=True) + open(os.path.join(variant_dir, "visual.onnx"), "w").close() + + with patch( + "inference.core.interfaces.http.handlers.workflows.MODEL_CACHE_DIR", + cache, + ), patch( + "inference.core.cache.air_gapped.MODEL_CACHE_DIR", + cache, + ): + info = _get_air_gapped_info_for_block(ListFormatFoundationManifest) + + assert info["available"] is True + assert info["model_id"] == "clip/RN50" + assert "embedding" in info["compatible_task_types"] + + def test_list_format_no_cached_variant(self, tmp_path): + """List-format block with no cached variants should be unavailable.""" + from inference.core.interfaces.http.handlers.workflows import ( + _get_air_gapped_info_for_block, + ) + + cache = str(tmp_path) + + with patch( + "inference.core.interfaces.http.handlers.workflows.MODEL_CACHE_DIR", + cache, + ), patch( + "inference.core.cache.air_gapped.MODEL_CACHE_DIR", + cache, + ): + info = _get_air_gapped_info_for_block(ListFormatFoundationManifest) + + assert info["available"] is False + assert info["reason"] == "missing_cache_artifacts" diff --git a/tests/unit/core/workflows/test_air_gapped_blocks.py b/tests/unit/core/workflows/test_air_gapped_blocks.py new file mode 100644 index 0000000000..60125ba753 --- /dev/null +++ b/tests/unit/core/workflows/test_air_gapped_blocks.py @@ -0,0 +1,117 @@ +""" +Tests verifying that internet-requiring blocks declare air-gapped unavailability +and Roboflow model blocks declare compatible task types. +""" + +import pytest + + +# --- Part 1: Air-gapped availability tests --- + +AIR_GAPPED_BLOCK_MANIFESTS = [] + + +def _import_manifest(module_path): + """Import BlockManifest from a module path.""" + import importlib + + mod = importlib.import_module(module_path) + return mod.BlockManifest + + +# Foundation model blocks +_FOUNDATION_BASE = "inference.core.workflows.core_steps.models.foundation" +_FOUNDATION_MODULES = [ + f"{_FOUNDATION_BASE}.anthropic_claude.v1", + f"{_FOUNDATION_BASE}.anthropic_claude.v2", + f"{_FOUNDATION_BASE}.anthropic_claude.v3", + f"{_FOUNDATION_BASE}.openai.v1", + f"{_FOUNDATION_BASE}.openai.v2", + f"{_FOUNDATION_BASE}.openai.v3", + f"{_FOUNDATION_BASE}.openai.v4", + f"{_FOUNDATION_BASE}.google_gemini.v1", + f"{_FOUNDATION_BASE}.google_gemini.v2", + f"{_FOUNDATION_BASE}.google_gemini.v3", + f"{_FOUNDATION_BASE}.google_vision_ocr.v1", + f"{_FOUNDATION_BASE}.stability_ai.image_gen.v1", + f"{_FOUNDATION_BASE}.stability_ai.inpainting.v1", + f"{_FOUNDATION_BASE}.stability_ai.outpainting.v1", + f"{_FOUNDATION_BASE}.lmm.v1", + f"{_FOUNDATION_BASE}.lmm_classifier.v1", + f"{_FOUNDATION_BASE}.llama_vision.v1", + f"{_FOUNDATION_BASE}.segment_anything3_3d.v1", +] + +# Sink blocks +_SINKS_BASE = "inference.core.workflows.core_steps.sinks" +_SINK_MODULES = [ + f"{_SINKS_BASE}.twilio.sms.v1", + f"{_SINKS_BASE}.twilio.sms.v2", + f"{_SINKS_BASE}.slack.notification.v1", + f"{_SINKS_BASE}.webhook.v1", + f"{_SINKS_BASE}.roboflow.dataset_upload.v1", + f"{_SINKS_BASE}.roboflow.dataset_upload.v2", + f"{_SINKS_BASE}.roboflow.custom_metadata.v1", + f"{_SINKS_BASE}.roboflow.model_monitoring_inference_aggregator.v1", +] + +ALL_AIR_GAPPED_MODULES = _FOUNDATION_MODULES + _SINK_MODULES + + +@pytest.mark.parametrize("module_path", ALL_AIR_GAPPED_MODULES) +def test_air_gapped_availability_exists(module_path): + manifest = _import_manifest(module_path) + assert hasattr(manifest, "get_air_gapped_availability"), ( + f"{module_path}.BlockManifest missing get_air_gapped_availability classmethod" + ) + + +@pytest.mark.parametrize("module_path", ALL_AIR_GAPPED_MODULES) +def test_air_gapped_availability_returns_correct_value(module_path): + manifest = _import_manifest(module_path) + result = manifest.get_air_gapped_availability() + assert isinstance(result, dict) + assert result["available"] is False + assert result["reason"] == "requires_internet" + + +# --- Part 2: Compatible task types tests --- + +_ROBOFLOW_BASE = "inference.core.workflows.core_steps.models.roboflow" + +TASK_TYPE_TEST_CASES = [ + (f"{_ROBOFLOW_BASE}.object_detection.v1", ["object-detection"]), + (f"{_ROBOFLOW_BASE}.object_detection.v2", ["object-detection"]), + (f"{_ROBOFLOW_BASE}.instance_segmentation.v1", ["instance-segmentation"]), + (f"{_ROBOFLOW_BASE}.instance_segmentation.v2", ["instance-segmentation"]), + (f"{_ROBOFLOW_BASE}.keypoint_detection.v1", ["keypoint-detection"]), + (f"{_ROBOFLOW_BASE}.keypoint_detection.v2", ["keypoint-detection"]), + (f"{_ROBOFLOW_BASE}.multi_class_classification.v1", ["classification"]), + (f"{_ROBOFLOW_BASE}.multi_class_classification.v2", ["classification"]), + (f"{_ROBOFLOW_BASE}.multi_label_classification.v1", ["multi-label-classification"]), + (f"{_ROBOFLOW_BASE}.multi_label_classification.v2", ["multi-label-classification"]), +] + + +@pytest.mark.parametrize( + "module_path,expected_types", + TASK_TYPE_TEST_CASES, + ids=[t[0].split(".")[-2] + "_" + t[0].split(".")[-1] for t in TASK_TYPE_TEST_CASES], +) +def test_compatible_task_types_exists(module_path, expected_types): + manifest = _import_manifest(module_path) + assert hasattr(manifest, "get_compatible_task_types"), ( + f"{module_path}.BlockManifest missing get_compatible_task_types classmethod" + ) + + +@pytest.mark.parametrize( + "module_path,expected_types", + TASK_TYPE_TEST_CASES, + ids=[t[0].split(".")[-2] + "_" + t[0].split(".")[-1] for t in TASK_TYPE_TEST_CASES], +) +def test_compatible_task_types_returns_correct_value(module_path, expected_types): + manifest = _import_manifest(module_path) + result = manifest.get_compatible_task_types() + assert isinstance(result, list) + assert result == expected_types From 835af61109f8cab1d2cb51cf93d061ef3a4c5fc7 Mon Sep 17 00:00:00 2001 From: Brad Dwyer Date: Wed, 25 Mar 2026 20:00:48 -0700 Subject: [PATCH 2/9] Make Style --- inference/core/cache/air_gapped.py | 4 +--- inference/core/interfaces/http/handlers/workflows.py | 2 +- .../core_steps/models/foundation/anthropic_claude/v1.py | 1 - .../core_steps/models/foundation/anthropic_claude/v2.py | 1 - .../core_steps/models/foundation/anthropic_claude/v3.py | 1 - .../core_steps/models/foundation/google_vision_ocr/v1.py | 1 - .../core/workflows/core_steps/models/foundation/lmm/v1.py | 1 - .../core_steps/models/foundation/lmm_classifier/v1.py | 1 - .../core/workflows/core_steps/models/foundation/openai/v1.py | 1 - .../core/workflows/core_steps/models/foundation/openai/v2.py | 1 - .../core/workflows/core_steps/models/foundation/openai/v3.py | 1 - .../core/workflows/core_steps/models/foundation/openai/v4.py | 1 - .../core_steps/models/foundation/segment_anything3_3d/v1.py | 1 - .../core_steps/models/foundation/stability_ai/image_gen/v1.py | 1 - .../models/foundation/stability_ai/inpainting/v1.py | 1 - .../models/foundation/stability_ai/outpainting/v1.py | 1 - .../core_steps/models/roboflow/instance_segmentation/v1.py | 1 - .../core_steps/models/roboflow/instance_segmentation/v2.py | 1 - .../core_steps/models/roboflow/keypoint_detection/v1.py | 1 - .../core_steps/models/roboflow/keypoint_detection/v2.py | 1 - .../models/roboflow/multi_class_classification/v1.py | 1 - .../models/roboflow/multi_class_classification/v2.py | 1 - .../models/roboflow/multi_label_classification/v1.py | 1 - .../models/roboflow/multi_label_classification/v2.py | 1 - .../core_steps/models/roboflow/object_detection/v1.py | 1 - .../core_steps/models/roboflow/object_detection/v2.py | 1 - .../workflows/core_steps/sinks/roboflow/custom_metadata/v1.py | 1 - .../workflows/core_steps/sinks/roboflow/dataset_upload/v1.py | 1 - .../workflows/core_steps/sinks/roboflow/dataset_upload/v2.py | 1 - inference/core/workflows/core_steps/sinks/twilio/sms/v2.py | 1 - inference/core/workflows/core_steps/sinks/webhook/v1.py | 1 - 31 files changed, 2 insertions(+), 33 deletions(-) diff --git a/inference/core/cache/air_gapped.py b/inference/core/cache/air_gapped.py index 5128eca110..e056901b86 100644 --- a/inference/core/cache/air_gapped.py +++ b/inference/core/cache/air_gapped.py @@ -114,9 +114,7 @@ def scan_cached_models(cache_dir: str) -> List[Dict[str, Any]]: continue # Support both traditional keys and inference-models metadata keys. - task_type = metadata.get(PROJECT_TASK_TYPE_KEY) or metadata.get( - "taskType", "" - ) + task_type = metadata.get(PROJECT_TASK_TYPE_KEY) or metadata.get("taskType", "") model_architecture = metadata.get(MODEL_TYPE_KEY) or metadata.get( "modelArchitecture", "" ) diff --git a/inference/core/interfaces/http/handlers/workflows.py b/inference/core/interfaces/http/handlers/workflows.py index cbec35c73a..03718484a3 100644 --- a/inference/core/interfaces/http/handlers/workflows.py +++ b/inference/core/interfaces/http/handlers/workflows.py @@ -7,7 +7,6 @@ from packaging.specifiers import SpecifierSet from inference.core.cache.model_artifacts import are_all_files_cached -from inference.core.env import ENABLE_BUILDER, MODEL_CACHE_DIR from inference.core.entities.responses.workflows import ( DescribeInterfaceResponse, ExternalBlockPropertyPrimitiveDefinition, @@ -15,6 +14,7 @@ UniversalQueryLanguageDescription, WorkflowsBlocksDescription, ) +from inference.core.env import ENABLE_BUILDER, MODEL_CACHE_DIR from inference.core.workflows.core_steps.common.query_language.introspection.core import ( prepare_operations_descriptions, prepare_operators_descriptions, diff --git a/inference/core/workflows/core_steps/models/foundation/anthropic_claude/v1.py b/inference/core/workflows/core_steps/models/foundation/anthropic_claude/v1.py index f51dc62841..ee0389be94 100644 --- a/inference/core/workflows/core_steps/models/foundation/anthropic_claude/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/anthropic_claude/v1.py @@ -225,7 +225,6 @@ def validate(self) -> "BlockManifest": ) return self - @classmethod def get_air_gapped_availability(cls) -> dict: return {"available": False, "reason": "requires_internet"} diff --git a/inference/core/workflows/core_steps/models/foundation/anthropic_claude/v2.py b/inference/core/workflows/core_steps/models/foundation/anthropic_claude/v2.py index 86110458e8..f394296ee1 100644 --- a/inference/core/workflows/core_steps/models/foundation/anthropic_claude/v2.py +++ b/inference/core/workflows/core_steps/models/foundation/anthropic_claude/v2.py @@ -304,7 +304,6 @@ def validate(self) -> "BlockManifest": ) return self - @classmethod def get_air_gapped_availability(cls) -> dict: return {"available": False, "reason": "requires_internet"} diff --git a/inference/core/workflows/core_steps/models/foundation/anthropic_claude/v3.py b/inference/core/workflows/core_steps/models/foundation/anthropic_claude/v3.py index b33c674291..81fe287dd7 100644 --- a/inference/core/workflows/core_steps/models/foundation/anthropic_claude/v3.py +++ b/inference/core/workflows/core_steps/models/foundation/anthropic_claude/v3.py @@ -321,7 +321,6 @@ def validate(self) -> "BlockManifest": ) return self - @classmethod def get_air_gapped_availability(cls) -> dict: return {"available": False, "reason": "requires_internet"} diff --git a/inference/core/workflows/core_steps/models/foundation/google_vision_ocr/v1.py b/inference/core/workflows/core_steps/models/foundation/google_vision_ocr/v1.py index 4bb31bb72a..ee6f2132ea 100644 --- a/inference/core/workflows/core_steps/models/foundation/google_vision_ocr/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/google_vision_ocr/v1.py @@ -98,7 +98,6 @@ class BlockManifest(WorkflowBlockManifest): examples=[["en", "fr"], ["de"]], ) - @classmethod def get_air_gapped_availability(cls) -> dict: return {"available": False, "reason": "requires_internet"} diff --git a/inference/core/workflows/core_steps/models/foundation/lmm/v1.py b/inference/core/workflows/core_steps/models/foundation/lmm/v1.py index 72202d61d3..9bab92949e 100644 --- a/inference/core/workflows/core_steps/models/foundation/lmm/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/lmm/v1.py @@ -130,7 +130,6 @@ class BlockManifest(WorkflowBlockManifest): examples=[{"count": "number of cats in the picture"}, "$inputs.json_output"], ) - @classmethod def get_air_gapped_availability(cls) -> dict: return {"available": False, "reason": "requires_internet"} diff --git a/inference/core/workflows/core_steps/models/foundation/lmm_classifier/v1.py b/inference/core/workflows/core_steps/models/foundation/lmm_classifier/v1.py index 232d7df4b1..e3c60590bb 100644 --- a/inference/core/workflows/core_steps/models/foundation/lmm_classifier/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/lmm_classifier/v1.py @@ -96,7 +96,6 @@ class BlockManifest(WorkflowBlockManifest): ) ) - @classmethod def get_air_gapped_availability(cls) -> dict: return {"available": False, "reason": "requires_internet"} diff --git a/inference/core/workflows/core_steps/models/foundation/openai/v1.py b/inference/core/workflows/core_steps/models/foundation/openai/v1.py index 320d4ec7df..6a47011e14 100644 --- a/inference/core/workflows/core_steps/models/foundation/openai/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/openai/v1.py @@ -121,7 +121,6 @@ class BlockManifest(WorkflowBlockManifest): examples=[450], ) - @classmethod def get_air_gapped_availability(cls) -> dict: return {"available": False, "reason": "requires_internet"} diff --git a/inference/core/workflows/core_steps/models/foundation/openai/v2.py b/inference/core/workflows/core_steps/models/foundation/openai/v2.py index c5c30f5973..8e0ba38cb1 100644 --- a/inference/core/workflows/core_steps/models/foundation/openai/v2.py +++ b/inference/core/workflows/core_steps/models/foundation/openai/v2.py @@ -222,7 +222,6 @@ def validate(self) -> "BlockManifest": ) return self - @classmethod def get_air_gapped_availability(cls) -> dict: return {"available": False, "reason": "requires_internet"} diff --git a/inference/core/workflows/core_steps/models/foundation/openai/v3.py b/inference/core/workflows/core_steps/models/foundation/openai/v3.py index a7d01bef43..bdfdc2c8b5 100644 --- a/inference/core/workflows/core_steps/models/foundation/openai/v3.py +++ b/inference/core/workflows/core_steps/models/foundation/openai/v3.py @@ -232,7 +232,6 @@ def validate(self) -> "BlockManifest": ) return self - @classmethod def get_air_gapped_availability(cls) -> dict: return {"available": False, "reason": "requires_internet"} diff --git a/inference/core/workflows/core_steps/models/foundation/openai/v4.py b/inference/core/workflows/core_steps/models/foundation/openai/v4.py index c18ac07230..21b78d2dd7 100644 --- a/inference/core/workflows/core_steps/models/foundation/openai/v4.py +++ b/inference/core/workflows/core_steps/models/foundation/openai/v4.py @@ -330,7 +330,6 @@ def validate(self) -> "BlockManifest": ) return self - @classmethod def get_air_gapped_availability(cls) -> dict: return {"available": False, "reason": "requires_internet"} diff --git a/inference/core/workflows/core_steps/models/foundation/segment_anything3_3d/v1.py b/inference/core/workflows/core_steps/models/foundation/segment_anything3_3d/v1.py index a098cc4e92..c98deda8da 100644 --- a/inference/core/workflows/core_steps/models/foundation/segment_anything3_3d/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/segment_anything3_3d/v1.py @@ -73,7 +73,6 @@ class BlockManifest(WorkflowBlockManifest): examples=["$steps.sam2.predictions", "$steps.detections.mask_polygon"], ) - @classmethod def get_air_gapped_availability(cls) -> dict: return {"available": False, "reason": "requires_internet"} diff --git a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py index cf075cb7c8..782a1417e5 100644 --- a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py @@ -112,7 +112,6 @@ class BlockManifest(WorkflowBlockManifest): private=True, ) - @classmethod def get_air_gapped_availability(cls) -> dict: return {"available": False, "reason": "requires_internet"} diff --git a/inference/core/workflows/core_steps/models/foundation/stability_ai/inpainting/v1.py b/inference/core/workflows/core_steps/models/foundation/stability_ai/inpainting/v1.py index ec41316b5a..a0b7449466 100644 --- a/inference/core/workflows/core_steps/models/foundation/stability_ai/inpainting/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/stability_ai/inpainting/v1.py @@ -147,7 +147,6 @@ class BlockManifest(WorkflowBlockManifest): examples=[200], ) - @classmethod def get_air_gapped_availability(cls) -> dict: return {"available": False, "reason": "requires_internet"} diff --git a/inference/core/workflows/core_steps/models/foundation/stability_ai/outpainting/v1.py b/inference/core/workflows/core_steps/models/foundation/stability_ai/outpainting/v1.py index 9a952dbd71..6d5076b2c0 100644 --- a/inference/core/workflows/core_steps/models/foundation/stability_ai/outpainting/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/stability_ai/outpainting/v1.py @@ -180,7 +180,6 @@ class BlockManifest(WorkflowBlockManifest): private=True, ) - @classmethod def get_air_gapped_availability(cls) -> dict: return {"available": False, "reason": "requires_internet"} diff --git a/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v1.py b/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v1.py index 79f9f625e8..a27788d922 100644 --- a/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v1.py +++ b/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v1.py @@ -154,7 +154,6 @@ class BlockManifest(WorkflowBlockManifest): examples=["my_project", "$inputs.al_target_project"], ) - @classmethod def get_compatible_task_types(cls) -> list: return ["instance-segmentation"] diff --git a/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v2.py b/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v2.py index 97c6c93323..ddce4a532e 100644 --- a/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v2.py +++ b/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v2.py @@ -151,7 +151,6 @@ class BlockManifest(WorkflowBlockManifest): examples=["my_project", "$inputs.al_target_project"], ) - @classmethod def get_compatible_task_types(cls) -> list: return ["instance-segmentation"] diff --git a/inference/core/workflows/core_steps/models/roboflow/keypoint_detection/v1.py b/inference/core/workflows/core_steps/models/roboflow/keypoint_detection/v1.py index 49f82b9c9a..4f9cfa12d9 100644 --- a/inference/core/workflows/core_steps/models/roboflow/keypoint_detection/v1.py +++ b/inference/core/workflows/core_steps/models/roboflow/keypoint_detection/v1.py @@ -146,7 +146,6 @@ class BlockManifest(WorkflowBlockManifest): examples=["my_project", "$inputs.al_target_project"], ) - @classmethod def get_compatible_task_types(cls) -> list: return ["keypoint-detection"] diff --git a/inference/core/workflows/core_steps/models/roboflow/keypoint_detection/v2.py b/inference/core/workflows/core_steps/models/roboflow/keypoint_detection/v2.py index c0d2f82a12..217682f8b1 100644 --- a/inference/core/workflows/core_steps/models/roboflow/keypoint_detection/v2.py +++ b/inference/core/workflows/core_steps/models/roboflow/keypoint_detection/v2.py @@ -142,7 +142,6 @@ class BlockManifest(WorkflowBlockManifest): examples=["my_project", "$inputs.al_target_project"], ) - @classmethod def get_compatible_task_types(cls) -> list: return ["keypoint-detection"] diff --git a/inference/core/workflows/core_steps/models/roboflow/multi_class_classification/v1.py b/inference/core/workflows/core_steps/models/roboflow/multi_class_classification/v1.py index aa9521e5b4..10f88feb64 100644 --- a/inference/core/workflows/core_steps/models/roboflow/multi_class_classification/v1.py +++ b/inference/core/workflows/core_steps/models/roboflow/multi_class_classification/v1.py @@ -95,7 +95,6 @@ class BlockManifest(WorkflowBlockManifest): examples=["my_project", "$inputs.al_target_project"], ) - @classmethod def get_compatible_task_types(cls) -> list: return ["classification"] diff --git a/inference/core/workflows/core_steps/models/roboflow/multi_class_classification/v2.py b/inference/core/workflows/core_steps/models/roboflow/multi_class_classification/v2.py index d272f41611..abd3bf732e 100644 --- a/inference/core/workflows/core_steps/models/roboflow/multi_class_classification/v2.py +++ b/inference/core/workflows/core_steps/models/roboflow/multi_class_classification/v2.py @@ -98,7 +98,6 @@ class BlockManifest(WorkflowBlockManifest): examples=["my_project", "$inputs.al_target_project"], ) - @classmethod def get_compatible_task_types(cls) -> list: return ["classification"] diff --git a/inference/core/workflows/core_steps/models/roboflow/multi_label_classification/v1.py b/inference/core/workflows/core_steps/models/roboflow/multi_label_classification/v1.py index 4d19aa5fe5..77fb5c3c91 100644 --- a/inference/core/workflows/core_steps/models/roboflow/multi_label_classification/v1.py +++ b/inference/core/workflows/core_steps/models/roboflow/multi_label_classification/v1.py @@ -101,7 +101,6 @@ class BlockManifest(WorkflowBlockManifest): examples=["my_project", "$inputs.al_target_project"], ) - @classmethod def get_compatible_task_types(cls) -> list: return ["multi-label-classification"] diff --git a/inference/core/workflows/core_steps/models/roboflow/multi_label_classification/v2.py b/inference/core/workflows/core_steps/models/roboflow/multi_label_classification/v2.py index 8fd4893d5d..2b7c516e2a 100644 --- a/inference/core/workflows/core_steps/models/roboflow/multi_label_classification/v2.py +++ b/inference/core/workflows/core_steps/models/roboflow/multi_label_classification/v2.py @@ -97,7 +97,6 @@ class BlockManifest(WorkflowBlockManifest): examples=["my_project", "$inputs.al_target_project"], ) - @classmethod def get_compatible_task_types(cls) -> list: return ["multi-label-classification"] diff --git a/inference/core/workflows/core_steps/models/roboflow/object_detection/v1.py b/inference/core/workflows/core_steps/models/roboflow/object_detection/v1.py index 96f087a13f..c6859092e0 100644 --- a/inference/core/workflows/core_steps/models/roboflow/object_detection/v1.py +++ b/inference/core/workflows/core_steps/models/roboflow/object_detection/v1.py @@ -136,7 +136,6 @@ class BlockManifest(WorkflowBlockManifest): examples=["my_project", "$inputs.al_target_project"], ) - @classmethod def get_compatible_task_types(cls) -> list: return ["object-detection"] diff --git a/inference/core/workflows/core_steps/models/roboflow/object_detection/v2.py b/inference/core/workflows/core_steps/models/roboflow/object_detection/v2.py index 3cd0beec49..aef9fc2767 100644 --- a/inference/core/workflows/core_steps/models/roboflow/object_detection/v2.py +++ b/inference/core/workflows/core_steps/models/roboflow/object_detection/v2.py @@ -132,7 +132,6 @@ class BlockManifest(WorkflowBlockManifest): examples=["my_project", "$inputs.al_target_project"], ) - @classmethod def get_compatible_task_types(cls) -> list: return ["object-detection"] diff --git a/inference/core/workflows/core_steps/sinks/roboflow/custom_metadata/v1.py b/inference/core/workflows/core_steps/sinks/roboflow/custom_metadata/v1.py index 7b3760522e..2adb2433b9 100644 --- a/inference/core/workflows/core_steps/sinks/roboflow/custom_metadata/v1.py +++ b/inference/core/workflows/core_steps/sinks/roboflow/custom_metadata/v1.py @@ -160,7 +160,6 @@ class BlockManifest(WorkflowBlockManifest): examples=[True, False], ) - @classmethod def get_air_gapped_availability(cls) -> dict: return {"available": False, "reason": "requires_internet"} diff --git a/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v1.py b/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v1.py index b09c22f28c..0b207ca0ba 100644 --- a/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v1.py +++ b/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v1.py @@ -244,7 +244,6 @@ class BlockManifest(WorkflowBlockManifest): examples=["serial_12345", "camera1_frame_001", "$inputs.filename"], ) - @classmethod def get_air_gapped_availability(cls) -> dict: return {"available": False, "reason": "requires_internet"} diff --git a/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v2.py b/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v2.py index 99a497605a..ff0f6135e1 100644 --- a/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v2.py +++ b/inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v2.py @@ -233,7 +233,6 @@ class BlockManifest(WorkflowBlockManifest): examples=[{"camera_id": "cam_01", "location": "$inputs.location"}, {}], ) - @classmethod def get_air_gapped_availability(cls) -> dict: return {"available": False, "reason": "requires_internet"} diff --git a/inference/core/workflows/core_steps/sinks/twilio/sms/v2.py b/inference/core/workflows/core_steps/sinks/twilio/sms/v2.py index 9d0c57fd3d..0b25f01380 100644 --- a/inference/core/workflows/core_steps/sinks/twilio/sms/v2.py +++ b/inference/core/workflows/core_steps/sinks/twilio/sms/v2.py @@ -308,7 +308,6 @@ class BlockManifest(WorkflowBlockManifest): }, ) - @classmethod def get_air_gapped_availability(cls) -> dict: return {"available": False, "reason": "requires_internet"} diff --git a/inference/core/workflows/core_steps/sinks/webhook/v1.py b/inference/core/workflows/core_steps/sinks/webhook/v1.py index 0343f6f43a..b04e47e8dc 100644 --- a/inference/core/workflows/core_steps/sinks/webhook/v1.py +++ b/inference/core/workflows/core_steps/sinks/webhook/v1.py @@ -340,7 +340,6 @@ class BlockManifest(WorkflowBlockManifest): examples=["$inputs.cooldown_seconds", 10], ) - @classmethod def get_air_gapped_availability(cls) -> dict: return {"available": False, "reason": "requires_internet"} From 8b77ac4621cef9f731ed9fd25bd10e49372f20ac Mon Sep 17 00:00:00 2001 From: Brad Dwyer Date: Thu, 26 Mar 2026 12:03:14 -0700 Subject: [PATCH 3/9] Respond to PR Review Comments --- inference/core/cache/air_gapped.py | 41 ++++-- .../core/interfaces/http/builder/routes.py | 122 ++++++++++-------- .../interfaces/http/handlers/workflows.py | 42 ++---- .../core_steps/models/foundation/clip/v1.py | 26 ++-- .../models/foundation/clip_comparison/v1.py | 16 +-- .../models/foundation/clip_comparison/v2.py | 16 +-- tests/unit/core/cache/test_air_gapped.py | 31 +++++ .../http/test_blocks_describe_airgapped.py | 14 +- 8 files changed, 168 insertions(+), 140 deletions(-) diff --git a/inference/core/cache/air_gapped.py b/inference/core/cache/air_gapped.py index e056901b86..2f34244275 100644 --- a/inference/core/cache/air_gapped.py +++ b/inference/core/cache/air_gapped.py @@ -37,7 +37,7 @@ def _slugify_model_id(model_id: str) -> str: return f"{slug}-{digest}" -def _is_model_cached(model_id: str) -> bool: +def is_model_cached(model_id: str) -> bool: """Check if *model_id* has cached artifacts in either cache layout. Layout 1 (traditional): ``MODEL_CACHE_DIR/{model_id}/`` with files inside. @@ -58,6 +58,27 @@ def _is_model_cached(model_id: str) -> bool: return False +def is_block_cached(artifacts_spec) -> bool: + """Check whether a block's required cache artifacts are present. + + Handles both formats returned by ``get_required_cache_artifacts()``: + - **list of model_id strings** (new): block is cached if ANY variant exists. + - **dict** with ``model_id`` and ``files`` keys (legacy): block is cached + if all listed files exist for that model_id. + + Returns ``False`` for unrecognised formats. + """ + if isinstance(artifacts_spec, list): + return any(is_model_cached(mid) for mid in artifacts_spec) + if isinstance(artifacts_spec, dict): + model_id = artifacts_spec.get("model_id") + required_files = artifacts_spec.get("files", []) + if not model_id or not required_files: + return False + return are_all_files_cached(files=required_files, model_id=model_id) + return False + + def _load_blocks() -> list: """Load workflow blocks, isolating the heavy import for testability.""" from inference.core.workflows.execution_engine.introspection.blocks_loader import ( @@ -181,22 +202,14 @@ def get_cached_foundation_models( ) continue - # artifacts_spec can be: - # - list of model_id strings (new format): block is cached if ANY - # variant directory exists and contains files - # - dict with {"model_id": ..., "files": [...]} (legacy format) + if not is_block_cached(artifacts_spec): + continue + + # Derive a representative model_id for the result entry. if isinstance(artifacts_spec, list): - cached = any(_is_model_cached(mid) for mid in artifacts_spec) - if not cached: - continue model_id = artifacts_spec[0] if artifacts_spec else "" elif isinstance(artifacts_spec, dict): - model_id = artifacts_spec.get("model_id") - required_files = artifacts_spec.get("files", []) - if not model_id or not required_files: - continue - if not are_all_files_cached(files=required_files, model_id=model_id): - continue + model_id = artifacts_spec.get("model_id", "") else: continue diff --git a/inference/core/interfaces/http/builder/routes.py b/inference/core/interfaces/http/builder/routes.py index 0d384974cf..77dec2b17e 100644 --- a/inference/core/interfaces/http/builder/routes.py +++ b/inference/core/interfaces/http/builder/routes.py @@ -1,3 +1,4 @@ +import asyncio import json import logging import os @@ -12,6 +13,7 @@ from starlette.status import HTTP_201_CREATED, HTTP_400_BAD_REQUEST, HTTP_404_NOT_FOUND from inference.core.cache.air_gapped import ( + _load_blocks, get_cached_foundation_models, get_task_type_to_block_mapping, scan_cached_models, @@ -145,6 +147,7 @@ async def get_all_workflows(): # ---------------------------------------------------------------- _models_cache: Optional[Tuple[float, List[Dict[str, Any]]]] = None _MODELS_CACHE_TTL = 30.0 # seconds +_models_lock = asyncio.Lock() @router.get("/api/models", dependencies=[Depends(verify_csrf_token)]) @@ -158,61 +161,70 @@ async def get_cached_models(): """ global _models_cache # noqa: PLW0603 - now = time.time() - if _models_cache is not None: - cached_at, cached_result = _models_cache - if now - cached_at < _MODELS_CACHE_TTL: - return Response( - content=json.dumps({"models": cached_result}), - media_type="application/json", - status_code=200, - ) - - # Build reverse alias map: canonical_id → [alias1, alias2, ...] - from inference.models.aliases import REGISTERED_ALIASES - - reverse_aliases: Dict[str, List[str]] = {} - for alias, canonical in REGISTERED_ALIASES.items(): - reverse_aliases.setdefault(canonical, []).append(alias) - - # Scan the filesystem for cached models. - user_models = scan_cached_models(MODEL_CACHE_DIR) - foundation_models = get_cached_foundation_models() - - # De-duplicate by model_id (foundation models take precedence). - seen: Dict[str, Dict[str, Any]] = {} - for m in user_models: - seen[m["model_id"]] = m - for m in foundation_models: - seen[m["model_id"]] = m - - # Enrich each model with compatible block types and aliases. - task_to_blocks = get_task_type_to_block_mapping() - models = [] - for m in seen.values(): - entry = dict(m) - entry.setdefault( - "compatible_block_types", - task_to_blocks.get(m.get("task_type", ""), []), - ) - # Add known aliases for this model - model_id = m.get("model_id", "") - aliases = reverse_aliases.get(model_id, []) - entry["aliases"] = aliases - # Use the shortest alias as display name if available - if aliases and (entry.get("name") == model_id or not entry.get("name")): - entry["name"] = min(aliases, key=len) - # Remove internal-only keys if present. - entry.pop("block_type", None) - models.append(entry) - - _models_cache = (now, models) - - return Response( - content=json.dumps({"models": models}), - media_type="application/json", - status_code=200, - ) + async with _models_lock: + now = time.time() + if _models_cache is not None: + cached_at, cached_result = _models_cache + if now - cached_at < _MODELS_CACHE_TTL: + return JSONResponse(content={"models": cached_result}) + + # Inline import: inference.models.aliases transitively imports the + # inference_models package which may not be installed when + # ENABLE_BUILDER=False. Keeping the import lazy avoids breaking + # the server for non-builder users. + from inference.models.aliases import REGISTERED_ALIASES + + # Build reverse alias map: canonical_id → [alias1, alias2, ...] + reverse_aliases: Dict[str, List[str]] = {} + for alias, canonical in REGISTERED_ALIASES.items(): + reverse_aliases.setdefault(canonical, []).append(alias) + + # Load blocks once and pass to both helpers to avoid triple-loading. + try: + blocks = _load_blocks() + except Exception: + blocks = [] + + # Scan the filesystem for cached models. + user_models = scan_cached_models(MODEL_CACHE_DIR) + foundation_models = get_cached_foundation_models(blocks=blocks) + + # De-duplicate by model_id (foundation models take precedence). + seen: Dict[str, Dict[str, Any]] = {} + for m in user_models: + seen[m["model_id"]] = m + for m in foundation_models: + seen[m["model_id"]] = m + + # Enrich each model with compatible block types and aliases. + task_to_blocks = get_task_type_to_block_mapping(blocks=blocks) + models = [] + for m in seen.values(): + entry = dict(m) + # For foundation models, use block_type for compatible_block_types + # since they have empty task_type. + block_type = entry.get("block_type") + if block_type: + entry.setdefault("compatible_block_types", [block_type]) + else: + entry.setdefault( + "compatible_block_types", + task_to_blocks.get(m.get("task_type", ""), []), + ) + # Add known aliases for this model + model_id = m.get("model_id", "") + aliases = reverse_aliases.get(model_id, []) + entry["aliases"] = aliases + # Use the shortest alias as display name if available + if aliases and (entry.get("name") == model_id or not entry.get("name")): + entry["name"] = min(aliases, key=len) + # Remove internal-only keys. + entry.pop("block_type", None) + models.append(entry) + + _models_cache = (now, models) + + return JSONResponse(content={"models": models}) @router.get("/api/{workflow_id}", dependencies=[Depends(verify_csrf_token)]) diff --git a/inference/core/interfaces/http/handlers/workflows.py b/inference/core/interfaces/http/handlers/workflows.py index 03718484a3..f9257eaea8 100644 --- a/inference/core/interfaces/http/handlers/workflows.py +++ b/inference/core/interfaces/http/handlers/workflows.py @@ -1,12 +1,11 @@ # TODO - for everyone: start migrating other handlers to bring relief to http_api.py import copy import logging -import os from typing import Any, Dict, List, Optional, Set, Union from packaging.specifiers import SpecifierSet -from inference.core.cache.model_artifacts import are_all_files_cached +from inference.core.cache.air_gapped import is_block_cached from inference.core.entities.responses.workflows import ( DescribeInterfaceResponse, ExternalBlockPropertyPrimitiveDefinition, @@ -14,7 +13,7 @@ UniversalQueryLanguageDescription, WorkflowsBlocksDescription, ) -from inference.core.env import ENABLE_BUILDER, MODEL_CACHE_DIR +from inference.core.env import ENABLE_BUILDER from inference.core.workflows.core_steps.common.query_language.introspection.core import ( prepare_operations_descriptions, prepare_operators_descriptions, @@ -158,32 +157,19 @@ def _get_air_gapped_info_for_block( if hasattr(manifest_cls, "get_required_cache_artifacts"): try: artifacts_spec = manifest_cls.get_required_cache_artifacts() - if isinstance(artifacts_spec, list): - from inference.core.cache.air_gapped import _is_model_cached - - cached = any(_is_model_cached(mid) for mid in artifacts_spec) - model_id = artifacts_spec[0] if artifacts_spec else "" - result = { - "available": cached, - "reason": None if cached else "missing_cache_artifacts", - "model_id": model_id, - } - _add_compatible_task_types(manifest_cls, result) - return result + cached = is_block_cached(artifacts_spec) + model_id = "" + if isinstance(artifacts_spec, list) and artifacts_spec: + model_id = artifacts_spec[0] elif isinstance(artifacts_spec, dict): - model_id = artifacts_spec.get("model_id") - required_files = artifacts_spec.get("files", []) - if model_id and required_files: - cached = are_all_files_cached( - files=required_files, model_id=model_id - ) - result = { - "available": cached, - "reason": None if cached else "missing_cache_artifacts", - "model_id": model_id, - } - _add_compatible_task_types(manifest_cls, result) - return result + model_id = artifacts_spec.get("model_id", "") + result = { + "available": cached, + "reason": None if cached else "missing_cache_artifacts", + "model_id": model_id, + } + _add_compatible_task_types(manifest_cls, result) + return result except Exception: logger.debug( "Error checking cache artifacts for %s", diff --git a/inference/core/workflows/core_steps/models/foundation/clip/v1.py b/inference/core/workflows/core_steps/models/foundation/clip/v1.py index 9fff7ac8b2..970d6964ce 100644 --- a/inference/core/workflows/core_steps/models/foundation/clip/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/clip/v1.py @@ -96,17 +96,21 @@ def get_execution_engine_compatibility(cls) -> Optional[str]: @classmethod def get_required_cache_artifacts(cls) -> list: """Return list of model_id variants that can satisfy this block.""" - return [ - "clip/RN101", - "clip/RN50", - "clip/RN50x16", - "clip/RN50x4", - "clip/RN50x64", - "clip/ViT-B-16", - "clip/ViT-B-32", - "clip/ViT-L-14-336px", - "clip/ViT-L-14", - ] + return list(CLIP_CACHE_MODEL_IDS) + + +# All CLIP model_id cache paths. Shared with clip_comparison blocks. +CLIP_CACHE_MODEL_IDS = [ + "clip/RN101", + "clip/RN50", + "clip/RN50x16", + "clip/RN50x4", + "clip/RN50x64", + "clip/ViT-B-16", + "clip/ViT-B-32", + "clip/ViT-L-14-336px", + "clip/ViT-L-14", +] text_cache = LRUCache() diff --git a/inference/core/workflows/core_steps/models/foundation/clip_comparison/v1.py b/inference/core/workflows/core_steps/models/foundation/clip_comparison/v1.py index 9669ec2df5..176b223965 100644 --- a/inference/core/workflows/core_steps/models/foundation/clip_comparison/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/clip_comparison/v1.py @@ -102,17 +102,11 @@ def get_execution_engine_compatibility(cls) -> Optional[str]: @classmethod def get_required_cache_artifacts(cls) -> list: """Return list of model_id variants that can satisfy this block.""" - return [ - "clip/RN101", - "clip/RN50", - "clip/RN50x16", - "clip/RN50x4", - "clip/RN50x64", - "clip/ViT-B-16", - "clip/ViT-B-32", - "clip/ViT-L-14-336px", - "clip/ViT-L-14", - ] + from inference.core.workflows.core_steps.models.foundation.clip.v1 import ( + CLIP_CACHE_MODEL_IDS, + ) + + return list(CLIP_CACHE_MODEL_IDS) class ClipComparisonBlockV1(WorkflowBlock): diff --git a/inference/core/workflows/core_steps/models/foundation/clip_comparison/v2.py b/inference/core/workflows/core_steps/models/foundation/clip_comparison/v2.py index d61b7da23c..d27d49885c 100644 --- a/inference/core/workflows/core_steps/models/foundation/clip_comparison/v2.py +++ b/inference/core/workflows/core_steps/models/foundation/clip_comparison/v2.py @@ -126,17 +126,11 @@ def get_execution_engine_compatibility(cls) -> Optional[str]: @classmethod def get_required_cache_artifacts(cls) -> list: """Return list of model_id variants that can satisfy this block.""" - return [ - "clip/RN101", - "clip/RN50", - "clip/RN50x16", - "clip/RN50x4", - "clip/RN50x64", - "clip/ViT-B-16", - "clip/ViT-B-32", - "clip/ViT-L-14-336px", - "clip/ViT-L-14", - ] + from inference.core.workflows.core_steps.models.foundation.clip.v1 import ( + CLIP_CACHE_MODEL_IDS, + ) + + return list(CLIP_CACHE_MODEL_IDS) class ClipComparisonBlockV2(WorkflowBlock): diff --git a/tests/unit/core/cache/test_air_gapped.py b/tests/unit/core/cache/test_air_gapped.py index fb583eb047..1641ce34c4 100644 --- a/tests/unit/core/cache/test_air_gapped.py +++ b/tests/unit/core/cache/test_air_gapped.py @@ -297,3 +297,34 @@ def model_json_schema(cls) -> dict: result = get_cached_foundation_models(blocks=[block]) assert len(result) == 0 + + +# ── Cross-validation: _slugify_model_id must match inference_models ────────── + +_SLUGIFY_TEST_IDS = [ + "clip/ViT-B-16", + "coco/40", + "rfdetr-medium", + "sam3/sam3_final", + "florence-pretrains/3", + "depth-anything-v3/small", + "smolvlm2/smolvlm-2.2b-instruct", + "qwen-pretrains/1", + "a" * 100, # long model id + "special!!!chars###here", +] + + +@pytest.mark.parametrize("model_id", _SLUGIFY_TEST_IDS) +def test_slugify_matches_inference_models(model_id: str): + """Ensure _slugify_model_id stays in sync with the canonical implementation.""" + try: + from inference_models.models.auto_loaders.core import ( + slugify_model_id_to_os_safe_format, + ) + except ImportError: + pytest.skip("inference_models not installed") + + from inference.core.cache.air_gapped import _slugify_model_id + + assert _slugify_model_id(model_id) == slugify_model_id_to_os_safe_format(model_id) diff --git a/tests/unit/core/interfaces/http/test_blocks_describe_airgapped.py b/tests/unit/core/interfaces/http/test_blocks_describe_airgapped.py index 16a6ec53c2..42f74de4c8 100644 --- a/tests/unit/core/interfaces/http/test_blocks_describe_airgapped.py +++ b/tests/unit/core/interfaces/http/test_blocks_describe_airgapped.py @@ -215,7 +215,7 @@ def test_local_network_block_is_available(self): assert info["available"] is True @patch( - "inference.core.interfaces.http.handlers.workflows.are_all_files_cached", + "inference.core.interfaces.http.handlers.workflows.is_block_cached", return_value=True, ) def test_foundation_model_cached(self, mock_cache): @@ -230,7 +230,7 @@ def test_foundation_model_cached(self, mock_cache): mock_cache.assert_called_once() @patch( - "inference.core.interfaces.http.handlers.workflows.are_all_files_cached", + "inference.core.interfaces.http.handlers.workflows.is_block_cached", return_value=False, ) def test_foundation_model_not_cached(self, mock_cache): @@ -366,7 +366,7 @@ def test_enrichment_does_not_mutate_original(self): assert result.blocks[0].block_schema == original_schema @patch( - "inference.core.interfaces.http.handlers.workflows.are_all_files_cached", + "inference.core.interfaces.http.handlers.workflows.is_block_cached", return_value=True, ) def test_foundation_model_cached_shows_available(self, mock_cache): @@ -391,7 +391,7 @@ def test_foundation_model_cached_shows_available(self, mock_cache): assert "object-detection" in info["compatible_task_types"] @patch( - "inference.core.interfaces.http.handlers.workflows.are_all_files_cached", + "inference.core.interfaces.http.handlers.workflows.is_block_cached", return_value=False, ) def test_foundation_model_not_cached_shows_unavailable(self, mock_cache): @@ -431,9 +431,6 @@ def test_list_format_cached_variant(self, tmp_path): open(os.path.join(variant_dir, "visual.onnx"), "w").close() with patch( - "inference.core.interfaces.http.handlers.workflows.MODEL_CACHE_DIR", - cache, - ), patch( "inference.core.cache.air_gapped.MODEL_CACHE_DIR", cache, ): @@ -452,9 +449,6 @@ def test_list_format_no_cached_variant(self, tmp_path): cache = str(tmp_path) with patch( - "inference.core.interfaces.http.handlers.workflows.MODEL_CACHE_DIR", - cache, - ), patch( "inference.core.cache.air_gapped.MODEL_CACHE_DIR", cache, ): From 82e5d4ab4f32a8ddc47bc41bae39f096fc461350 Mon Sep 17 00:00:00 2001 From: probicheaux Date: Fri, 27 Mar 2026 09:04:47 +0000 Subject: [PATCH 4/9] Remove csrf --- inference/core/interfaces/http/builder/routes.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/inference/core/interfaces/http/builder/routes.py b/inference/core/interfaces/http/builder/routes.py index 77dec2b17e..382b13b10a 100644 --- a/inference/core/interfaces/http/builder/routes.py +++ b/inference/core/interfaces/http/builder/routes.py @@ -43,10 +43,7 @@ # Dependency to verify the X-CSRF header on any protected route # ---------------------------------------------------------------- def verify_csrf_token(x_csrf: str = Header(None)): - if x_csrf != csrf: - raise HTTPException( - status_code=status.HTTP_403_FORBIDDEN, detail="Invalid CSRF token" - ) + return # --------------------- From 115b2bb4d05b746663b4795d1181f2c66b2777ca Mon Sep 17 00:00:00 2001 From: probicheaux Date: Fri, 27 Mar 2026 12:49:38 +0000 Subject: [PATCH 5/9] Fix air-gapped model discovery for inference-models cache layout - Write model_id into model_config.json so it can be recovered without the auto-resolution-cache (which expires and gets deleted) - scan_cached_models now also walks models-cache/{slug}/{package_id}/ model_config.json under both MODEL_CACHE_DIR and INFERENCE_HOME, covering models pre-populated via inference-models without a corresponding model_type.json in MODEL_CACHE_DIR - Prune models-cache/ from the model_type.json walk to avoid noise - De-duplicate results by model_id (layout-1 takes precedence) Co-Authored-By: Claude Sonnet 4.6 --- inference/core/cache/air_gapped.py | 181 +++++++++++++----- .../models/auto_loaders/core.py | 15 +- 2 files changed, 141 insertions(+), 55 deletions(-) diff --git a/inference/core/cache/air_gapped.py b/inference/core/cache/air_gapped.py index 2f34244275..880e006c40 100644 --- a/inference/core/cache/air_gapped.py +++ b/inference/core/cache/air_gapped.py @@ -37,24 +37,45 @@ def _slugify_model_id(model_id: str) -> str: return f"{slug}-{digest}" +def _get_inference_models_home() -> Optional[str]: + """Return INFERENCE_HOME from the inference_models package, or None if not installed.""" + try: + from inference_models.configuration import INFERENCE_HOME + + return INFERENCE_HOME + except ImportError: + return None + + def is_model_cached(model_id: str) -> bool: """Check if *model_id* has cached artifacts in either cache layout. Layout 1 (traditional): ``MODEL_CACHE_DIR/{model_id}/`` with files inside. - Layout 2 (inference-models): ``MODEL_CACHE_DIR/models-cache/{slug}/`` with - sub-directories containing model files. + Layout 2 (inference-models): ``{base}/models-cache/{slug}/`` with + sub-directories containing model files. The base directory is checked + under both ``MODEL_CACHE_DIR`` and ``INFERENCE_HOME`` (from the + inference_models package) since the two env-vars can be configured + independently even though they share the same default. """ # Traditional layout traditional_path = os.path.join(MODEL_CACHE_DIR, model_id) if os.path.isdir(traditional_path) and os.listdir(traditional_path): return True - # inference-models layout slug = _slugify_model_id(model_id) + + # inference-models layout under MODEL_CACHE_DIR models_cache_path = os.path.join(MODEL_CACHE_DIR, "models-cache", slug) if os.path.isdir(models_cache_path) and os.listdir(models_cache_path): return True + # inference-models layout under INFERENCE_HOME (may differ from MODEL_CACHE_DIR) + inference_home = _get_inference_models_home() + if inference_home is not None and inference_home != MODEL_CACHE_DIR: + ih_path = os.path.join(inference_home, "models-cache", slug) + if os.path.isdir(ih_path) and os.listdir(ih_path): + return True + return False @@ -89,11 +110,19 @@ def _load_blocks() -> list: def scan_cached_models(cache_dir: str) -> List[Dict[str, Any]]: - """Walk *cache_dir* looking for ``model_type.json`` marker files. + """Walk *cache_dir* and the inference-models cache looking for cached user models. + + Two layouts are scanned: + + Layout 1 — traditional (``model_type.json``): + ``{cache_dir}/{workspace}/{project}/{version}/model_type.json`` + Written by the inference model registry on first download. - Each marker is written by the model registry when a model is first - downloaded. The file contains at least ``project_task_type`` and - ``model_type`` keys. + Layout 2 — inference-models (``model_config.json``): + ``{inference_home}/models-cache/{slug}/{package_id}/model_config.json`` + Written by the inference-models package on first download. + The ``model_id`` field in that file (added so air-gapped scanning works) + is used as the canonical identifier. Returns a list of dicts with the following shape:: @@ -104,60 +133,112 @@ def scan_cached_models(cache_dir: str) -> List[Dict[str, Any]]: "model_architecture": "yolov8n", "is_foundation": False, } - """ - results: List[Dict[str, Any]] = [] - if not os.path.isdir(cache_dir): - return results - - for root, dirs, files in os.walk(cache_dir): - # Prune top-level directories we know are not model trees. - rel = os.path.relpath(root, cache_dir) - if rel == ".": - dirs[:] = [d for d in dirs if d not in _SKIP_TOP_LEVEL] - continue - if "model_type.json" not in files: - continue - - model_type_path = os.path.join(root, "model_type.json") - try: - with open(model_type_path, "r") as fh: - metadata = json.load(fh) - except (json.JSONDecodeError, OSError) as exc: - logger.warning( - "Skipping unreadable model_type.json at %s: %s", - model_type_path, - exc, + Results are de-duplicated by ``model_id``; layout-1 entries take precedence. + """ + seen: Dict[str, Dict[str, Any]] = {} + + # ── Layout 1: model_type.json ──────────────────────────────────────────── + if os.path.isdir(cache_dir): + for root, dirs, files in os.walk(cache_dir): + rel = os.path.relpath(root, cache_dir) + if rel == ".": + # Skip top-level dirs that are not model trees (incl. models-cache). + dirs[:] = [ + d for d in dirs if d not in _SKIP_TOP_LEVEL | {"models-cache"} + ] + continue + + if "model_type.json" not in files: + continue + + model_type_path = os.path.join(root, "model_type.json") + try: + with open(model_type_path, "r") as fh: + metadata = json.load(fh) + except (json.JSONDecodeError, OSError) as exc: + logger.warning( + "Skipping unreadable model_type.json at %s: %s", + model_type_path, + exc, + ) + continue + + if not isinstance(metadata, dict): + continue + + task_type = metadata.get(PROJECT_TASK_TYPE_KEY) or metadata.get( + "taskType", "" + ) + model_architecture = metadata.get(MODEL_TYPE_KEY) or metadata.get( + "modelArchitecture", "" ) - continue - - if not isinstance(metadata, dict): - continue - - # Support both traditional keys and inference-models metadata keys. - task_type = metadata.get(PROJECT_TASK_TYPE_KEY) or metadata.get("taskType", "") - model_architecture = metadata.get(MODEL_TYPE_KEY) or metadata.get( - "modelArchitecture", "" - ) - - if not task_type: - continue - model_id = os.path.relpath(root, cache_dir) - # Normalise path separators on Windows. - model_id = model_id.replace(os.sep, "/") + if not task_type: + continue - results.append( - { + model_id = os.path.relpath(root, cache_dir).replace(os.sep, "/") + seen[model_id] = { "model_id": model_id, "name": model_id, "task_type": task_type, "model_architecture": model_architecture, "is_foundation": False, } - ) - return results + # ── Layout 2: inference-models model_config.json ──────────────────────── + bases = [cache_dir] + inference_home = _get_inference_models_home() + if inference_home is not None and inference_home != cache_dir: + bases.append(inference_home) + + for base in bases: + models_cache = os.path.join(base, "models-cache") + if not os.path.isdir(models_cache): + continue + for slug in os.listdir(models_cache): + slug_dir = os.path.join(models_cache, slug) + if not os.path.isdir(slug_dir): + continue + for package_id in os.listdir(slug_dir): + config_path = os.path.join( + slug_dir, package_id, "model_config.json" + ) + if not os.path.isfile(config_path): + continue + try: + with open(config_path, "r") as fh: + metadata = json.load(fh) + except (json.JSONDecodeError, OSError) as exc: + logger.warning( + "Skipping unreadable model_config.json at %s: %s", + config_path, + exc, + ) + continue + + if not isinstance(metadata, dict): + continue + + model_id = metadata.get("model_id") + task_type = metadata.get("task_type", "") + model_architecture = metadata.get("model_architecture", "") + + # model_id is only present for caches written after the fix + # that added it to dump_model_config_for_offline_use. + if not model_id or not task_type: + continue + + if model_id not in seen: + seen[model_id] = { + "model_id": model_id, + "name": model_id, + "task_type": task_type, + "model_architecture": model_architecture or "", + "is_foundation": False, + } + + return list(seen.values()) def get_cached_foundation_models( diff --git a/inference_models/inference_models/models/auto_loaders/core.py b/inference_models/inference_models/models/auto_loaders/core.py index ce6afaf1af..082a8e11ec 100644 --- a/inference_models/inference_models/models/auto_loaders/core.py +++ b/inference_models/inference_models/models/auto_loaders/core.py @@ -1291,6 +1291,7 @@ def initialize_model( task_type=task_type, backend_type=model_package.backend, file_lock_acquire_timeout=model_download_file_lock_acquire_timeout, + model_id=model_id, on_file_created=on_file_created, ) resolved_files = set(shared_files_mapping.values()) @@ -1391,6 +1392,7 @@ def dump_model_config_for_offline_use( task_type: TaskType, backend_type: Optional[BackendType], file_lock_acquire_timeout: int, + model_id: Optional[str] = None, on_file_created: Optional[Callable[[str], None]] = None, ) -> None: if os.path.exists(config_path): @@ -1399,14 +1401,17 @@ def dump_model_config_for_offline_use( return None target_file_dir, target_file_name = os.path.split(config_path) lock_path = os.path.join(target_file_dir, f".{target_file_name}.lock") + content = { + "model_architecture": model_architecture, + "task_type": task_type, + "backend_type": backend_type, + } + if model_id is not None: + content["model_id"] = model_id with FileLock(lock_path, timeout=file_lock_acquire_timeout): dump_json( path=config_path, - content={ - "model_architecture": model_architecture, - "task_type": task_type, - "backend_type": backend_type, - }, + content=content, ) if on_file_created: on_file_created(config_path) From 2a52609617dff14e8a8214e9e4283b5b382de71f Mon Sep 17 00:00:00 2001 From: probicheaux Date: Fri, 27 Mar 2026 13:36:58 +0000 Subject: [PATCH 6/9] Fall back to inference-models model_config.json for model metadata cache When model_type.json is missing (e.g. models pre-populated via inference-models without going through the registry), check models-cache/{slug}/{package_id}/model_config.json under both MODEL_CACHE_DIR and INFERENCE_HOME. This allows get_model_type() to resolve cached models without hitting the Roboflow API, enabling fully air-gapped model loading. Co-Authored-By: Claude Opus 4.6 (1M context) --- inference/core/registries/roboflow.py | 74 ++++++++++++++++++++++----- 1 file changed, 62 insertions(+), 12 deletions(-) diff --git a/inference/core/registries/roboflow.py b/inference/core/registries/roboflow.py index 7c03ca6bc0..dda56f6d2f 100644 --- a/inference/core/registries/roboflow.py +++ b/inference/core/registries/roboflow.py @@ -299,21 +299,71 @@ def get_model_metadata_from_cache( def _get_model_metadata_from_cache( dataset_id: Union[DatasetID, ModelID], version_id: Optional[VersionID] ) -> Optional[Tuple[TaskType, ModelType]]: + # Layout 1: traditional model_type.json model_type_cache_path = construct_model_type_cache_path( dataset_id=dataset_id, version_id=version_id ) - if not os.path.isfile(model_type_cache_path): - return None - try: - model_metadata = read_json(path=model_type_cache_path) - if model_metadata_content_is_invalid(content=model_metadata): - return None - return model_metadata[PROJECT_TASK_TYPE_KEY], model_metadata[MODEL_TYPE_KEY] - except ValueError as e: - logger.warning( - f"Could not load model description from cache under path: {model_type_cache_path} - decoding issue: {e}." - ) - return None + if os.path.isfile(model_type_cache_path): + try: + model_metadata = read_json(path=model_type_cache_path) + if not model_metadata_content_is_invalid(content=model_metadata): + return ( + model_metadata[PROJECT_TASK_TYPE_KEY], + model_metadata[MODEL_TYPE_KEY], + ) + except ValueError as e: + logger.warning( + f"Could not load model description from cache under path: " + f"{model_type_cache_path} - decoding issue: {e}." + ) + + # Layout 2: inference-models model_config.json + model_id = f"{dataset_id}/{version_id}" if version_id else dataset_id + result = _get_model_metadata_from_inference_models_cache(model_id) + if result is not None: + return result + + return None + + +def _get_model_metadata_from_inference_models_cache( + model_id: str, +) -> Optional[Tuple[TaskType, ModelType]]: + """Check the inference-models cache layout for model metadata. + + Looks for ``model_config.json`` under + ``{base}/models-cache/{slug}/{package_id}/model_config.json`` + where *base* is ``MODEL_CACHE_DIR`` and optionally ``INFERENCE_HOME``. + """ + from inference.core.cache.air_gapped import _get_inference_models_home, _slugify_model_id + + slug = _slugify_model_id(model_id) + + bases = [MODEL_CACHE_DIR] + inference_home = _get_inference_models_home() + if inference_home is not None and inference_home != MODEL_CACHE_DIR: + bases.append(inference_home) + + for base in bases: + slug_dir = os.path.join(base, "models-cache", slug) + if not os.path.isdir(slug_dir): + continue + for package_id in os.listdir(slug_dir): + config_path = os.path.join(slug_dir, package_id, "model_config.json") + if not os.path.isfile(config_path): + continue + try: + metadata = read_json(path=config_path) + except ValueError: + continue + if not isinstance(metadata, dict): + continue + task_type = metadata.get("task_type", "") + model_arch = metadata.get("model_architecture", "") + if task_type and model_arch: + return task_type, model_arch + + return None def model_metadata_content_is_invalid(content: Optional[Union[list, dict]]) -> bool: From 459f2b015b4df2c21d99608520607c01d3460822 Mon Sep 17 00:00:00 2001 From: probicheaux Date: Fri, 27 Mar 2026 13:57:44 +0000 Subject: [PATCH 7/9] Resolve inference-models cache path before calling from_pretrained When model weights are already cached in the inference-models layout (models-cache/{slug}/{package_id}/), pass the local directory path to AutoModel.from_pretrained() instead of the model ID. This triggers load_model_from_local_storage() which skips the API call entirely, enabling air-gapped model loading without modifying the inference-models package. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../core/models/inference_models_adapters.py | 44 ++++++++++++++++--- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/inference/core/models/inference_models_adapters.py b/inference/core/models/inference_models_adapters.py index 2b7855228e..5f34e4e8bb 100644 --- a/inference/core/models/inference_models_adapters.py +++ b/inference/core/models/inference_models_adapters.py @@ -62,6 +62,40 @@ ) from inference_models.models.base.types import PreprocessingMetadata +def _resolve_cached_model_path(model_id: str) -> str: + """If the model is already in the inference-models local cache, return the + package directory path so ``AutoModel.from_pretrained`` can load directly + from disk without calling the Roboflow API. Returns the original + *model_id* unchanged when no local cache hit is found. + """ + try: + from inference.core.cache.air_gapped import ( + _get_inference_models_home, + _slugify_model_id, + ) + from inference.core.env import MODEL_CACHE_DIR + + slug = _slugify_model_id(model_id) + bases = [MODEL_CACHE_DIR] + inference_home = _get_inference_models_home() + if inference_home is not None and inference_home != MODEL_CACHE_DIR: + bases.append(inference_home) + + for base in bases: + import os + + slug_dir = os.path.join(base, "models-cache", slug) + if not os.path.isdir(slug_dir): + continue + for package_id in os.listdir(slug_dir): + package_dir = os.path.join(slug_dir, package_id) + if os.path.isfile(os.path.join(package_dir, "model_config.json")): + return package_dir + except Exception: + pass + return model_id + + DEFAULT_COLOR_PALETTE = [ "#A351FB", "#FF4040", @@ -108,7 +142,7 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs): ) ) self._model: ObjectDetectionModel = AutoModel.from_pretrained( - model_id_or_path=model_id, + model_id_or_path=_resolve_cached_model_path(model_id), api_key=self.api_key, allow_untrusted_packages=ALLOW_INFERENCE_MODELS_UNTRUSTED_PACKAGES, allow_direct_local_storage_loading=ALLOW_INFERENCE_MODELS_DIRECTLY_ACCESS_LOCAL_PACKAGES, @@ -259,7 +293,7 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs): ) ) self._model: InstanceSegmentationModel = AutoModel.from_pretrained( - model_id_or_path=model_id, + model_id_or_path=_resolve_cached_model_path(model_id), api_key=self.api_key, allow_untrusted_packages=ALLOW_INFERENCE_MODELS_UNTRUSTED_PACKAGES, allow_direct_local_storage_loading=ALLOW_INFERENCE_MODELS_DIRECTLY_ACCESS_LOCAL_PACKAGES, @@ -417,7 +451,7 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs): ) ) self._model: KeyPointsDetectionModel = AutoModel.from_pretrained( - model_id_or_path=model_id, + model_id_or_path=_resolve_cached_model_path(model_id), api_key=self.api_key, allow_untrusted_packages=ALLOW_INFERENCE_MODELS_UNTRUSTED_PACKAGES, allow_direct_local_storage_loading=ALLOW_INFERENCE_MODELS_DIRECTLY_ACCESS_LOCAL_PACKAGES, @@ -627,7 +661,7 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs): ) self._model: Union[ClassificationModel, MultiLabelClassificationModel] = ( AutoModel.from_pretrained( - model_id_or_path=model_id, + model_id_or_path=_resolve_cached_model_path(model_id), api_key=self.api_key, allow_untrusted_packages=ALLOW_INFERENCE_MODELS_UNTRUSTED_PACKAGES, allow_direct_local_storage_loading=ALLOW_INFERENCE_MODELS_DIRECTLY_ACCESS_LOCAL_PACKAGES, @@ -913,7 +947,7 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs): ) ) self._model: SemanticSegmentationModel = AutoModel.from_pretrained( - model_id_or_path=model_id, + model_id_or_path=_resolve_cached_model_path(model_id), api_key=self.api_key, allow_untrusted_packages=ALLOW_INFERENCE_MODELS_UNTRUSTED_PACKAGES, allow_direct_local_storage_loading=ALLOW_INFERENCE_MODELS_DIRECTLY_ACCESS_LOCAL_PACKAGES, From 7bbba728ead124fa6540d9ca3140d9ce9711813f Mon Sep 17 00:00:00 2001 From: Sam Beran Date: Tue, 31 Mar 2026 09:26:03 -0500 Subject: [PATCH 8/9] Restore CSRF token verification in builder routes --- inference/core/interfaces/http/builder/routes.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/inference/core/interfaces/http/builder/routes.py b/inference/core/interfaces/http/builder/routes.py index 2838ca1af1..175688e32b 100644 --- a/inference/core/interfaces/http/builder/routes.py +++ b/inference/core/interfaces/http/builder/routes.py @@ -45,7 +45,10 @@ # Dependency to verify the X-CSRF header on any protected route # ---------------------------------------------------------------- def verify_csrf_token(x_csrf: str = Header(None)): - return + if x_csrf != csrf: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, detail="Invalid CSRF token" + ) # --------------------- From e3e29ac9ab1b5ab38f1c4d397e912959cbda8f13 Mon Sep 17 00:00:00 2001 From: Sam Beran Date: Tue, 31 Mar 2026 09:29:09 -0500 Subject: [PATCH 9/9] Format inference_models_adapters.py and roboflow.py with black --- inference/core/models/inference_models_adapters.py | 1 + inference/core/registries/roboflow.py | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/inference/core/models/inference_models_adapters.py b/inference/core/models/inference_models_adapters.py index 5f34e4e8bb..e7f6c58252 100644 --- a/inference/core/models/inference_models_adapters.py +++ b/inference/core/models/inference_models_adapters.py @@ -62,6 +62,7 @@ ) from inference_models.models.base.types import PreprocessingMetadata + def _resolve_cached_model_path(model_id: str) -> str: """If the model is already in the inference-models local cache, return the package directory path so ``AutoModel.from_pretrained`` can load directly diff --git a/inference/core/registries/roboflow.py b/inference/core/registries/roboflow.py index dda56f6d2f..6e324b8ac7 100644 --- a/inference/core/registries/roboflow.py +++ b/inference/core/registries/roboflow.py @@ -335,7 +335,10 @@ def _get_model_metadata_from_inference_models_cache( ``{base}/models-cache/{slug}/{package_id}/model_config.json`` where *base* is ``MODEL_CACHE_DIR`` and optionally ``INFERENCE_HOME``. """ - from inference.core.cache.air_gapped import _get_inference_models_home, _slugify_model_id + from inference.core.cache.air_gapped import ( + _get_inference_models_home, + _slugify_model_id, + ) slug = _slugify_model_id(model_id)