Skip to content
Draft
2 changes: 2 additions & 0 deletions inference/core/interfaces/http/handlers/workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@
)
from inference.core.workflows.prototypes.block import BlockAirGappedInfo

logger = logging.getLogger(__name__)
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this needed?

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure, probably not



def handle_describe_workflows_blocks_request(
dynamic_blocks_definitions: Optional[List[DynamicBlockDefinition]] = None,
Expand Down
44 changes: 39 additions & 5 deletions inference/core/models/inference_models_adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,40 @@
)
from inference_models.models.base.types import PreprocessingMetadata

def _resolve_cached_model_path(model_id: str) -> str:
"""If the model is already in the inference-models local cache, return the
package directory path so ``AutoModel.from_pretrained`` can load directly
from disk without calling the Roboflow API. Returns the original
*model_id* unchanged when no local cache hit is found.
"""
try:
from inference.core.cache.air_gapped import (
_get_inference_models_home,
_slugify_model_id,
)
from inference.core.env import MODEL_CACHE_DIR

slug = _slugify_model_id(model_id)
bases = [MODEL_CACHE_DIR]
inference_home = _get_inference_models_home()
if inference_home is not None and inference_home != MODEL_CACHE_DIR:
bases.append(inference_home)

for base in bases:
import os

slug_dir = os.path.join(base, "models-cache", slug)
if not os.path.isdir(slug_dir):
continue
for package_id in os.listdir(slug_dir):
package_dir = os.path.join(slug_dir, package_id)
if os.path.isfile(os.path.join(package_dir, "model_config.json")):
return package_dir
except Exception:
pass
return model_id


DEFAULT_COLOR_PALETTE = [
"#A351FB",
"#FF4040",
Expand Down Expand Up @@ -108,7 +142,7 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs):
)
)
self._model: ObjectDetectionModel = AutoModel.from_pretrained(
model_id_or_path=model_id,
model_id_or_path=_resolve_cached_model_path(model_id),
api_key=self.api_key,
allow_untrusted_packages=ALLOW_INFERENCE_MODELS_UNTRUSTED_PACKAGES,
allow_direct_local_storage_loading=ALLOW_INFERENCE_MODELS_DIRECTLY_ACCESS_LOCAL_PACKAGES,
Expand Down Expand Up @@ -259,7 +293,7 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs):
)
)
self._model: InstanceSegmentationModel = AutoModel.from_pretrained(
model_id_or_path=model_id,
model_id_or_path=_resolve_cached_model_path(model_id),
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this needed?
Wouldn't it work to just extend the auto-loading cache expiry? It loads for me even in detached mode, and you will most likely hit the ALLOW_INFERENCE_MODELS_DIRECTLY_ACCESS_LOCAL_PACKAGES guard in a standard setup.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather than manually loading from the cache, we can extend the auto loader TTL, and we will need to add a flag to throw an error — rather than loading from the API — when files are missing or integrity is not verified. This will be a trivial change to the AutoLoader.

api_key=self.api_key,
allow_untrusted_packages=ALLOW_INFERENCE_MODELS_UNTRUSTED_PACKAGES,
allow_direct_local_storage_loading=ALLOW_INFERENCE_MODELS_DIRECTLY_ACCESS_LOCAL_PACKAGES,
Expand Down Expand Up @@ -417,7 +451,7 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs):
)
)
self._model: KeyPointsDetectionModel = AutoModel.from_pretrained(
model_id_or_path=model_id,
model_id_or_path=_resolve_cached_model_path(model_id),
api_key=self.api_key,
allow_untrusted_packages=ALLOW_INFERENCE_MODELS_UNTRUSTED_PACKAGES,
allow_direct_local_storage_loading=ALLOW_INFERENCE_MODELS_DIRECTLY_ACCESS_LOCAL_PACKAGES,
Expand Down Expand Up @@ -627,7 +661,7 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs):
)
self._model: Union[ClassificationModel, MultiLabelClassificationModel] = (
AutoModel.from_pretrained(
model_id_or_path=model_id,
model_id_or_path=_resolve_cached_model_path(model_id),
api_key=self.api_key,
allow_untrusted_packages=ALLOW_INFERENCE_MODELS_UNTRUSTED_PACKAGES,
allow_direct_local_storage_loading=ALLOW_INFERENCE_MODELS_DIRECTLY_ACCESS_LOCAL_PACKAGES,
Expand Down Expand Up @@ -913,7 +947,7 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs):
)
)
self._model: SemanticSegmentationModel = AutoModel.from_pretrained(
model_id_or_path=model_id,
model_id_or_path=_resolve_cached_model_path(model_id),
api_key=self.api_key,
allow_untrusted_packages=ALLOW_INFERENCE_MODELS_UNTRUSTED_PACKAGES,
allow_direct_local_storage_loading=ALLOW_INFERENCE_MODELS_DIRECTLY_ACCESS_LOCAL_PACKAGES,
Expand Down
74 changes: 62 additions & 12 deletions inference/core/registries/roboflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,21 +299,71 @@ def get_model_metadata_from_cache(
def _get_model_metadata_from_cache(
    dataset_id: Union[DatasetID, ModelID], version_id: Optional[VersionID]
) -> Optional[Tuple[TaskType, ModelType]]:
    """Resolve ``(task_type, model_type)`` for a model from local caches.

    Two cache layouts are checked in order:

    1. the traditional ``model_type.json`` file;
    2. the inference-models ``model_config.json`` layout.

    Returns ``None`` when neither layout yields valid metadata.
    """
    # Layout 1: traditional model_type.json
    model_type_cache_path = construct_model_type_cache_path(
        dataset_id=dataset_id, version_id=version_id
    )
    if os.path.isfile(model_type_cache_path):
        try:
            model_metadata = read_json(path=model_type_cache_path)
            if not model_metadata_content_is_invalid(content=model_metadata):
                return (
                    model_metadata[PROJECT_TASK_TYPE_KEY],
                    model_metadata[MODEL_TYPE_KEY],
                )
        except ValueError as e:
            # A corrupted cache file is non-fatal: log and fall through
            # to the inference-models layout.
            logger.warning(
                f"Could not load model description from cache under path: "
                f"{model_type_cache_path} - decoding issue: {e}."
            )

    # Layout 2: inference-models model_config.json
    model_id = f"{dataset_id}/{version_id}" if version_id else dataset_id
    return _get_model_metadata_from_inference_models_cache(model_id)


def _get_model_metadata_from_inference_models_cache(
    model_id: str,
) -> Optional[Tuple[TaskType, ModelType]]:
    """Check the inference-models cache layout for model metadata.

    Looks for ``model_config.json`` under
    ``{base}/models-cache/{slug}/{package_id}/model_config.json``
    where *base* is ``MODEL_CACHE_DIR`` and optionally ``INFERENCE_HOME``.
    Returns ``(task_type, model_type)`` from the first package whose config
    carries both fields, or ``None`` when no valid config is found.
    """
    from inference.core.cache.air_gapped import (
        _get_inference_models_home,
        _slugify_model_id,
    )

    slug = _slugify_model_id(model_id)

    # Search MODEL_CACHE_DIR first, then the inference-models home when distinct.
    bases = [MODEL_CACHE_DIR]
    inference_home = _get_inference_models_home()
    if inference_home is not None and inference_home != MODEL_CACHE_DIR:
        bases.append(inference_home)

    for base in bases:
        slug_dir = os.path.join(base, "models-cache", slug)
        if not os.path.isdir(slug_dir):
            continue
        # Sorted for deterministic resolution: os.listdir order is
        # arbitrary, so without sorting the "first hit" could vary.
        for package_id in sorted(os.listdir(slug_dir)):
            config_path = os.path.join(slug_dir, package_id, "model_config.json")
            if not os.path.isfile(config_path):
                continue
            try:
                metadata = read_json(path=config_path)
            except ValueError:
                # Corrupted config: skip this package, try the next one.
                continue
            if not isinstance(metadata, dict):
                continue
            task_type = metadata.get("task_type", "")
            model_arch = metadata.get("model_architecture", "")
            if task_type and model_arch:
                return task_type, model_arch

    return None


def model_metadata_content_is_invalid(content: Optional[Union[list, dict]]) -> bool:
Expand Down
15 changes: 10 additions & 5 deletions inference_models/inference_models/models/auto_loaders/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1292,6 +1292,7 @@ def initialize_model(
task_type=task_type,
backend_type=model_package.backend,
file_lock_acquire_timeout=model_download_file_lock_acquire_timeout,
model_id=model_id,
on_file_created=on_file_created,
)
resolved_files = set(shared_files_mapping.values())
Expand Down Expand Up @@ -1392,6 +1393,7 @@ def dump_model_config_for_offline_use(
task_type: TaskType,
backend_type: Optional[BackendType],
file_lock_acquire_timeout: int,
model_id: Optional[str] = None,
on_file_created: Optional[Callable[[str], None]] = None,
) -> None:
if os.path.exists(config_path):
Expand All @@ -1400,14 +1402,17 @@ def dump_model_config_for_offline_use(
return None
target_file_dir, target_file_name = os.path.split(config_path)
lock_path = os.path.join(target_file_dir, f".{target_file_name}.lock")
content = {
"model_architecture": model_architecture,
"task_type": task_type,
"backend_type": backend_type,
}
if model_id is not None:
content["model_id"] = model_id
with FileLock(lock_path, timeout=file_lock_acquire_timeout):
dump_json(
path=config_path,
content={
"model_architecture": model_architecture,
"task_type": task_type,
"backend_type": backend_type,
},
content=content,
)
if on_file_created:
on_file_created(config_path)
Expand Down
Loading