From b6a701b9a1b9f2d2822d53aae4a7c80a4ff08b9b Mon Sep 17 00:00:00 2001
From: LLM Vector Engineer
Date: Tue, 7 Apr 2026 22:56:33 +0400
Subject: [PATCH 1/3] fix(telemetry): sanitize properties to prevent leaking file paths and credentials

Added sanitization layer that:
- Strips absolute file paths (Unix/Windows) from all telemetry properties
- Redacts environment variable references ($HOME, %USERPROFILE%, etc.)
- Redacts values of sensitive keys (api_key, password, token, etc.)
- Recursively sanitizes nested dicts and lists

This prevents accidental leakage of local file paths, credentials, or
other PII that may appear in exception stack traces sent via telemetry.
---
Review notes (text in this section is ignored by `git am`):
- _sanitize_value() now tolerates non-string dict keys instead of raising
  AttributeError from k.lower().
- Line breaks and indentation were restored from a whitespace-collapsed
  copy of this series; check context lines against the tree before applying.
- Hunk counts and diffstat numbers were updated by hand for the revised
  lines; the blob ids on the "index" lines were not regenerated.

 interpreter/core/utils/telemetry.py | 69 +++++++++++++++++++++++++++--
 1 file changed, 66 insertions(+), 3 deletions(-)

diff --git a/interpreter/core/utils/telemetry.py b/interpreter/core/utils/telemetry.py
index 2df870a09b..5ae79f1a08 100644
--- a/interpreter/core/utils/telemetry.py
+++ b/interpreter/core/utils/telemetry.py
@@ -1,17 +1,20 @@
 """
-Sends anonymous telemetry to posthog. This helps us know how people are using OI / what needs our focus.
+Sends anonymous telemetry to posthog. This helps us know how people are
+using OI / what needs our focus.
 
 Disable anonymous telemetry by execute one of below:
 1. Running `interpreter --disable_telemetry` in command line.
 2. Executing `interpreter.disable_telemetry = True` in Python.
 3. Setting the `DISABLE_TELEMETRY` os var to `true`.
 
-based on ChromaDB's telemetry: https://github.com/chroma-core/chroma/tree/main/chromadb/telemetry/product
+based on ChromaDB's telemetry:
+https://github.com/chroma-core/chroma/tree/main/chromadb/telemetry/product
 """
 
 import contextlib
 import json
 import os
+import re
 import threading
 import uuid
 
@@ -22,7 +25,8 @@
 def get_or_create_uuid():
     try:
         uuid_file_path = os.path.join(
-            os.path.expanduser("~"), ".cache", "open-interpreter", "telemetry_user_id"
+            os.path.expanduser("~"),
+            ".cache", "open-interpreter", "telemetry_user_id"
         )
         os.makedirs(
             os.path.dirname(uuid_file_path), exist_ok=True
@@ -44,10 +48,69 @@ def get_or_create_uuid():
 user_id = get_or_create_uuid()
 
 
+# --- Sanitization helpers ---
+
+# Matches common absolute file paths (Unix and Windows)
+_PATH_PATTERN = re.compile(
+    r'(?:[A-Za-z]:\\|/)(?:[\w.\-]+[/\\])*[\w.\-]+'
+)
+
+# Environment variable references like $HOME, %USERPROFILE%
+_ENV_VAR_PATTERN = re.compile(
+    r'(?:\$[A-Z_]+|%[A-Z_]+%)'
+)
+
+# Sensitive keys whose values should be redacted
+_SENSITIVE_KEYS = frozenset({
+    "api_key", "api_secret", "token", "password", "secret",
+    "authorization", "credential", "private_key",
+})
+
+
+def _sanitize_value(value):
+    """Recursively sanitize a value, stripping file paths and sensitive data.
+
+    Strings have path-like and environment-variable substrings removed,
+    dicts have the values of sensitive keys blanked, and lists/tuples come
+    back as lists of sanitized items. Any other type is returned unchanged.
+    """
+    if isinstance(value, str):
+        # Redact absolute file paths
+        sanitized = _PATH_PATTERN.sub("", value)
+        # Redact environment variable references
+        sanitized = _ENV_VAR_PATTERN.sub("", sanitized)
+        return sanitized
+    elif isinstance(value, dict):
+        # Keys are not guaranteed to be strings; str() keeps the sensitive-key
+        # check from raising AttributeError on e.g. integer keys.
+        return {
+            k: "" if str(k).lower() in _SENSITIVE_KEYS else _sanitize_value(v)
+            for k, v in value.items()
+        }
+    elif isinstance(value, (list, tuple)):
+        return [_sanitize_value(item) for item in value]
+    return value
+
+
+def _sanitize_properties(properties):
+    """
+    Sanitize telemetry properties to prevent accidental leakage of
+    file paths, credentials, or other sensitive information in
+    exception stack traces or user-supplied data.
+    """
+    if not isinstance(properties, dict):
+        return properties
+    return _sanitize_value(properties)
+
+
 def send_telemetry(event_name, properties=None):
     if properties is None:
         properties = {}
     properties["oi_version"] = version("open-interpreter")
+
+    # Sanitize all properties before sending
+    properties = _sanitize_properties(properties)
+
     try:
         url = "https://app.posthog.com/capture"
         headers = {"Content-Type": "application/json"}

From dab567c543ad1a1673e2baac4233ec8bc2cbe78d Mon Sep 17 00:00:00 2001
From: LLM Vector Engineer
Date: Tue, 7 Apr 2026 22:57:11 +0400
Subject: [PATCH 2/3] feat(security): add SHA-256 hash verification for model downloads

New utility module that:
- Computes SHA-256 checksums of downloaded model files
- Verifies against expected hashes when available
- Warns users when no hash is available for verification
- Automatically removes files that fail integrity checks

This addresses the risk of tampered or corrupted model files being
downloaded and executed without any integrity verification.
---
Review notes (text in this section is ignored by `git am`):
- verify_model_integrity() now returns False when the file cannot be read
  (OSError), so the documented "always returns a boolean" contract holds.
- The expected hash is stripped of surrounding whitespace before comparing.
- Hunk count and diffstat number were updated by hand; the blob id on the
  "index" line was not regenerated.

 .../terminal_interface/download_security.py   | 77 +++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 interpreter/terminal_interface/download_security.py

diff --git a/interpreter/terminal_interface/download_security.py b/interpreter/terminal_interface/download_security.py
new file mode 100644
index 0000000000..019df0c497
--- /dev/null
+++ b/interpreter/terminal_interface/download_security.py
@@ -0,0 +1,77 @@
+"""
+Utility for verifying the integrity of downloaded model files.
+
+Downloads from external sources (e.g. HuggingFace) should be verified
+against known SHA-256 checksums to prevent tampered or corrupted files
+from being executed.
+"""
+
+import hashlib
+import os
+
+
+def compute_sha256(file_path, chunk_size=8192):
+    """Compute SHA-256 hash of a file."""
+    sha256 = hashlib.sha256()
+    with open(file_path, "rb") as f:
+        while True:
+            chunk = f.read(chunk_size)
+            if not chunk:
+                break
+            sha256.update(chunk)
+    return sha256.hexdigest()
+
+
+def verify_model_integrity(model_path, expected_hash=None, model_name="model"):
+    """
+    Verify the integrity of a downloaded model file.
+
+    Args:
+        model_path: Path to the downloaded file
+        expected_hash: Expected SHA-256 hex digest (or None if unknown)
+        model_name: Human-readable model name for log messages
+
+    Returns:
+        True if verification passed (or no hash to verify against)
+        False if the file is missing, unreadable, or its hash does not match
+
+    Raises:
+        No exceptions — always returns a boolean. Callers decide policy.
+    """
+    if not os.path.exists(model_path):
+        print(f"\n⚠️ Warning: Model file not found at {model_path}")
+        return False
+
+    # Hashing can fail (unreadable file, path is a directory, ...); report it
+    # as a failed check so the "always returns a boolean" contract holds.
+    try:
+        actual_hash = compute_sha256(model_path)
+    except OSError as exc:
+        print(f"\n⚠️ Warning: Could not read model file at {model_path}: {exc}")
+        return False
+
+    if expected_hash is None:
+        print(
+            f"\n⚠️ No SHA-256 checksum available for '{model_name}'."
+            f"\n Downloaded file hash: {actual_hash}"
+            f"\n Consider verifying this hash manually against the official source."
+        )
+        return True  # No hash to verify against — pass with warning
+
+    # Tolerate stray whitespace and case differences in the published hash.
+    if actual_hash.lower() == expected_hash.strip().lower():
+        print(f"\n✅ Integrity verified for '{model_name}' (SHA-256 match)")
+        return True
+    else:
+        print(
+            f"\n🚨 INTEGRITY CHECK FAILED for '{model_name}'!"
+            f"\n Expected: {expected_hash}"
+            f"\n Actual: {actual_hash}"
+            f"\n The downloaded file may be corrupted or tampered with."
+            f"\n Removing the suspicious file..."
+        )
+        try:
+            os.remove(model_path)
+        except OSError:
+            pass
+        return False

From b980d4108e22dc48ab1bdf1e45be5128a5717b55 Mon Sep 17 00:00:00 2001
From: LLM Vector Engineer
Date: Tue, 7 Apr 2026 23:00:17 +0400
Subject: [PATCH 3/3] feat(security): add SHA-256 integrity verification for model downloads
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Import and call verify_model_integrity() after each wget.download()
- Add 'sha256' field to all model entries (None for now — maintainers
  should populate with verified hashes from HuggingFace)
- If hash verification fails, the corrupted file is automatically removed
  and download_model() returns None
- When no hash is provided, a warning is printed with the computed hash
  so users can verify manually

This prevents execution of tampered or corrupted model files that are
downloaded from external sources without any integrity check.
---
 interpreter/terminal_interface/local_setup.py | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/interpreter/terminal_interface/local_setup.py b/interpreter/terminal_interface/local_setup.py
index 95ee192baa..3d69b1172f 100644
--- a/interpreter/terminal_interface/local_setup.py
+++ b/interpreter/terminal_interface/local_setup.py
@@ -11,6 +11,8 @@
 import requests
 import wget
 
+from interpreter.terminal_interface.download_security import verify_model_integrity
+
 
 def local_setup(interpreter, provider=None, model=None):
     def download_model(models_dir, models, interpreter):
@@ -51,72 +53,84 @@ def download_model(models_dir, models, interpreter):
                     "file_name": "Meta-Llama-3-8B-Instruct.Q4_K_M.llamafile",
                     "size": 4.95,
                     "url": "https://huggingface.co/Mozilla/Meta-Llama-3.1-8B-Instruct-llamafile/resolve/main/Meta-Llama-3.1-8B-Instruct.Q4_K_M.llamafile?download=true",
+                    "sha256": None,  # TODO: populate with verified hash from HuggingFace
                 },
                 {
                     "name": "Gemma-2-9b",
                     "file_name": "gemma-2-9b-it.Q4_K_M.llamafile",
                     "size": 5.79,
                     "url": "https://huggingface.co/jartine/gemma-2-9b-it-llamafile/resolve/main/gemma-2-9b-it.Q4_K_M.llamafile?download=true",
+                    "sha256": None,
                 },
                 {
                     "name": "Phi-3-mini",
                     "file_name": "Phi-3-mini-4k-instruct.Q4_K_M.llamafile",
                     "size": 2.42,
                     "url": "https://huggingface.co/Mozilla/Phi-3-mini-4k-instruct-llamafile/resolve/main/Phi-3-mini-4k-instruct.Q4_K_M.llamafile?download=true",
+                    "sha256": None,
                 },
                 {
                     "name": "Moondream2 (vision)",
                     "file_name": "moondream2-q5km-050824.llamafile",
                     "size": 1.98,
                     "url": "https://huggingface.co/cjpais/moondream2-llamafile/resolve/main/moondream2-q5km-050824.llamafile?download=true",
+                    "sha256": None,
                 },
                 {
                     "name": "Mistral-7B-Instruct",
                     "file_name": "Mistral-7B-Instruct-v0.3.Q4_K_M.llamafile",
                     "size": 4.40,
                     "url": "https://huggingface.co/Mozilla/Mistral-7B-Instruct-v0.3-llamafile/resolve/main/Mistral-7B-Instruct-v0.3.Q4_K_M.llamafile?download=true",
+                    "sha256": None,
                 },
                 {
                     "name": "Gemma-2-27b",
                     "file_name": "gemma-2-27b-it.Q4_K_M.llamafile",
                     "size": 16.7,
                     "url": "https://huggingface.co/jartine/gemma-2-27b-it-llamafile/resolve/main/gemma-2-27b-it.Q4_K_M.llamafile?download=true",
+                    "sha256": None,
                 },
                 {
                     "name": "TinyLlama-1.1B",
                     "file_name": "TinyLlama-1.1B-Chat-v1.0.Q4_K_M.llamafile",
                     "size": 0.70,
                     "url": "https://huggingface.co/Mozilla/TinyLlama-1.1B-Chat-v1.0-llamafile/resolve/main/TinyLlama-1.1B-Chat-v1.0.Q4_K_M.llamafile?download=true",
+                    "sha256": None,
                 },
                 {
                     "name": "Rocket-3B",
                     "file_name": "rocket-3b.Q4_K_M.llamafile",
                     "size": 1.74,
                     "url": "https://huggingface.co/Mozilla/rocket-3B-llamafile/resolve/main/rocket-3b.Q4_K_M.llamafile?download=true",
+                    "sha256": None,
                 },
                 {
                     "name": "LLaVA 1.5 (vision)",
                     "file_name": "llava-v1.5-7b-q4.llamafile",
                     "size": 4.29,
                     "url": "https://huggingface.co/Mozilla/llava-v1.5-7b-llamafile/resolve/main/llava-v1.5-7b-q4.llamafile?download=true",
+                    "sha256": None,
                 },
                 {
                     "name": "WizardCoder-Python-13B",
                     "file_name": "wizardcoder-python-13b.llamafile",
                     "size": 7.33,
                     "url": "https://huggingface.co/jartine/wizardcoder-13b-python/resolve/main/wizardcoder-python-13b.llamafile?download=true",
+                    "sha256": None,
                 },
                 {
                     "name": "WizardCoder-Python-34B",
                     "file_name": "wizardcoder-python-34b-v1.0.Q4_K_M.llamafile",
                     "size": 20.22,
                     "url": "https://huggingface.co/Mozilla/WizardCoder-Python-34B-V1.0-llamafile/resolve/main/wizardcoder-python-34b-v1.0.Q4_K_M.llamafile?download=true",
+                    "sha256": None,
                 },
                 {
                     "name": "Mixtral-8x7B-Instruct",
                     "file_name": "mixtral-8x7b-instruct-v0.1.Q5_K_M.llamafile",
                     "size": 30.03,
                     "url": "https://huggingface.co/jartine/Mixtral-8x7B-Instruct-v0.1-llamafile/resolve/main/mixtral-8x7b-instruct-v0.1.Q5_K_M.llamafile?download=true",
+                    "sha256": None,
                 },
             ]
 
@@ -164,6 +178,12 @@ def download_model(models_dir, models, interpreter):
                 print(f"\nDownloading {selected_model['name']}...\n")
                 wget.download(model_url, model_path)
 
+                # Verify downloaded model integrity
+                expected_hash = selected_model.get("sha256")
+                if not verify_model_integrity(model_path, expected_hash, selected_model["name"]):
+                    print("\nDownload integrity check failed. Please try again.\n")
+                    return None
+
                 # Make the model executable if not on Windows
                 if platform.system() != "Windows":
                     subprocess.run(["chmod", "+x", model_path], check=True)