diff --git a/interpreter/core/utils/telemetry.py b/interpreter/core/utils/telemetry.py index 2df870a09b..5ae79f1a08 100644 --- a/interpreter/core/utils/telemetry.py +++ b/interpreter/core/utils/telemetry.py @@ -1,17 +1,20 @@ """ -Sends anonymous telemetry to posthog. This helps us know how people are using OI / what needs our focus. +Sends anonymous telemetry to posthog. This helps us know how people are +using OI / what needs our focus. Disable anonymous telemetry by execute one of below: 1. Running `interpreter --disable_telemetry` in command line. 2. Executing `interpreter.disable_telemetry = True` in Python. 3. Setting the `DISABLE_TELEMETRY` os var to `true`. -based on ChromaDB's telemetry: https://github.com/chroma-core/chroma/tree/main/chromadb/telemetry/product +based on ChromaDB's telemetry: +https://github.com/chroma-core/chroma/tree/main/chromadb/telemetry/product """ import contextlib import json import os +import re import threading import uuid @@ -22,7 +25,8 @@ def get_or_create_uuid(): try: uuid_file_path = os.path.join( - os.path.expanduser("~"), ".cache", "open-interpreter", "telemetry_user_id" + os.path.expanduser("~"), + ".cache", "open-interpreter", "telemetry_user_id" ) os.makedirs( os.path.dirname(uuid_file_path), exist_ok=True @@ -44,10 +48,62 @@ def get_or_create_uuid(): user_id = get_or_create_uuid() +# --- Sanitization helpers --- + +# Matches common absolute file paths (Unix and Windows) +_PATH_PATTERN = re.compile( + r'(?:[A-Za-z]:\\|/)(?:[\w.\-]+[/\\])*[\w.\-]+' +) + +# Environment variable references like $HOME, %USERPROFILE% +_ENV_VAR_PATTERN = re.compile( + r'(?:\$[A-Z_]+|%[A-Z_]+%)' +) + +# Sensitive keys whose values should be redacted +_SENSITIVE_KEYS = frozenset({ + "api_key", "api_secret", "token", "password", "secret", + "authorization", "credential", "private_key", +}) + + +def _sanitize_value(value): + """Recursively sanitize a value, stripping file paths and sensitive data.""" + if isinstance(value, str): + # Redact absolute 
file paths + sanitized = _PATH_PATTERN.sub("", value) + # Redact environment variable references + sanitized = _ENV_VAR_PATTERN.sub("", sanitized) + return sanitized + elif isinstance(value, dict): + return { + k: "" if k.lower() in _SENSITIVE_KEYS else _sanitize_value(v) + for k, v in value.items() + } + elif isinstance(value, (list, tuple)): + return [_sanitize_value(item) for item in value] + return value + + +def _sanitize_properties(properties): + """ + Sanitize telemetry properties to prevent accidental leakage of + file paths, credentials, or other sensitive information in + exception stack traces or user-supplied data. + """ + if not isinstance(properties, dict): + return properties + return _sanitize_value(properties) + + def send_telemetry(event_name, properties=None): if properties is None: properties = {} properties["oi_version"] = version("open-interpreter") + + # Sanitize all properties before sending + properties = _sanitize_properties(properties) + try: url = "https://app.posthog.com/capture" headers = {"Content-Type": "application/json"} diff --git a/interpreter/terminal_interface/download_security.py b/interpreter/terminal_interface/download_security.py new file mode 100644 index 0000000000..019df0c497 --- /dev/null +++ b/interpreter/terminal_interface/download_security.py @@ -0,0 +1,70 @@ +""" +Utility for verifying the integrity of downloaded model files. + +Downloads from external sources (e.g. HuggingFace) should be verified +against known SHA-256 checksums to prevent tampered or corrupted files +from being executed. 
"""
Utility for verifying the integrity of downloaded model files.

Downloads from external sources (e.g. HuggingFace) should be verified
against known SHA-256 checksums to prevent tampered or corrupted files
from being executed.
"""

import hashlib
import os


def compute_sha256(file_path, chunk_size=8192):
    """Compute the SHA-256 hex digest of the file at *file_path*.

    The file is read in *chunk_size*-byte chunks so arbitrarily large
    model files can be hashed without loading them fully into memory.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    sha256 = hashlib.sha256()
    with open(file_path, "rb") as f:
        while True:
            chunk = f.read(chunk_size)
            if not chunk:
                break
            sha256.update(chunk)
    return sha256.hexdigest()


def verify_model_integrity(model_path, expected_hash=None, model_name="model"):
    """
    Verify the integrity of a downloaded model file.

    Args:
        model_path: Path to the downloaded file
        expected_hash: Expected SHA-256 hex digest (or None if unknown)
        model_name: Human-readable model name for log messages

    Returns:
        True if verification passed (or no hash to verify against)
        False if hash mismatch detected, or the file is missing/unreadable

    Raises:
        No exceptions — always returns a boolean. Callers decide policy.
    """
    if not os.path.exists(model_path):
        print(f"\n⚠️ Warning: Model file not found at {model_path}")
        return False

    # The contract above promises "no exceptions", but the original code
    # let OSError escape from compute_sha256 (e.g. permission denied, or
    # the file vanishing between the exists() check and open()).
    try:
        actual_hash = compute_sha256(model_path)
    except OSError as e:
        print(f"\n⚠️ Warning: Could not read model file at {model_path}: {e}")
        return False

    if expected_hash is None:
        # Nothing to compare against — surface the computed hash so the
        # user can verify it manually, but do not block the download.
        print(
            f"\n⚠️ No SHA-256 checksum available for '{model_name}'."
            f"\n   Downloaded file hash: {actual_hash}"
            f"\n   Consider verifying this hash manually against the official source."
        )
        return True  # No hash to verify against — pass with warning

    # Hex digests are case-insensitive; normalize both sides.
    if actual_hash.lower() == expected_hash.lower():
        print(f"\n✅ Integrity verified for '{model_name}' (SHA-256 match)")
        return True
    else:
        print(
            f"\n🚨 INTEGRITY CHECK FAILED for '{model_name}'!"
            f"\n   Expected: {expected_hash}"
            f"\n   Actual:   {actual_hash}"
            f"\n   The downloaded file may be corrupted or tampered with."
            f"\n   Removing the suspicious file..."
        )
        # Best-effort cleanup: never let a failed unlink mask the
        # integrity-failure result.
        try:
            os.remove(model_path)
        except OSError:
            pass
        return False
"size": 16.7, "url": "https://huggingface.co/jartine/gemma-2-27b-it-llamafile/resolve/main/gemma-2-27b-it.Q4_K_M.llamafile?download=true", + "sha256": None, }, { "name": "TinyLlama-1.1B", "file_name": "TinyLlama-1.1B-Chat-v1.0.Q4_K_M.llamafile", "size": 0.70, "url": "https://huggingface.co/Mozilla/TinyLlama-1.1B-Chat-v1.0-llamafile/resolve/main/TinyLlama-1.1B-Chat-v1.0.Q4_K_M.llamafile?download=true", + "sha256": None, }, { "name": "Rocket-3B", "file_name": "rocket-3b.Q4_K_M.llamafile", "size": 1.74, "url": "https://huggingface.co/Mozilla/rocket-3B-llamafile/resolve/main/rocket-3b.Q4_K_M.llamafile?download=true", + "sha256": None, }, { "name": "LLaVA 1.5 (vision)", "file_name": "llava-v1.5-7b-q4.llamafile", "size": 4.29, "url": "https://huggingface.co/Mozilla/llava-v1.5-7b-llamafile/resolve/main/llava-v1.5-7b-q4.llamafile?download=true", + "sha256": None, }, { "name": "WizardCoder-Python-13B", "file_name": "wizardcoder-python-13b.llamafile", "size": 7.33, "url": "https://huggingface.co/jartine/wizardcoder-13b-python/resolve/main/wizardcoder-python-13b.llamafile?download=true", + "sha256": None, }, { "name": "WizardCoder-Python-34B", "file_name": "wizardcoder-python-34b-v1.0.Q4_K_M.llamafile", "size": 20.22, "url": "https://huggingface.co/Mozilla/WizardCoder-Python-34B-V1.0-llamafile/resolve/main/wizardcoder-python-34b-v1.0.Q4_K_M.llamafile?download=true", + "sha256": None, }, { "name": "Mixtral-8x7B-Instruct", "file_name": "mixtral-8x7b-instruct-v0.1.Q5_K_M.llamafile", "size": 30.03, "url": "https://huggingface.co/jartine/Mixtral-8x7B-Instruct-v0.1-llamafile/resolve/main/mixtral-8x7b-instruct-v0.1.Q5_K_M.llamafile?download=true", + "sha256": None, }, ] @@ -164,6 +178,12 @@ def download_model(models_dir, models, interpreter): print(f"\nDownloading {selected_model['name']}...\n") wget.download(model_url, model_path) + # Verify downloaded model integrity + expected_hash = selected_model.get("sha256") + if not verify_model_integrity(model_path, expected_hash, 
selected_model["name"]): + print("\nDownload integrity check failed. Please try again.\n") + return None + # Make the model executable if not on Windows if platform.system() != "Windows": subprocess.run(["chmod", "+x", model_path], check=True)