Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 59 additions & 3 deletions interpreter/core/utils/telemetry.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
"""
Sends anonymous telemetry to posthog. This helps us know how people are using OI / what needs our focus.
Sends anonymous telemetry to posthog. This helps us know how people are
using OI / what needs our focus.

Disable anonymous telemetry by doing one of the following:
1. Running `interpreter --disable_telemetry` in command line.
2. Executing `interpreter.disable_telemetry = True` in Python.
3. Setting the `DISABLE_TELEMETRY` os var to `true`.

based on ChromaDB's telemetry: https://github.com/chroma-core/chroma/tree/main/chromadb/telemetry/product
based on ChromaDB's telemetry:
https://github.com/chroma-core/chroma/tree/main/chromadb/telemetry/product
"""

import contextlib
import json
import os
import re
import threading
import uuid

Expand All @@ -22,7 +25,8 @@
def get_or_create_uuid():
try:
uuid_file_path = os.path.join(
os.path.expanduser("~"), ".cache", "open-interpreter", "telemetry_user_id"
os.path.expanduser("~"),
".cache", "open-interpreter", "telemetry_user_id"
)
os.makedirs(
os.path.dirname(uuid_file_path), exist_ok=True
Expand All @@ -44,10 +48,62 @@ def get_or_create_uuid():
user_id = get_or_create_uuid()


# --- Sanitization helpers ---

# Matches common absolute file paths (Unix and Windows)
_PATH_PATTERN = re.compile(
r'(?:[A-Za-z]:\\|/)(?:[\w.\-]+[/\\])*[\w.\-]+'
)

# Environment variable references like $HOME, %USERPROFILE%
_ENV_VAR_PATTERN = re.compile(
r'(?:\$[A-Z_]+|%[A-Z_]+%)'
)

# Sensitive keys whose values should be redacted
_SENSITIVE_KEYS = frozenset({
"api_key", "api_secret", "token", "password", "secret",
"authorization", "credential", "private_key",
})


def _sanitize_value(value):
"""Recursively sanitize a value, stripping file paths and sensitive data."""
if isinstance(value, str):
# Redact absolute file paths
sanitized = _PATH_PATTERN.sub("<path>", value)
# Redact environment variable references
sanitized = _ENV_VAR_PATTERN.sub("<env>", sanitized)
return sanitized
elif isinstance(value, dict):
return {
k: "<redacted>" if k.lower() in _SENSITIVE_KEYS else _sanitize_value(v)
for k, v in value.items()
}
elif isinstance(value, (list, tuple)):
return [_sanitize_value(item) for item in value]
return value


def _sanitize_properties(properties):
"""
Sanitize telemetry properties to prevent accidental leakage of
file paths, credentials, or other sensitive information in
exception stack traces or user-supplied data.
"""
if not isinstance(properties, dict):
return properties
return _sanitize_value(properties)


def send_telemetry(event_name, properties=None):
if properties is None:
properties = {}
properties["oi_version"] = version("open-interpreter")

# Sanitize all properties before sending
properties = _sanitize_properties(properties)

try:
url = "https://app.posthog.com/capture"
headers = {"Content-Type": "application/json"}
Expand Down
70 changes: 70 additions & 0 deletions interpreter/terminal_interface/download_security.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
"""
Utility for verifying the integrity of downloaded model files.

Downloads from external sources (e.g. HuggingFace) should be verified
against known SHA-256 checksums to prevent tampered or corrupted files
from being executed.
"""

import hashlib
import os


def compute_sha256(file_path, chunk_size=8192):
    """Return the SHA-256 hex digest of the file at ``file_path``.

    The file is opened in binary mode and fed to the hash in pieces of
    ``chunk_size`` bytes, so the whole file is never held in memory at once.
    """
    digest = hashlib.sha256()
    with open(file_path, "rb") as stream:
        # The two-argument iter() keeps calling read() until it returns b"" (EOF).
        for piece in iter(lambda: stream.read(chunk_size), b""):
            digest.update(piece)
    return digest.hexdigest()


def verify_model_integrity(model_path, expected_hash=None, model_name="model"):
    """
    Verify the integrity of a downloaded model file.

    Args:
        model_path: Path to the downloaded file
        expected_hash: Expected SHA-256 hex digest (or None if unknown).
            Surrounding whitespace and letter case are ignored.
        model_name: Human-readable model name for log messages

    Returns:
        True if verification passed (or no hash to verify against)
        False if the file is missing, cannot be read, or a hash mismatch
        was detected

    Raises:
        No exceptions for file-system failures — an unreadable file is
        reported and yields False. Callers decide policy.

    Side effects:
        Prints a status message in every case. On a hash mismatch the file
        at ``model_path`` is deleted; if deletion fails, that is reported
        so the user can remove the file manually.
    """
    if not os.path.exists(model_path):
        print(f"\n⚠️ Warning: Model file not found at {model_path}")
        return False

    # Hashing can fail even though the path exists (it is a directory,
    # permissions are missing, the file vanished meanwhile). Report that as
    # a failed verification instead of letting the error escape.
    try:
        actual_hash = compute_sha256(model_path)
    except OSError as exc:
        print(f"\n⚠️ Warning: Could not read model file at {model_path}: {exc}")
        return False

    if expected_hash is None:
        print(
            f"\n⚠️ No SHA-256 checksum available for '{model_name}'."
            f"\n Downloaded file hash: {actual_hash}"
            f"\n Consider verifying this hash manually against the official source."
        )
        return True  # No hash to verify against — pass with warning

    # Ignore stray whitespace/newlines around the expected digest so a
    # copy-pasted hash does not trigger a false mismatch (and a deletion).
    if actual_hash.lower() == expected_hash.strip().lower():
        print(f"\n✅ Integrity verified for '{model_name}' (SHA-256 match)")
        return True

    print(
        f"\n🚨 INTEGRITY CHECK FAILED for '{model_name}'!"
        f"\n Expected: {expected_hash}"
        f"\n Actual: {actual_hash}"
        f"\n The downloaded file may be corrupted or tampered with."
        f"\n Removing the suspicious file..."
    )
    try:
        os.remove(model_path)
    except OSError as exc:
        # Do not crash, but do not hide it either: the suspicious file is
        # still on disk and the user needs to know.
        print(
            f"\n⚠️ Warning: Could not remove {model_path}: {exc}"
            f"\n Please delete this file manually."
        )
    return False
20 changes: 20 additions & 0 deletions interpreter/terminal_interface/local_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import requests
import wget

from interpreter.terminal_interface.download_security import verify_model_integrity


def local_setup(interpreter, provider=None, model=None):
def download_model(models_dir, models, interpreter):
Expand Down Expand Up @@ -51,72 +53,84 @@ def download_model(models_dir, models, interpreter):
"file_name": "Meta-Llama-3-8B-Instruct.Q4_K_M.llamafile",
"size": 4.95,
"url": "https://huggingface.co/Mozilla/Meta-Llama-3.1-8B-Instruct-llamafile/resolve/main/Meta-Llama-3.1-8B-Instruct.Q4_K_M.llamafile?download=true",
"sha256": None, # TODO: populate with verified hash from HuggingFace
},
{
"name": "Gemma-2-9b",
"file_name": "gemma-2-9b-it.Q4_K_M.llamafile",
"size": 5.79,
"url": "https://huggingface.co/jartine/gemma-2-9b-it-llamafile/resolve/main/gemma-2-9b-it.Q4_K_M.llamafile?download=true",
"sha256": None,
},
{
"name": "Phi-3-mini",
"file_name": "Phi-3-mini-4k-instruct.Q4_K_M.llamafile",
"size": 2.42,
"url": "https://huggingface.co/Mozilla/Phi-3-mini-4k-instruct-llamafile/resolve/main/Phi-3-mini-4k-instruct.Q4_K_M.llamafile?download=true",
"sha256": None,
},
{
"name": "Moondream2 (vision)",
"file_name": "moondream2-q5km-050824.llamafile",
"size": 1.98,
"url": "https://huggingface.co/cjpais/moondream2-llamafile/resolve/main/moondream2-q5km-050824.llamafile?download=true",
"sha256": None,
},
{
"name": "Mistral-7B-Instruct",
"file_name": "Mistral-7B-Instruct-v0.3.Q4_K_M.llamafile",
"size": 4.40,
"url": "https://huggingface.co/Mozilla/Mistral-7B-Instruct-v0.3-llamafile/resolve/main/Mistral-7B-Instruct-v0.3.Q4_K_M.llamafile?download=true",
"sha256": None,
},
{
"name": "Gemma-2-27b",
"file_name": "gemma-2-27b-it.Q4_K_M.llamafile",
"size": 16.7,
"url": "https://huggingface.co/jartine/gemma-2-27b-it-llamafile/resolve/main/gemma-2-27b-it.Q4_K_M.llamafile?download=true",
"sha256": None,
},
{
"name": "TinyLlama-1.1B",
"file_name": "TinyLlama-1.1B-Chat-v1.0.Q4_K_M.llamafile",
"size": 0.70,
"url": "https://huggingface.co/Mozilla/TinyLlama-1.1B-Chat-v1.0-llamafile/resolve/main/TinyLlama-1.1B-Chat-v1.0.Q4_K_M.llamafile?download=true",
"sha256": None,
},
{
"name": "Rocket-3B",
"file_name": "rocket-3b.Q4_K_M.llamafile",
"size": 1.74,
"url": "https://huggingface.co/Mozilla/rocket-3B-llamafile/resolve/main/rocket-3b.Q4_K_M.llamafile?download=true",
"sha256": None,
},
{
"name": "LLaVA 1.5 (vision)",
"file_name": "llava-v1.5-7b-q4.llamafile",
"size": 4.29,
"url": "https://huggingface.co/Mozilla/llava-v1.5-7b-llamafile/resolve/main/llava-v1.5-7b-q4.llamafile?download=true",
"sha256": None,
},
{
"name": "WizardCoder-Python-13B",
"file_name": "wizardcoder-python-13b.llamafile",
"size": 7.33,
"url": "https://huggingface.co/jartine/wizardcoder-13b-python/resolve/main/wizardcoder-python-13b.llamafile?download=true",
"sha256": None,
},
{
"name": "WizardCoder-Python-34B",
"file_name": "wizardcoder-python-34b-v1.0.Q4_K_M.llamafile",
"size": 20.22,
"url": "https://huggingface.co/Mozilla/WizardCoder-Python-34B-V1.0-llamafile/resolve/main/wizardcoder-python-34b-v1.0.Q4_K_M.llamafile?download=true",
"sha256": None,
},
{
"name": "Mixtral-8x7B-Instruct",
"file_name": "mixtral-8x7b-instruct-v0.1.Q5_K_M.llamafile",
"size": 30.03,
"url": "https://huggingface.co/jartine/Mixtral-8x7B-Instruct-v0.1-llamafile/resolve/main/mixtral-8x7b-instruct-v0.1.Q5_K_M.llamafile?download=true",
"sha256": None,
},
]

Expand Down Expand Up @@ -164,6 +178,12 @@ def download_model(models_dir, models, interpreter):
print(f"\nDownloading {selected_model['name']}...\n")
wget.download(model_url, model_path)

# Verify downloaded model integrity
expected_hash = selected_model.get("sha256")
if not verify_model_integrity(model_path, expected_hash, selected_model["name"]):
print("\nDownload integrity check failed. Please try again.\n")
return None

# Make the model executable if not on Windows
if platform.system() != "Windows":
subprocess.run(["chmod", "+x", model_path], check=True)
Expand Down