diff --git a/claude-code-tracing/hooks/notification.sh b/claude-code-tracing/hooks/notification.sh index 6daafab..1467b5e 100644 --- a/claude-code-tracing/hooks/notification.sh +++ b/claude-code-tracing/hooks/notification.sh @@ -14,6 +14,8 @@ trace_id=$(get_state "current_trace_id") session_id=$(get_state "session_id") message=$(echo "$input" | jq -r '.message // empty' 2>/dev/null || echo "") title=$(echo "$input" | jq -r '.title // empty' 2>/dev/null || echo "") +message=$(redact_content "$ARIZE_LOG_PROMPTS" "$message") +title=$(redact_content "$ARIZE_LOG_PROMPTS" "$title") notif_type=$(echo "$input" | jq -r '.notification_type // "info"' 2>/dev/null || echo "info") span_id=$(generate_uuid | tr -d '-' | cut -c1-16) diff --git a/claude-code-tracing/hooks/permission_request.sh b/claude-code-tracing/hooks/permission_request.sh index 2c36c99..cc2af72 100644 --- a/claude-code-tracing/hooks/permission_request.sh +++ b/claude-code-tracing/hooks/permission_request.sh @@ -16,6 +16,7 @@ trace_id=$(get_state "current_trace_id") permission=$(echo "$input" | jq -r '.permission // empty' 2>/dev/null || echo "") tool=$(echo "$input" | jq -r '.tool_name // empty' 2>/dev/null || echo "") tool_input=$(echo "$input" | jq -c '.tool_input // empty' 2>/dev/null || echo "") +tool_input=$(redact_content "$ARIZE_LOG_TOOL_DETAILS" "$tool_input") span_id=$(generate_uuid | tr -d '-' | cut -c1-16) ts=$(get_timestamp_ms) diff --git a/claude-code-tracing/hooks/post_tool_use.sh b/claude-code-tracing/hooks/post_tool_use.sh index 5247dec..84fb972 100644 --- a/claude-code-tracing/hooks/post_tool_use.sh +++ b/claude-code-tracing/hooks/post_tool_use.sh @@ -73,6 +73,15 @@ span_id=$(generate_uuid | tr -d '-' | cut -c1-16) user_id=$(get_state "user_id") +# Redact content based on logging level +tool_input=$(redact_content "$ARIZE_LOG_TOOL_CONTENT" "$tool_input") +tool_response=$(redact_content "$ARIZE_LOG_TOOL_CONTENT" "$tool_response") +tool_description=$(redact_content "$ARIZE_LOG_TOOL_DETAILS" 
"$tool_description") +[[ -n "$tool_command" ]] && tool_command=$(redact_content "$ARIZE_LOG_TOOL_DETAILS" "$tool_command") +[[ -n "$tool_file_path" ]] && tool_file_path=$(redact_content "$ARIZE_LOG_TOOL_DETAILS" "$tool_file_path") +[[ -n "$tool_url" ]] && tool_url=$(redact_content "$ARIZE_LOG_TOOL_DETAILS" "$tool_url") +[[ -n "$tool_query" ]] && tool_query=$(redact_content "$ARIZE_LOG_TOOL_DETAILS" "$tool_query") + # Build base attributes attrs=$(jq -n \ --arg sid "$session_id" --arg tool "$tool_name" \ diff --git a/claude-code-tracing/hooks/stop.sh b/claude-code-tracing/hooks/stop.sh index cff47ad..4dc45f7 100644 --- a/claude-code-tracing/hooks/stop.sh +++ b/claude-code-tracing/hooks/stop.sh @@ -53,6 +53,9 @@ fi output=$(printf '%s' "$output" | head -c 5000) [[ -z "$output" ]] && output="(No response)" +# Prompt is already redacted at capture time in user_prompt_submit.sh. +# Model responses are always included — they are the primary value of tracing. + # Compute total token count total_tokens=$((in_tokens + out_tokens)) diff --git a/claude-code-tracing/hooks/subagent_stop.sh b/claude-code-tracing/hooks/subagent_stop.sh index f9dbe62..7ae78cc 100644 --- a/claude-code-tracing/hooks/subagent_stop.sh +++ b/claude-code-tracing/hooks/subagent_stop.sh @@ -69,6 +69,9 @@ if [[ -n "$transcript_path" && -f "$transcript_path" ]]; then subagent_output=$(echo "$subagent_output" | head -c 5000) fi +# Redact subagent output unless opted in +subagent_output=$(redact_content "$ARIZE_LOG_TOOL_CONTENT" "$subagent_output") + # Fall back to current time if no start time found [[ -z "$start_time" ]] && start_time="$end_time" diff --git a/claude-code-tracing/hooks/user_prompt_submit.sh b/claude-code-tracing/hooks/user_prompt_submit.sh index 6046d61..c07d520 100644 --- a/claude-code-tracing/hooks/user_prompt_submit.sh +++ b/claude-code-tracing/hooks/user_prompt_submit.sh @@ -46,7 +46,9 @@ inc_state "trace_count" set_state "current_trace_id" "$(generate_uuid | tr -d '-')" set_state 
"current_trace_span_id" "$(generate_uuid | tr -d '-' | cut -c1-16)" set_state "current_trace_start_time" "$(get_timestamp_ms)" -set_state "current_trace_prompt" "$(echo "$input" | jq -r '.prompt // empty' 2>/dev/null | head -c 1000)" +prompt=$(echo "$input" | jq -r '.prompt // empty' 2>/dev/null | head -c 1000) +prompt=$(redact_content "$ARIZE_LOG_PROMPTS" "$prompt") +set_state "current_trace_prompt" "$prompt" # Track transcript position for parsing AI response later transcript=$(echo "$input" | jq -r '.transcript_path // empty' 2>/dev/null || echo "") diff --git a/claude-code-tracing/scripts/setup.sh b/claude-code-tracing/scripts/setup.sh index 1b40294..4705f56 100644 --- a/claude-code-tracing/scripts/setup.sh +++ b/claude-code-tracing/scripts/setup.sh @@ -87,7 +87,8 @@ case "$choice" in 2|arize|ax|AX) echo "" - read -p "Arize API Key: " api_key + read -sp "Arize API Key: " api_key + echo "" read -p "Arize Space ID: " space_id if [[ -z "$api_key" || -z "$space_id" ]]; then @@ -130,6 +131,32 @@ if [[ -n "$user_id" ]]; then echo -e "${GREEN}✓${NC} User ID set: $user_id" fi +# Content logging levels +echo "" +echo -e "${YELLOW}Security:${NC} Traces can contain sensitive data — credentials, PII, file contents." +echo "Tool details and tool content are redacted by default. Prompts are included" +echo "by default for debugging visibility. Adjust these settings to match your" +echo "security requirements." +echo "" + +echo -e " Log user prompts? [Y/n]: \c" +read -p "" log_prompts +echo -e " Log what tools were asked to do (commands, file paths, URLs)? [y/N]: \c" +read -p "" log_tool_details +echo -e " Log what tools returned (file contents, command output)? [y/N]: \c" +read -p "" log_tool_content + +log_env='{"ARIZE_LOG_PROMPTS": "true"}' +[[ "$log_prompts" =~ ^[Nn]$ ]] && log_env='{}' +[[ "$log_tool_details" =~ ^[Yy]$ ]] && log_env=$(echo "$log_env" | jq '. + {"ARIZE_LOG_TOOL_DETAILS": "true"}') +[[ "$log_tool_content" =~ ^[Yy]$ ]] && log_env=$(echo "$log_env" | jq '. 
+ {"ARIZE_LOG_TOOL_CONTENT": "true"}') + +if [[ "$log_env" != "{}" ]]; then + ensure_settings_file + jq --argjson logging "$log_env" '.env = (.env // {}) + $logging' \ + "$SETTINGS_FILE" > "${SETTINGS_FILE}.tmp" && mv "${SETTINGS_FILE}.tmp" "$SETTINGS_FILE" +fi + echo "" echo "Configuration saved to $SETTINGS_FILE" echo "" diff --git a/codex-tracing/hooks/notify.sh b/codex-tracing/hooks/notify.sh index f380b98..eea7ba5 100644 --- a/codex-tracing/hooks/notify.sh +++ b/codex-tracing/hooks/notify.sh @@ -87,6 +87,9 @@ fi user_prompt=$(printf '%s' "$user_prompt" | head -c 5000) assistant_output=$(printf '%s' "$assistant_output" | head -c 5000) [[ -z "$assistant_output" ]] && assistant_output="(No response)" + +# Redact user prompt unless opted in. Model responses are always included. +user_prompt=$(redact_content "$ARIZE_LOG_PROMPTS" "$user_prompt") debug_dump "${debug_prefix}_text" "$(jq -nc --arg user "$user_prompt" --arg assistant "$assistant_output" '{input:$user,assistant:$assistant}')" # --- Generate span IDs --- @@ -338,6 +341,7 @@ if [[ "$event_count" -gt 0 ]]; then if [[ -n "$result" && "$result" != "null" ]]; then result_time_ns=$(echo "$result" | jq -r '.time_ns // "0"' 2>/dev/null) tool_output=$(echo "$result" | jq -r '.attrs.output // .attrs.result // .attrs["tool.output"] // ""' 2>/dev/null | head -c 2000) + tool_output=$(redact_content "$ARIZE_LOG_TOOL_CONTENT" "$tool_output") fi # Convert nanosecond timestamps to milliseconds for build_span diff --git a/core/common.sh b/core/common.sh index e42f7bc..a706764 100644 --- a/core/common.sh +++ b/core/common.sh @@ -25,12 +25,37 @@ ARIZE_VERBOSE="${ARIZE_VERBOSE:-false}" ARIZE_TRACE_DEBUG="${ARIZE_TRACE_DEBUG:-false}" ARIZE_LOG_FILE="${ARIZE_LOG_FILE:-/tmp/arize-agent-kit.log}" +# --- Content redaction controls --- +# Prompts, tool arguments, and tool output may contain credentials, PII, or +# confidential data. These flags control what is included in exported spans. 
+# When a flag is off, spans record only content length: <redacted (N chars)>. +# +# ARIZE_LOG_PROMPTS: User prompts (default: on). Model responses are always included. +# ARIZE_LOG_TOOL_DETAILS: What was requested — bash commands, file paths, URLs, grep patterns (default: off). +# ARIZE_LOG_TOOL_CONTENT: What was returned — full tool output such as file contents, command output, API responses (default: off). +ARIZE_LOG_PROMPTS="${ARIZE_LOG_PROMPTS:-true}" +ARIZE_LOG_TOOL_DETAILS="${ARIZE_LOG_TOOL_DETAILS:-false}" +ARIZE_LOG_TOOL_CONTENT="${ARIZE_LOG_TOOL_CONTENT:-false}" + # --- Logging --- _log_to_file() { [[ -n "$ARIZE_LOG_FILE" ]] && echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" >> "$ARIZE_LOG_FILE" || true; } log() { [[ "$ARIZE_VERBOSE" == "true" ]] && { echo "[arize] $*" >&2; _log_to_file "$*"; } || true; } log_always() { echo "[arize] $*" >&2; _log_to_file "$*"; } error() { echo "[arize] ERROR: $*" >&2; _log_to_file "ERROR: $*"; } +# --- Content Redaction Helpers --- +# Prints the content unchanged when the flag is exactly "true"; otherwise prints "<redacted (N chars)>" so only the content length is exported. +# Usage: redact_content "$ARIZE_LOG_PROMPTS" "$content" +redact_content() { + local flag_value="$1" content="$2" + if [[ "$flag_value" == "true" ]]; then + printf '%s' "$content" + else + local len=${#content} + printf '<redacted (%d chars)>' "$len" + fi +} + # --- Utilities --- generate_uuid() { uuidgen 2>/dev/null | tr '[:upper:]' '[:lower:]' || \