From 1ca9e6fb92cea8a19b8eddc616ffb548ba9b40da Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Mon, 9 Mar 2026 18:01:08 +0800 Subject: [PATCH 01/45] feat(agent): add AiAgentConfig for AI agent governance Add inputs.proc.ai_agent config section with http_endpoints (default: /v1/chat/completions, /v1/embeddings), max_payload_size (default: 1MB), and file_io_enabled. Forward to LogParserConfig. Co-Authored-By: Claude Opus 4.6 --- agent/src/config/config.rs | 23 +++++++++++++++++++++++ agent/src/config/handler.rs | 11 +++++++++++ 2 files changed, 34 insertions(+) diff --git a/agent/src/config/config.rs b/agent/src/config/config.rs index 6d9d6503fc5..c76728f5fc3 100644 --- a/agent/src/config/config.rs +++ b/agent/src/config/config.rs @@ -558,6 +558,27 @@ pub struct SymbolTable { pub java: Java, } +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +#[serde(default)] +pub struct AiAgentConfig { + pub http_endpoints: Vec, + pub max_payload_size: usize, + pub file_io_enabled: bool, +} + +impl Default for AiAgentConfig { + fn default() -> Self { + Self { + http_endpoints: vec![ + "/v1/chat/completions".to_string(), + "/v1/embeddings".to_string(), + ], + max_payload_size: 1_048_576, // 1MB + file_io_enabled: true, + } + } +} + #[derive(Clone, Debug, Deserialize, PartialEq, Eq)] #[serde(default)] pub struct Proc { @@ -572,6 +593,7 @@ pub struct Proc { pub process_blacklist: Vec, pub process_matcher: Vec, pub symbol_table: SymbolTable, + pub ai_agent: AiAgentConfig, } impl Default for Proc { @@ -658,6 +680,7 @@ impl Default for Proc { }, ], symbol_table: SymbolTable::default(), + ai_agent: AiAgentConfig::default(), }; p.process_blacklist.sort_unstable(); p.process_blacklist.dedup(); diff --git a/agent/src/config/handler.rs b/agent/src/config/handler.rs index 4ba453c60ff..2fba3a1f096 100755 --- a/agent/src/config/handler.rs +++ b/agent/src/config/handler.rs @@ -1150,6 +1150,8 @@ pub struct LogParserConfig { pub unconcerned_dns_nxdomain_trie: DomainNameTrie, pub 
mysql_decompress_payload: bool, pub custom_app: CustomAppConfig, + pub ai_agent_endpoints: Vec, + pub ai_agent_max_payload_size: usize, } impl Default for LogParserConfig { @@ -1169,6 +1171,11 @@ impl Default for LogParserConfig { unconcerned_dns_nxdomain_trie: DomainNameTrie::default(), mysql_decompress_payload: true, custom_app: CustomAppConfig::default(), + ai_agent_endpoints: vec![ + "/v1/chat/completions".to_string(), + "/v1/embeddings".to_string(), + ], + ai_agent_max_payload_size: 1_048_576, } } } @@ -1215,6 +1222,8 @@ impl fmt::Debug for LogParserConfig { ) .field("mysql_decompress_payload", &self.mysql_decompress_payload) .field("custom_app", &self.custom_app) + .field("ai_agent_endpoints", &self.ai_agent_endpoints) + .field("ai_agent_max_payload_size", &self.ai_agent_max_payload_size) .finish() } } @@ -2335,6 +2344,8 @@ impl TryFrom<(Config, UserConfig)> for ModuleConfig { None }, }, + ai_agent_endpoints: conf.inputs.proc.ai_agent.http_endpoints.clone(), + ai_agent_max_payload_size: conf.inputs.proc.ai_agent.max_payload_size, }, debug: DebugConfig { agent_id: conf.global.common.agent_id as u16, From 60bddd8435b244fd4942e1ddc58d239a8481eae6 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Mon, 9 Mar 2026 18:12:18 +0800 Subject: [PATCH 02/45] feat(agent): add BIZ_TYPE_AI_AGENT constant Add BIZ_TYPE_DEFAULT (0) and BIZ_TYPE_AI_AGENT (1) constants for process classification in AI agent governance. 
Co-Authored-By: Claude Opus 4.6 --- agent/src/common/flow.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/agent/src/common/flow.rs b/agent/src/common/flow.rs index 7104dd1b25b..585f00d948e 100644 --- a/agent/src/common/flow.rs +++ b/agent/src/common/flow.rs @@ -539,6 +539,10 @@ impl From for flow_log::FlowPerfStats { } } +// Business type constants for process classification +pub const BIZ_TYPE_DEFAULT: u8 = 0; +pub const BIZ_TYPE_AI_AGENT: u8 = 1; + #[derive(Clone, Debug, Default)] pub struct L7Stats { pub stats: L7PerfStats, From 245b0caa6784f53016d1ca9c1292e0173b601c2c Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Mon, 9 Mar 2026 18:14:14 +0800 Subject: [PATCH 03/45] feat(agent): add ai_agent stub in enterprise-utils Stub module for AI Agent governance. Returns no-ops in open source. Real implementation provided by enterprise enterprise-utils crate. Co-Authored-By: Claude Opus 4.6 --- agent/crates/enterprise-utils/src/lib.rs | 53 ++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/agent/crates/enterprise-utils/src/lib.rs b/agent/crates/enterprise-utils/src/lib.rs index 56832c98bf1..80f7cf445b6 100644 --- a/agent/crates/enterprise-utils/src/lib.rs +++ b/agent/crates/enterprise-utils/src/lib.rs @@ -434,3 +434,56 @@ pub mod rpc { } } } + +pub mod ai_agent { + use std::time::Duration; + + #[derive(Debug, Clone, Default)] + pub struct AgentMeta { + pub first_seen: Duration, + pub last_seen: Duration, + pub matched_endpoint: String, + } + + #[derive(Debug, Clone, Default)] + pub struct AiAgentRegistry; + + impl AiAgentRegistry { + pub fn new() -> Self { + AiAgentRegistry + } + + pub fn register(&self, _pid: u32, _endpoint: &str, _now: Duration) -> bool { + false + } + + pub fn is_ai_agent(&self, _pid: u32) -> bool { + false + } + + pub fn get_all_pids(&self) -> Vec { + vec![] + } + + pub fn cleanup_dead_pids(&self, _alive_pids: &[u32]) -> Vec { + vec![] + } + + pub fn len(&self) -> usize { + 0 + } + + pub fn is_empty(&self) -> bool { + true 
+ } + + pub fn sync_bpf_map_add(&self, _pid: u32) {} + + pub fn sync_bpf_map_remove(&self, _pid: u32) {} + } + + /// Check if a URL path matches an AI Agent endpoint pattern. + pub fn match_ai_agent_endpoint(_endpoints: &[String], _path: &str) -> Option { + None + } +} From f4f1130c72b395cd00e4deb830583a876a334f61 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Mon, 9 Mar 2026 18:53:44 +0800 Subject: [PATCH 04/45] feat(agent): add AI agent URL detection hook in HTTP parser Enterprise-gated hook calls enterprise_utils::ai_agent::match_ai_agent_endpoint to detect LLM API URLs. Sets endpoint and biz_type=AI_AGENT on match. Priority: WASM/biz_field > AI Agent detection > http_endpoint config. Co-Authored-By: Claude Opus 4.6 --- .../src/flow_generator/protocol_logs/http.rs | 32 +++++++++++++++++-- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/agent/src/flow_generator/protocol_logs/http.rs b/agent/src/flow_generator/protocol_logs/http.rs index 789ee9679f0..4fc1d203f93 100644 --- a/agent/src/flow_generator/protocol_logs/http.rs +++ b/agent/src/flow_generator/protocol_logs/http.rs @@ -72,6 +72,9 @@ if #[cfg(feature = "enterprise")] { use public::l7_protocol::NativeTag; use crate::flow_generator::protocol_logs::{auto_merge_custom_field, CUSTOM_FIELD_POLICY_PRIORITY}; + + use enterprise_utils::ai_agent::match_ai_agent_endpoint; + use crate::common::flow::BIZ_TYPE_AI_AGENT; } } @@ -307,6 +310,9 @@ pub struct HttpInfo { pub grpc_status_code: Option, endpoint: Option, + // set when AI Agent URL is detected (enterprise only) + #[serde(skip)] + protocol_str: Option, // set by wasm plugin #[l7_log(response_result)] custom_result: Option, @@ -913,6 +919,7 @@ impl From for L7ProtocolSendLog { user_agent: f.user_agent, referer: f.referer, rpc_service: f.service_name, + protocol_str: f.protocol_str, attributes: { if f.attributes.is_empty() { None @@ -1248,11 +1255,30 @@ impl HttpLog { info.service_name = info.grpc_package_service_name(); if !config.http_endpoint_disabled 
&& info.path.len() > 0 { // Priority use of info.endpoint, because info.endpoint may be set by the wasm plugin + let _endpoint_already_set = matches!(info.endpoint.as_ref(), Some(p) if !p.is_empty()); let path = match info.endpoint.as_ref() { - Some(p) if !p.is_empty() => p, - _ => &info.path, + Some(p) if !p.is_empty() => p.clone(), + _ => info.path.clone(), + }; + // Priority chain: WASM/biz_field > AI Agent detection > http_endpoint Trie + #[cfg(feature = "enterprise")] + let ai_agent_matched = if !_endpoint_already_set { + if let Some(matched_path) = match_ai_agent_endpoint(&config.ai_agent_endpoints, &path) { + info.endpoint = Some(matched_path); + info.biz_type = BIZ_TYPE_AI_AGENT; + info.protocol_str = Some("LLM".to_string()); + true + } else { + false + } + } else { + false }; - info.endpoint = Some(handle_endpoint(config, path)); + #[cfg(not(feature = "enterprise"))] + let ai_agent_matched = false; + if !ai_agent_matched { + info.endpoint = Some(handle_endpoint(config, &path)); + } } let l7_dynamic_config = &config.l7_log_dynamic; From 3330804eb5e96565a242f23575e088c106eacc37 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Mon, 9 Mar 2026 19:10:56 +0800 Subject: [PATCH 05/45] feat(agent): add biz_type to ProcessData and ProcessInfo proto MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AI Agent processes will be synced to controller with biz_type=1 (AI_AGENT). Field plumbing only — registry integration in a later task. 
Co-Authored-By: Claude Opus 4.6 --- agent/src/platform/platform_synchronizer/linux_process.rs | 8 ++++++++ message/agent.proto | 1 + 2 files changed, 9 insertions(+) diff --git a/agent/src/platform/platform_synchronizer/linux_process.rs b/agent/src/platform/platform_synchronizer/linux_process.rs index 28b26162f41..7c2ccede5ef 100644 --- a/agent/src/platform/platform_synchronizer/linux_process.rs +++ b/agent/src/platform/platform_synchronizer/linux_process.rs @@ -63,6 +63,8 @@ pub struct ProcessData { pub netns_id: u32, // pod container id in kubernetes pub container_id: String, + // business type, e.g. BIZ_TYPE_AI_AGENT = 1 + pub biz_type: u8, } impl ProcessDataOp for Vec { @@ -195,6 +197,7 @@ impl TryFrom<&Process> for ProcessData { os_app_tags: vec![], netns_id: get_proc_netns(proc).unwrap_or_default() as u32, container_id: get_container_id(proc).unwrap_or("".to_string()), + biz_type: 0, }) } } @@ -221,6 +224,7 @@ impl From<&ProcessData> for ProcessInfo { }, netns_id: Some(p.netns_id), container_id: Some(p.container_id.clone()), + biz_type: Some(p.biz_type as u32), } } } @@ -565,6 +569,7 @@ mod test { }], netns_id: 1, container_id: "".into(), + biz_type: 0, }, ProcessData { name: "parent".into(), @@ -582,6 +587,7 @@ mod test { }], netns_id: 1, container_id: "".into(), + biz_type: 0, }, ProcessData { name: "child".into(), @@ -599,6 +605,7 @@ mod test { }], netns_id: 1, container_id: "".into(), + biz_type: 0, }, ProcessData { name: "other".into(), @@ -616,6 +623,7 @@ mod test { }], netns_id: 1, container_id: "".into(), + biz_type: 0, }, ]; diff --git a/message/agent.proto b/message/agent.proto index 37d7e44c453..38c8863a7db 100644 --- a/message/agent.proto +++ b/message/agent.proto @@ -550,6 +550,7 @@ message ProcessInfo { optional uint32 netns_id = 7 [default = 0]; optional string container_id = 8 [default = ""]; repeated Tag os_app_tags = 11; + optional uint32 biz_type = 12; } message GenesisProcessData { From d98f869b89f784054a0d0b8c8be674e063af3c7e Mon Sep 17 
00:00:00 2001 From: Kai WANG Date: Mon, 9 Mar 2026 19:32:57 +0800 Subject: [PATCH 06/45] feat(agent): bypass flow reassembly limits for AI agent traffic AI Agent flows use ai_agent_max_payload_size (1MB default) instead of l7_log_packet_size to preserve full LLM request/response bodies for governance audit. Changes: - Add is_ai_agent flag to FlowLog (enterprise-gated) to track flows identified as AI Agent traffic via biz_type detection - In l7_parse_log, use ai_agent_max_payload_size for payload truncation when the flow is marked as AI Agent - After parse_payload returns, check parsed result for BIZ_TYPE_AI_AGENT and set the flag for subsequent packets in the flow - Add L7ParseResult::has_biz_type() helper to check parsed results - Saturate ParseParam::buf_size to u16::MAX to avoid overflow with larger AI Agent payload sizes Enterprise feature only. Original behavior preserved for non-AI-Agent flows and non-enterprise builds. Co-Authored-By: Claude Opus 4.6 --- agent/src/common/l7_protocol_log.rs | 18 +++++++++++++- agent/src/flow_generator/perf/mod.rs | 35 ++++++++++++++++++++++++++-- 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/agent/src/common/l7_protocol_log.rs b/agent/src/common/l7_protocol_log.rs index 507bcb1bc17..afc27e0c36e 100644 --- a/agent/src/common/l7_protocol_log.rs +++ b/agent/src/common/l7_protocol_log.rs @@ -248,6 +248,16 @@ impl L7ParseResult { L7ParseResult::None => panic!("parse result is none but unwrap multi"), } } + + /// Check if any parsed result has the given biz_type. + /// Used to detect AI Agent flows after parsing. 
+ pub fn has_biz_type(&self, biz_type: u8) -> bool { + match self { + L7ParseResult::Single(info) => info.get_biz_type() == biz_type, + L7ParseResult::Multi(infos) => infos.iter().any(|i| i.get_biz_type() == biz_type), + L7ParseResult::None => false, + } + } } #[enum_dispatch] @@ -811,7 +821,13 @@ impl<'a> ParseParam<'a> { } pub fn set_buf_size(&mut self, buf_size: usize) { - self.buf_size = buf_size as u16; + // Saturate to u16::MAX to avoid overflow when AI Agent flows use larger payload sizes. + // buf_size is informational for plugins; actual payload truncation uses the usize value directly. + self.buf_size = if buf_size > u16::MAX as usize { + u16::MAX + } else { + buf_size as u16 + }; } pub fn set_captured_byte(&mut self, captured_byte: usize) { diff --git a/agent/src/flow_generator/perf/mod.rs b/agent/src/flow_generator/perf/mod.rs index c9bcad51c03..6ff9ba828aa 100644 --- a/agent/src/flow_generator/perf/mod.rs +++ b/agent/src/flow_generator/perf/mod.rs @@ -41,6 +41,8 @@ use super::{ protocol_logs::AppProtoHead, }; +#[cfg(feature = "enterprise")] +use crate::common::flow::BIZ_TYPE_AI_AGENT; use crate::common::l7_protocol_log::L7PerfCache; use crate::common::{ flow::{Flow, L7PerfStats}, @@ -237,6 +239,12 @@ pub struct FlowLog { ntp_diff: Arc, obfuscate_cache: Option, + + // Enterprise: set to true when AI Agent traffic is detected (biz_type == BIZ_TYPE_AI_AGENT). + // When true, subsequent packets use ai_agent_max_payload_size instead of l7_log_packet_size + // to preserve full LLM request/response bodies for audit. + #[cfg(feature = "enterprise")] + is_ai_agent: bool, } impl FlowLog { @@ -272,6 +280,17 @@ impl FlowLog { remote_epc: i32, ) -> Result { if let Some(payload) = packet.get_l7() { + // Enterprise: AI Agent flows use a larger payload size to preserve full + // LLM request/response bodies for governance audit. 
+ #[cfg(feature = "enterprise")] + let pkt_size = if self.is_ai_agent { + log_parser_config.ai_agent_max_payload_size + } else { + flow_config.l7_log_packet_size as usize + }; + #[cfg(not(feature = "enterprise"))] + let pkt_size = flow_config.l7_log_packet_size as usize; + let mut parse_param = ParseParam::new( &*packet, Some(self.perf_cache.clone()), @@ -285,7 +304,7 @@ impl FlowLog { #[cfg(any(target_os = "linux", target_os = "android"))] parse_param.set_counter(self.stats_counter.clone()); parse_param.set_rrt_timeout(self.rrt_timeout); - parse_param.set_buf_size(flow_config.l7_log_packet_size as usize); + parse_param.set_buf_size(pkt_size); parse_param.set_captured_byte(packet.get_captured_byte()); parse_param.set_oracle_conf(flow_config.oracle_parse_conf); parse_param.set_iso8583_conf(&flow_config.iso8583_parse_conf); @@ -304,7 +323,6 @@ impl FlowLog { let ret = parser.parse_payload( { - let pkt_size = flow_config.l7_log_packet_size as usize; if pkt_size > payload.len() { payload } else { @@ -314,6 +332,17 @@ impl FlowLog { &parse_param, ); + // Enterprise: detect AI Agent traffic from parsed result and set the flag + // so subsequent packets in this flow use the larger payload size. 
+ #[cfg(feature = "enterprise")] + if !self.is_ai_agent { + if let Ok(ref result) = ret { + if result.has_biz_type(BIZ_TYPE_AI_AGENT) { + self.is_ai_agent = true; + } + } + } + let mut cache_proto = |proto: L7ProtocolEnum| match packet.signal_source { SignalSource::EBPF => { app_table.set_protocol_from_ebpf(packet, proto, local_epc, remote_epc) @@ -589,6 +618,8 @@ impl FlowLog { l7_protocol_inference_ttl, ntp_diff, obfuscate_cache, + #[cfg(feature = "enterprise")] + is_ai_agent: false, }) } From 2f703fe714e6267de3adbee222666d558ea633ee Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Mon, 9 Mar 2026 19:42:37 +0800 Subject: [PATCH 07/45] feat(ebpf): add access_permission field and AI agent hook point Add access_permission (__u16) to __io_event_buffer struct for exposing file permission bits (inode->i_mode & 0xFFF) in I/O events. Add #ifdef EXTENDED_AI_AGENT_FILE_IO hook in trace_io_event_common() that allows enterprise extensions to bypass the latency filter for AI agent processes and populate access_permission from the inode. 
Co-Authored-By: Claude Opus 4.6 --- agent/src/ebpf/kernel/files_rw.bpf.c | 16 ++++++++++++++++ .../ebpf/kernel/include/socket_trace_common.h | 3 +++ 2 files changed, 19 insertions(+) diff --git a/agent/src/ebpf/kernel/files_rw.bpf.c b/agent/src/ebpf/kernel/files_rw.bpf.c index c95e16e8324..6349704df99 100644 --- a/agent/src/ebpf/kernel/files_rw.bpf.c +++ b/agent/src/ebpf/kernel/files_rw.bpf.c @@ -370,10 +370,20 @@ static __inline int trace_io_event_common(void *ctx, latency = TIME_ROLLBACK_DEFAULT_LATENCY_NS; } +#ifdef EXTENDED_AI_AGENT_FILE_IO + if (is_ai_agent_process(pid_tgid)) { + goto skip_latency_filter; + } +#endif + if (latency < tracer_ctx->io_event_minimal_duration) { return -1; } +#ifdef EXTENDED_AI_AGENT_FILE_IO +skip_latency_filter: +#endif + struct __io_event_buffer *buffer = io_event_buffer__lookup(&k0); if (!buffer) { return -1; @@ -382,6 +392,12 @@ static __inline int trace_io_event_common(void *ctx, buffer->bytes_count = data_args->bytes_count; buffer->latency = latency; buffer->operation = direction; +#ifdef EXTENDED_AI_AGENT_FILE_IO + buffer->access_permission = + ai_agent_get_access_permission(pid_tgid, data_args->fd, offset); +#else + buffer->access_permission = 0; +#endif struct __socket_data_buffer *v_buff = bpf_map_lookup_elem(&NAME(data_buf), &k0); if (!v_buff) diff --git a/agent/src/ebpf/kernel/include/socket_trace_common.h b/agent/src/ebpf/kernel/include/socket_trace_common.h index 4e4e84a6060..a710e3ed3db 100644 --- a/agent/src/ebpf/kernel/include/socket_trace_common.h +++ b/agent/src/ebpf/kernel/include/socket_trace_common.h @@ -290,6 +290,9 @@ struct __io_event_buffer { // Mount namespace ID of the file’s mount __u32 mntns_id; + // File access permission bits (inode->i_mode & 0xFFF) + __u16 access_permission; + // filename length __u32 len; From 0776f2caa86d0972de346cf5e219821a47f4e26f Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Mon, 9 Mar 2026 21:53:27 +0800 Subject: [PATCH 08/45] feat(agent): wire AiAgentRegistry into agent 
lifecycle - Add global registry accessors (init_global_registry, global_registry) to enterprise-utils ai_agent module (stub returns None in open source) - Initialize registry at startup in trident.rs (enterprise only) - Register AI Agent PIDs in perf/mod.rs when biz_type detection fires - proc_scan_hook checks registry to set biz_type=AI_AGENT on ProcessData Enterprise feature only. Co-Authored-By: Claude Opus 4.6 --- agent/crates/enterprise-utils/src/lib.rs | 13 ++++++++++ agent/src/flow_generator/perf/mod.rs | 12 +++++++++ .../platform_synchronizer/proc_scan_hook.rs | 26 ++++++++----------- agent/src/trident.rs | 6 +++++ 4 files changed, 42 insertions(+), 15 deletions(-) diff --git a/agent/crates/enterprise-utils/src/lib.rs b/agent/crates/enterprise-utils/src/lib.rs index 80f7cf445b6..64de67801fa 100644 --- a/agent/crates/enterprise-utils/src/lib.rs +++ b/agent/crates/enterprise-utils/src/lib.rs @@ -436,6 +436,7 @@ pub mod rpc { } pub mod ai_agent { + use std::sync::Arc; use std::time::Duration; #[derive(Debug, Clone, Default)] @@ -486,4 +487,16 @@ pub mod ai_agent { pub fn match_ai_agent_endpoint(_endpoints: &[String], _path: &str) -> Option { None } + + /// Initialize the global AI Agent registry. Returns the registry Arc. + /// Stub: returns a no-op registry. + pub fn init_global_registry() -> Arc { + Arc::new(AiAgentRegistry::new()) + } + + /// Get a reference to the global AI Agent registry. + /// Stub: always returns None. 
+ pub fn global_registry() -> Option<&'static Arc> { + None + } } diff --git a/agent/src/flow_generator/perf/mod.rs b/agent/src/flow_generator/perf/mod.rs index 6ff9ba828aa..c9684df2b88 100644 --- a/agent/src/flow_generator/perf/mod.rs +++ b/agent/src/flow_generator/perf/mod.rs @@ -339,6 +339,18 @@ impl FlowLog { if let Ok(ref result) = ret { if result.has_biz_type(BIZ_TYPE_AI_AGENT) { self.is_ai_agent = true; + // Register PID in the global AI Agent registry + if packet.process_id > 0 { + if let Some(registry) = + enterprise_utils::ai_agent::global_registry() + { + registry.register( + packet.process_id, + "", + get_timestamp(0), + ); + } + } } } } diff --git a/agent/src/platform/platform_synchronizer/proc_scan_hook.rs b/agent/src/platform/platform_synchronizer/proc_scan_hook.rs index 05b9493681c..85052fa75ca 100644 --- a/agent/src/platform/platform_synchronizer/proc_scan_hook.rs +++ b/agent/src/platform/platform_synchronizer/proc_scan_hook.rs @@ -16,20 +16,16 @@ use super::ProcessData; -pub fn proc_scan_hook(_: &mut Vec) { - // the hook logic here - - /* - use super::get_self_proc; - - let Ok(self_proc) = get_self_proc() else{ - return - }; - - info!("self proc: {:#?}",self_proc); - - for i in _.iter_mut() { - // handle every process +pub fn proc_scan_hook(process_datas: &mut Vec) { + // Enterprise: mark AI Agent processes with biz_type + #[cfg(feature = "enterprise")] + { + if let Some(registry) = enterprise_utils::ai_agent::global_registry() { + for pd in process_datas.iter_mut() { + if registry.is_ai_agent(pd.pid as u32) { + pd.biz_type = crate::common::flow::BIZ_TYPE_AI_AGENT; + } + } } - */ + } } diff --git a/agent/src/trident.rs b/agent/src/trident.rs index 99bb35f6401..8ffc1665d25 100644 --- a/agent/src/trident.rs +++ b/agent/src/trident.rs @@ -920,6 +920,12 @@ impl Trident { #[cfg(feature = "enterprise")] Trident::kernel_version_check(&state, &exception_handler); + #[cfg(feature = "enterprise")] + { + let _ai_agent_registry = 
enterprise_utils::ai_agent::init_global_registry(); + info!("AI Agent governance registry initialized"); + } + let mut components: Option = None; let mut first_run = true; let mut config_initialized = false; From 5c864b5007086c29c5998a7577c92b05cfde62d6 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Mon, 9 Mar 2026 22:26:07 +0800 Subject: [PATCH 09/45] fix(agent): add missing L7ProtocolInfoInterface import and fix unused var - Import L7ProtocolInfoInterface trait for get_biz_type() in l7_protocol_log.rs - Prefix process_datas with underscore in proc_scan_hook.rs to suppress unused variable warning in non-enterprise builds Co-Authored-By: Claude Opus 4.6 --- agent/src/common/l7_protocol_log.rs | 2 ++ agent/src/flow_generator/perf/mod.rs | 10 ++-------- agent/src/flow_generator/protocol_logs/http.rs | 4 +++- .../platform/platform_synchronizer/proc_scan_hook.rs | 4 ++-- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/agent/src/common/l7_protocol_log.rs b/agent/src/common/l7_protocol_log.rs index afc27e0c36e..3672d6451f5 100644 --- a/agent/src/common/l7_protocol_log.rs +++ b/agent/src/common/l7_protocol_log.rs @@ -24,6 +24,8 @@ use std::sync::{ }; use std::time::Duration; +use crate::common::l7_protocol_info::L7ProtocolInfoInterface; + use enum_dispatch::enum_dispatch; use log::debug; use lru::LruCache; diff --git a/agent/src/flow_generator/perf/mod.rs b/agent/src/flow_generator/perf/mod.rs index c9684df2b88..9cb5b6f94ab 100644 --- a/agent/src/flow_generator/perf/mod.rs +++ b/agent/src/flow_generator/perf/mod.rs @@ -341,14 +341,8 @@ impl FlowLog { self.is_ai_agent = true; // Register PID in the global AI Agent registry if packet.process_id > 0 { - if let Some(registry) = - enterprise_utils::ai_agent::global_registry() - { - registry.register( - packet.process_id, - "", - get_timestamp(0), - ); + if let Some(registry) = enterprise_utils::ai_agent::global_registry() { + registry.register(packet.process_id, "", get_timestamp(0)); } } } diff --git 
a/agent/src/flow_generator/protocol_logs/http.rs b/agent/src/flow_generator/protocol_logs/http.rs index 4fc1d203f93..036dbafc860 100644 --- a/agent/src/flow_generator/protocol_logs/http.rs +++ b/agent/src/flow_generator/protocol_logs/http.rs @@ -1263,7 +1263,9 @@ impl HttpLog { // Priority chain: WASM/biz_field > AI Agent detection > http_endpoint Trie #[cfg(feature = "enterprise")] let ai_agent_matched = if !_endpoint_already_set { - if let Some(matched_path) = match_ai_agent_endpoint(&config.ai_agent_endpoints, &path) { + if let Some(matched_path) = + match_ai_agent_endpoint(&config.ai_agent_endpoints, &path) + { info.endpoint = Some(matched_path); info.biz_type = BIZ_TYPE_AI_AGENT; info.protocol_str = Some("LLM".to_string()); diff --git a/agent/src/platform/platform_synchronizer/proc_scan_hook.rs b/agent/src/platform/platform_synchronizer/proc_scan_hook.rs index 85052fa75ca..82f434090b6 100644 --- a/agent/src/platform/platform_synchronizer/proc_scan_hook.rs +++ b/agent/src/platform/platform_synchronizer/proc_scan_hook.rs @@ -16,12 +16,12 @@ use super::ProcessData; -pub fn proc_scan_hook(process_datas: &mut Vec) { +pub fn proc_scan_hook(_process_datas: &mut Vec) { // Enterprise: mark AI Agent processes with biz_type #[cfg(feature = "enterprise")] { if let Some(registry) = enterprise_utils::ai_agent::global_registry() { - for pd in process_datas.iter_mut() { + for pd in _process_datas.iter_mut() { if registry.is_ai_agent(pd.pid as u32) { pd.biz_type = crate::common::flow::BIZ_TYPE_AI_AGENT; } From c2f2e65725e6e74fd60800ba1951e4118d1049b4 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Mon, 9 Mar 2026 23:55:11 +0800 Subject: [PATCH 10/45] fix(ebpf): add null statement after skip_latency_filter label In C, a label must be followed by a statement, not a declaration. The struct declaration after skip_latency_filter: causes a compile error when EXTENDED_AI_AGENT_FILE_IO is defined. Add a null statement (;) to satisfy the grammar requirement. 
Co-Authored-By: Claude Opus 4.6 --- agent/src/ebpf/kernel/files_rw.bpf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/agent/src/ebpf/kernel/files_rw.bpf.c b/agent/src/ebpf/kernel/files_rw.bpf.c index 6349704df99..55df6d89b80 100644 --- a/agent/src/ebpf/kernel/files_rw.bpf.c +++ b/agent/src/ebpf/kernel/files_rw.bpf.c @@ -382,6 +382,7 @@ static __inline int trace_io_event_common(void *ctx, #ifdef EXTENDED_AI_AGENT_FILE_IO skip_latency_filter: + ; /* null statement - labels cannot be followed by declarations in C */ #endif struct __io_event_buffer *buffer = io_event_buffer__lookup(&k0); From 4bc1a0308da764275125e56d2ea48e92fde942f8 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Tue, 10 Mar 2026 13:25:31 +0800 Subject: [PATCH 11/45] feat(agent): AI Agent governance - fix blockers, unlimited reassembly, PRD 2.2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 1 - Fix 4 blockers: - B3: Add u32-key BPF map helpers (table.c/h) and Rust FFI for PID→BPF sync - B1: Add process_id to ParseParam, update match_ai_agent_endpoint for uniqueness - B4: Propagate access_permission through kernel→userspace→Rust→proto chain - B2: Wire file_io_enabled config through LogParserConfig to AiAgentRegistry Phase 2 - Unlimited stream reassembly: - Interpret max_payload_size=0 as usize::MAX (truly unlimited) - Change default max_payload_size from 1MB to 0 (unlimited) Phase 3 - PRD 2.2 event types: - Add FileOpEvent, PermOpEvent, ProcLifecycleEvent to EventType enum - Add proto messages: FileOpEventData, PermOpEventData, ProcLifecycleEventData - Add sched_comm_fork_ctx for process lifecycle BPF hooks - Wire BPF map FD at startup in ebpf_dispatcher Co-Authored-By: Claude Opus 4.6 --- agent/crates/enterprise-utils/src/lib.rs | 16 ++++- agent/src/common/l7_protocol_log.rs | 4 ++ agent/src/common/proc_event/linux.rs | 16 ++++- agent/src/config/config.rs | 2 +- agent/src/config/handler.rs | 20 ++++++- .../ebpf/kernel/include/socket_trace_common.h | 1 
+ agent/src/ebpf/mod.rs | 16 +++++ agent/src/ebpf/user/mount.c | 1 + agent/src/ebpf/user/table.c | 38 ++++++++++++ agent/src/ebpf/user/table.h | 5 ++ agent/src/ebpf_dispatcher.rs | 16 +++++ .../src/flow_generator/protocol_logs/http.rs | 9 ++- message/metric.proto | 60 +++++++++++++++++++ 13 files changed, 195 insertions(+), 9 deletions(-) diff --git a/agent/crates/enterprise-utils/src/lib.rs b/agent/crates/enterprise-utils/src/lib.rs index 64de67801fa..0b5e70ffc77 100644 --- a/agent/crates/enterprise-utils/src/lib.rs +++ b/agent/crates/enterprise-utils/src/lib.rs @@ -481,10 +481,24 @@ pub mod ai_agent { pub fn sync_bpf_map_add(&self, _pid: u32) {} pub fn sync_bpf_map_remove(&self, _pid: u32) {} + + #[cfg(target_os = "linux")] + pub fn set_bpf_map_fd(&self, _fd: i32) {} + + pub fn set_file_io_enabled(&self, _enabled: bool) {} + + pub fn record_endpoint_hit(&self, _pid: u32, _endpoint: &str, _now: Duration) -> bool { + false + } } /// Check if a URL path matches an AI Agent endpoint pattern. 
- pub fn match_ai_agent_endpoint(_endpoints: &[String], _path: &str) -> Option { + pub fn match_ai_agent_endpoint( + _endpoints: &[String], + _path: &str, + _pid: u32, + _now: Duration, + ) -> Option { None } diff --git a/agent/src/common/l7_protocol_log.rs b/agent/src/common/l7_protocol_log.rs index 3672d6451f5..a86865e2835 100644 --- a/agent/src/common/l7_protocol_log.rs +++ b/agent/src/common/l7_protocol_log.rs @@ -702,6 +702,8 @@ pub struct ParseParam<'a> { pub oracle_parse_conf: OracleConfig, pub iso8583_parse_conf: Iso8583ParseConfig, pub web_sphere_mq_parse_conf: WebSphereMqParseConfig, + + pub process_id: u32, } impl<'a> fmt::Debug for ParseParam<'a> { @@ -805,6 +807,8 @@ impl<'a> ParseParam<'a> { oracle_parse_conf: OracleConfig::default(), iso8583_parse_conf: Iso8583ParseConfig::default(), web_sphere_mq_parse_conf: WebSphereMqParseConfig::default(), + + process_id: packet.process_id, } } } diff --git a/agent/src/common/proc_event/linux.rs b/agent/src/common/proc_event/linux.rs index 3d6abce0748..bf8e5b17e44 100644 --- a/agent/src/common/proc_event/linux.rs +++ b/agent/src/common/proc_event/linux.rs @@ -21,7 +21,7 @@ use std::{ use prost::Message; use public::{ - bytes::{read_u32_le, read_u64_le}, + bytes::{read_u16_le, read_u32_le, read_u64_le}, proto::metric, sender::{SendMessageType, Sendable}, }; @@ -40,7 +40,10 @@ const IO_FILE_NAME_OFFSET: usize = 28; const IO_MOUNT_SOURCE_OFFSET: usize = 284; const IO_MOUNT_POINT_OFFSET: usize = 796; const IO_FILE_DIR_OFFSET: usize = 1052; -const IO_EVENT_BUFF_SIZE: usize = 1564; +const IO_MNT_ID_OFFSET: usize = 1564; +const IO_MNTNS_ID_OFFSET: usize = 1568; +const IO_ACCESS_PERMISSION_OFFSET: usize = 1572; +const IO_EVENT_BUFF_SIZE: usize = 1574; struct IoEventData { bytes_count: u32, // Number of bytes read and written operation: u32, // 0: write 1: read @@ -51,6 +54,7 @@ struct IoEventData { mount_source: Vec, mount_point: Vec, file_dir: Vec, + access_permission: u16, // File permission bits (inode->i_mode & 
0xFFF) } impl TryFrom<&[u8]> for IoEventData { @@ -79,6 +83,7 @@ impl TryFrom<&[u8]> for IoEventData { mount_source: parse_cstring_slice(&raw_data[IO_MOUNT_SOURCE_OFFSET..]), mount_point: parse_cstring_slice(&raw_data[IO_MOUNT_POINT_OFFSET..]), file_dir: parse_cstring_slice(&raw_data[IO_FILE_DIR_OFFSET..]), + access_permission: read_u16_le(&raw_data[IO_ACCESS_PERMISSION_OFFSET..]), }; Ok(io_event_data) } @@ -96,6 +101,7 @@ impl From for metric::IoEventData { mount_point: io_event_data.mount_point, file_dir: io_event_data.file_dir, file_type: io_event_data.file_type as i32, + access_permission: io_event_data.access_permission as u32, } } } @@ -125,6 +131,9 @@ impl Debug for EventData { pub enum EventType { OtherEvent = 0, IoEvent = 1, + FileOpEvent = 2, + PermOpEvent = 3, + ProcLifecycleEvent = 4, } impl From for EventType { @@ -147,6 +156,9 @@ impl fmt::Display for EventType { match self { Self::OtherEvent => write!(f, "other_event"), Self::IoEvent => write!(f, "io_event"), + Self::FileOpEvent => write!(f, "file_op_event"), + Self::PermOpEvent => write!(f, "perm_op_event"), + Self::ProcLifecycleEvent => write!(f, "proc_lifecycle_event"), } } } diff --git a/agent/src/config/config.rs b/agent/src/config/config.rs index c76728f5fc3..abb9f6766f2 100644 --- a/agent/src/config/config.rs +++ b/agent/src/config/config.rs @@ -573,7 +573,7 @@ impl Default for AiAgentConfig { "/v1/chat/completions".to_string(), "/v1/embeddings".to_string(), ], - max_payload_size: 1_048_576, // 1MB + max_payload_size: 0, // 0 means unlimited file_io_enabled: true, } } diff --git a/agent/src/config/handler.rs b/agent/src/config/handler.rs index 2fba3a1f096..ac1419fe54f 100755 --- a/agent/src/config/handler.rs +++ b/agent/src/config/handler.rs @@ -1152,6 +1152,7 @@ pub struct LogParserConfig { pub custom_app: CustomAppConfig, pub ai_agent_endpoints: Vec, pub ai_agent_max_payload_size: usize, + pub ai_agent_file_io_enabled: bool, } impl Default for LogParserConfig { @@ -1175,7 +1176,8 @@ impl 
Default for LogParserConfig { "/v1/chat/completions".to_string(), "/v1/embeddings".to_string(), ], - ai_agent_max_payload_size: 1_048_576, + ai_agent_max_payload_size: usize::MAX, // default: unlimited (config 0 → usize::MAX) + ai_agent_file_io_enabled: true, } } } @@ -2345,7 +2347,12 @@ impl TryFrom<(Config, UserConfig)> for ModuleConfig { }, }, ai_agent_endpoints: conf.inputs.proc.ai_agent.http_endpoints.clone(), - ai_agent_max_payload_size: conf.inputs.proc.ai_agent.max_payload_size, + ai_agent_max_payload_size: if conf.inputs.proc.ai_agent.max_payload_size == 0 { + usize::MAX // 0 means unlimited + } else { + conf.inputs.proc.ai_agent.max_payload_size + }, + ai_agent_file_io_enabled: conf.inputs.proc.ai_agent.file_io_enabled, }, debug: DebugConfig { agent_id: conf.global.common.agent_id as u16, @@ -5659,6 +5666,15 @@ impl ConfigHandler { }, ..new_config.log_parser.clone() }; + + // Propagate file_io_enabled toggle to AiAgentRegistry + #[cfg(feature = "enterprise")] + { + if let Some(registry) = enterprise_utils::ai_agent::global_registry() { + registry + .set_file_io_enabled(candidate_config.log_parser.ai_agent_file_io_enabled); + } + } } if candidate_config.synchronizer != new_config.synchronizer { diff --git a/agent/src/ebpf/kernel/include/socket_trace_common.h b/agent/src/ebpf/kernel/include/socket_trace_common.h index a710e3ed3db..9ebfc0efae2 100644 --- a/agent/src/ebpf/kernel/include/socket_trace_common.h +++ b/agent/src/ebpf/kernel/include/socket_trace_common.h @@ -321,6 +321,7 @@ struct user_io_event_buffer { char file_dir[FILE_PATH_SZ]; int mnt_id; __u32 mntns_id; + __u16 access_permission; } __attribute__ ((packed)); // struct ebpf_proc_info -> offsets[] arrays index. 
diff --git a/agent/src/ebpf/mod.rs b/agent/src/ebpf/mod.rs index e47bb80995c..7ef46c56ec2 100644 --- a/agent/src/ebpf/mod.rs +++ b/agent/src/ebpf/mod.rs @@ -157,6 +157,12 @@ pub const DATA_SOURCE_OPENSSL_UPROBE: u8 = 3; #[allow(dead_code)] pub const DATA_SOURCE_IO_EVENT: u8 = 4; #[allow(dead_code)] +pub const DATA_SOURCE_FILE_OP_EVENT: u8 = 9; +#[allow(dead_code)] +pub const DATA_SOURCE_PERM_OP_EVENT: u8 = 10; +#[allow(dead_code)] +pub const DATA_SOURCE_PROC_LIFECYCLE_EVENT: u8 = 11; +#[allow(dead_code)] pub const DATA_SOURCE_GO_HTTP2_DATAFRAME_UPROBE: u8 = 5; #[allow(dead_code)] pub const DATA_SOURCE_UNIX_SOCKET: u8 = 8; @@ -836,6 +842,16 @@ extern "C" { pub fn enable_fentry(); pub fn set_virtual_file_collect(enabled: bool) -> c_int; + // BPF map helpers for u32-key maps (used by AI Agent PID tracking) + pub fn bpf_table_get_map_fd(tracer_name: *const c_char, map_name: *const c_char) -> c_int; + pub fn bpf_table_update_u32_key( + map_fd: c_int, + key: c_uint, + val_buf: *const c_void, + val_size: c_int, + ) -> c_int; + pub fn bpf_table_delete_u32_key(map_fd: c_int, key: c_uint) -> c_int; + cfg_if::cfg_if! 
{ if #[cfg(feature = "extended_observability")] { pub fn enable_offcpu_profiler() -> c_int; diff --git a/agent/src/ebpf/user/mount.c b/agent/src/ebpf/user/mount.c index 9ab27d7d90b..b7b34805a1a 100644 --- a/agent/src/ebpf/user/mount.c +++ b/agent/src/ebpf/user/mount.c @@ -792,6 +792,7 @@ u32 copy_file_metrics(int pid, void *dst, void *src, int len, u_event->file_type = file_type; u_event->mnt_id = event->mnt_id; u_event->mntns_id = event->mntns_id; + u_event->access_permission = event->access_permission; strcpy_s_inline(u_event->mount_source, sizeof(u_event->mount_source), mount_source, strlen(mount_source)); fast_strncat_trunc(mntns_str, mount_point, u_event->mount_point, diff --git a/agent/src/ebpf/user/table.c b/agent/src/ebpf/user/table.c index 72ec4822222..eb57cf23fe8 100644 --- a/agent/src/ebpf/user/table.c +++ b/agent/src/ebpf/user/table.c @@ -154,3 +154,41 @@ int bpf_table_get_fd(struct bpf_tracer *tracer, const char *tb_name) return map->fd; } + +int bpf_table_get_map_fd(const char *tracer_name, const char *map_name) +{ + struct bpf_tracer *tracer = find_bpf_tracer(tracer_name); + if (tracer == NULL) { + ebpf_warning("[%s] tracer \"%s\" not found.\n", __func__, + tracer_name); + return -1; + } + + return bpf_table_get_fd(tracer, map_name); +} + +int bpf_table_update_u32_key(int map_fd, uint32_t key, void *val_buf, + int val_size) +{ + (void)val_size; + if (bpf_update_elem(map_fd, &key, val_buf, BPF_ANY) != 0) { + ebpf_warning("[%s] bpf_map_update_elem failed, fd: %d, " + "key: %u, err: %s\n", __func__, map_fd, + key, strerror(errno)); + return -1; + } + + return 0; +} + +int bpf_table_delete_u32_key(int map_fd, uint32_t key) +{ + if (bpf_delete_elem(map_fd, &key) != 0) { + ebpf_debug("[%s] bpf_map_delete_elem failed, fd: %d, " + "key: %u, err: %s\n", __func__, map_fd, + key, strerror(errno)); + return -1; + } + + return 0; +} diff --git a/agent/src/ebpf/user/table.h b/agent/src/ebpf/user/table.h index 07e95c10879..15df114fa55 100644 --- 
a/agent/src/ebpf/user/table.h +++ b/agent/src/ebpf/user/table.h @@ -36,4 +36,9 @@ void insert_prog_to_map(struct bpf_tracer *tracer, const char *map_name, const char *prog_name, int key); int bpf_table_get_fd(struct bpf_tracer *tracer, const char *tb_name); + +int bpf_table_get_map_fd(const char *tracer_name, const char *map_name); +int bpf_table_update_u32_key(int map_fd, uint32_t key, void *val_buf, + int val_size); +int bpf_table_delete_u32_key(int map_fd, uint32_t key); #endif /* DF_BPF_TABLE_H */ diff --git a/agent/src/ebpf_dispatcher.rs b/agent/src/ebpf_dispatcher.rs index 8c471fd1c84..9abdad6bd3c 100644 --- a/agent/src/ebpf_dispatcher.rs +++ b/agent/src/ebpf_dispatcher.rs @@ -1279,6 +1279,22 @@ impl EbpfCollector { ebpf::bpf_tracer_finish(); + // Wire AI Agent PID → BPF map fd after all tracers are loaded + #[cfg(feature = "enterprise")] + { + use enterprise_utils::ai_agent::global_registry; + let fd = unsafe { + ebpf::bpf_table_get_map_fd(c"socket-trace".as_ptr(), c"__ai_agent_pids".as_ptr()) + }; + if fd >= 0 { + if let Some(registry) = global_registry() { + registry.set_bpf_map_fd(fd); + } + } else { + warn!("AI Agent: could not find __ai_agent_pids BPF map (fd={}), file I/O monitoring will not work", fd); + } + } + Ok(handle) } diff --git a/agent/src/flow_generator/protocol_logs/http.rs b/agent/src/flow_generator/protocol_logs/http.rs index 036dbafc860..17e99c2fab4 100644 --- a/agent/src/flow_generator/protocol_logs/http.rs +++ b/agent/src/flow_generator/protocol_logs/http.rs @@ -1263,9 +1263,12 @@ impl HttpLog { // Priority chain: WASM/biz_field > AI Agent detection > http_endpoint Trie #[cfg(feature = "enterprise")] let ai_agent_matched = if !_endpoint_already_set { - if let Some(matched_path) = - match_ai_agent_endpoint(&config.ai_agent_endpoints, &path) - { + if let Some(matched_path) = match_ai_agent_endpoint( + &config.ai_agent_endpoints, + &path, + param.process_id, + std::time::Duration::from_micros(param.time), + ) { info.endpoint = 
// Kind of file operation reported in FileOpEventData.
enum FileOpType {
    FileOpUnknown = 0;
    FileOpCreate = 1;
    FileOpDelete = 2;
    FileOpChmod = 3;
    FileOpChown = 4;
}

// Payload of a ProcEvent whose event type is FileOpEvent
// (file creation/deletion/chmod/chown); carried in ProcEvent.file_op_event_data.
// NOTE(review): the agent copies the first byte of the raw kernel record into
// op_type unchanged - confirm the eBPF producer emits the FileOpType numbers.
message FileOpEventData {
    FileOpType op_type = 1;
    uint32 pid = 2;
    uint32 uid = 3;
    uint32 gid = 4;
    uint32 mode = 5;
    uint64 timestamp = 6; // NOTE(review): unit/clock is not stated here - confirm with the producer
    bytes filename = 7;   // raw name bytes; the agent keeps only what precedes the first NUL
}

// Kind of credential change reported in PermOpEventData.
enum PermOpType {
    PermOpUnknown = 0;
    PermOpSetuid = 1;
    PermOpSetgid = 2;
    PermOpSetreuid = 3;
    PermOpSetregid = 4;
}

// Payload of a ProcEvent whose event type is PermOpEvent
// (setuid/setgid/setreuid/setregid); carried in ProcEvent.perm_op_event_data.
// NOTE(review): op_type is filled from the raw record's first byte - confirm
// it matches the PermOpType numbering.
message PermOpEventData {
    PermOpType op_type = 1;
    uint32 pid = 2;
    uint32 old_uid = 3;
    uint32 old_gid = 4;
    uint32 new_uid = 5;
    uint32 new_gid = 6;
    uint64 timestamp = 7; // NOTE(review): unit/clock is not stated here - confirm with the producer
}

// Kind of process lifecycle transition reported in ProcLifecycleEventData.
enum ProcLifecycleType {
    ProcLifecycleUnknown = 0;
    ProcLifecycleFork = 1;
    ProcLifecycleExec = 2;
    ProcLifecycleExit = 3;
}

// Payload of a ProcEvent whose event type is ProcLifecycleEvent
// (fork/exec/exit); carried in ProcEvent.proc_lifecycle_event_data.
// NOTE(review): lifecycle_type is filled from the raw record's first byte -
// confirm it matches the ProcLifecycleType numbering.
message ProcLifecycleEventData {
    ProcLifecycleType lifecycle_type = 1;
    uint32 pid = 2;
    uint32 parent_pid = 3;
    uint32 uid = 4;
    uint32 gid = 5;
    uint64 timestamp = 6; // NOTE(review): unit/clock is not stated here - confirm with the producer
    bytes comm = 7;       // raw command-name bytes; the agent keeps only what precedes the first NUL
}
From: Kai WANG Date: Tue, 10 Mar 2026 15:03:35 +0800 Subject: [PATCH 12/45] fix(agent): fix blocking issues in AI Agent governance event pipeline Blocking fixes: - Remove duplicate sched_comm_fork_ctx struct in socket_trace.h - Add DATA_SOURCE constants for new event types in BPF common.h - Add FILE_OP_EVENT/PERM_OP_EVENT/PROC_LIFECYCLE_EVENT constants to ebpf.rs - Complete ProcEvent parsing: add FileOpEventData, PermOpEventData, ProcLifecycleEventData with packed struct parsing and proto encoding - Update EventType::from(u8) to dispatch new source types High-risk fixes: - Add server-side decoder handling for new event types in decoder.go - Wire cleanup_dead_pids to periodic proc_scan_hook for runtime cleanup Co-Authored-By: Claude Opus 4.6 --- agent/src/common/ebpf.rs | 4 + agent/src/common/proc_event/linux.rs | 245 +++++++++++++++++- agent/src/ebpf/kernel/include/common.h | 3 + .../platform_synchronizer/proc_scan_hook.rs | 6 +- server/ingester/event/decoder/decoder.go | 19 ++ 5 files changed, 271 insertions(+), 6 deletions(-) diff --git a/agent/src/common/ebpf.rs b/agent/src/common/ebpf.rs index 47c6ea5af80..06ede88e067 100644 --- a/agent/src/common/ebpf.rs +++ b/agent/src/common/ebpf.rs @@ -40,6 +40,10 @@ pub const GO_HTTP2_UPROBE_DATA: u8 = 5; pub const SOCKET_CLOSE_EVENT: u8 = 6; // unix socket pub const UNIX_SOCKET: u8 = 8; +// AI Agent governance event types +pub const FILE_OP_EVENT: u8 = 9; +pub const PERM_OP_EVENT: u8 = 10; +pub const PROC_LIFECYCLE_EVENT: u8 = 11; const EBPF_TYPE_TRACEPOINT: u8 = 0; const EBPF_TYPE_TLS_UPROBE: u8 = 1; diff --git a/agent/src/common/proc_event/linux.rs b/agent/src/common/proc_event/linux.rs index bf8e5b17e44..8742e439b2c 100644 --- a/agent/src/common/proc_event/linux.rs +++ b/agent/src/common/proc_event/linux.rs @@ -27,11 +27,12 @@ use public::{ }; use crate::common::{ - ebpf::IO_EVENT, + ebpf::{FILE_OP_EVENT, IO_EVENT, PERM_OP_EVENT, PROC_LIFECYCLE_EVENT}, error::Error::{self, ParseEventData}, }; use 
crate::ebpf::SK_BPF_DATA; +// ── IoEventData offsets (matches user_io_event_buffer) ────────────────── const IO_OPERATION_OFFSET: usize = 4; const IO_LATENCY_OFFSET: usize = 8; const IO_OFF_BYTES_OFFSET: usize = 16; @@ -44,6 +45,7 @@ const IO_MNT_ID_OFFSET: usize = 1564; const IO_MNTNS_ID_OFFSET: usize = 1568; const IO_ACCESS_PERMISSION_OFFSET: usize = 1572; const IO_EVENT_BUFF_SIZE: usize = 1574; + struct IoEventData { bytes_count: u32, // Number of bytes read and written operation: u32, // 0: write 1: read @@ -70,7 +72,7 @@ impl TryFrom<&[u8]> for IoEventData { let length = raw_data.len(); if length < IO_EVENT_BUFF_SIZE { return Err(ParseEventData(format!( - "parse io event data failed, raw data length: {length} < {IO_OFF_BYTES_OFFSET}" + "parse io event data failed, raw data length: {length} < {IO_EVENT_BUFF_SIZE}" ))); } let io_event_data = Self { @@ -106,9 +108,199 @@ impl From for metric::IoEventData { } } +// ── FileOpEventData offsets (packed __ai_agent_file_op_event) ─────────── +// Layout: event_type(1) + pid(4) + uid(4) + gid(4) + mode(4) + timestamp(8) + filename(256) +const FILE_OP_MIN_SIZE: usize = 25; // without filename +const FILE_OP_PID_OFF: usize = 1; +const FILE_OP_UID_OFF: usize = 5; +const FILE_OP_GID_OFF: usize = 9; +const FILE_OP_MODE_OFF: usize = 13; +const FILE_OP_TS_OFF: usize = 17; +const FILE_OP_FNAME_OFF: usize = 25; + +struct FileOpEventData { + op_type: u8, + pid: u32, + uid: u32, + gid: u32, + mode: u32, + timestamp: u64, + filename: Vec, +} + +impl TryFrom<&[u8]> for FileOpEventData { + type Error = Error; + + fn try_from(raw: &[u8]) -> Result { + if raw.len() < FILE_OP_MIN_SIZE { + return Err(ParseEventData(format!( + "file_op event too short: {} < {FILE_OP_MIN_SIZE}", + raw.len() + ))); + } + let filename = if raw.len() > FILE_OP_FNAME_OFF { + let slice = &raw[FILE_OP_FNAME_OFF..]; + match slice.iter().position(|&b| b == b'\0') { + Some(i) => slice[..i].to_vec(), + None => slice.to_vec(), + } + } else { + vec![] + }; + Ok(Self { 
+ op_type: raw[0], + pid: read_u32_le(&raw[FILE_OP_PID_OFF..]), + uid: read_u32_le(&raw[FILE_OP_UID_OFF..]), + gid: read_u32_le(&raw[FILE_OP_GID_OFF..]), + mode: read_u32_le(&raw[FILE_OP_MODE_OFF..]), + timestamp: read_u64_le(&raw[FILE_OP_TS_OFF..]), + filename, + }) + } +} + +impl From for metric::FileOpEventData { + fn from(d: FileOpEventData) -> Self { + Self { + op_type: d.op_type as i32, + pid: d.pid, + uid: d.uid, + gid: d.gid, + mode: d.mode, + timestamp: d.timestamp, + filename: d.filename, + } + } +} + +// ── PermOpEventData offsets (packed __ai_agent_perm_event) ────────────── +// Layout: event_type(1) + pid(4) + old_uid(4) + old_gid(4) + new_uid(4) + new_gid(4) + timestamp(8) +const PERM_OP_SIZE: usize = 29; +const PERM_OP_PID_OFF: usize = 1; +const PERM_OP_OLD_UID_OFF: usize = 5; +const PERM_OP_OLD_GID_OFF: usize = 9; +const PERM_OP_NEW_UID_OFF: usize = 13; +const PERM_OP_NEW_GID_OFF: usize = 17; +const PERM_OP_TS_OFF: usize = 21; + +struct PermOpEventData { + op_type: u8, + pid: u32, + old_uid: u32, + old_gid: u32, + new_uid: u32, + new_gid: u32, + timestamp: u64, +} + +impl TryFrom<&[u8]> for PermOpEventData { + type Error = Error; + + fn try_from(raw: &[u8]) -> Result { + if raw.len() < PERM_OP_SIZE { + return Err(ParseEventData(format!( + "perm_op event too short: {} < {PERM_OP_SIZE}", + raw.len() + ))); + } + Ok(Self { + op_type: raw[0], + pid: read_u32_le(&raw[PERM_OP_PID_OFF..]), + old_uid: read_u32_le(&raw[PERM_OP_OLD_UID_OFF..]), + old_gid: read_u32_le(&raw[PERM_OP_OLD_GID_OFF..]), + new_uid: read_u32_le(&raw[PERM_OP_NEW_UID_OFF..]), + new_gid: read_u32_le(&raw[PERM_OP_NEW_GID_OFF..]), + timestamp: read_u64_le(&raw[PERM_OP_TS_OFF..]), + }) + } +} + +impl From for metric::PermOpEventData { + fn from(d: PermOpEventData) -> Self { + Self { + op_type: d.op_type as i32, + pid: d.pid, + old_uid: d.old_uid, + old_gid: d.old_gid, + new_uid: d.new_uid, + new_gid: d.new_gid, + timestamp: d.timestamp, + } + } +} + +// ── ProcLifecycleEventData offsets 
(packed __ai_agent_proc_event) ─────── +// Layout: event_type(1) + pid(4) + parent_pid(4) + uid(4) + gid(4) + timestamp(8) + comm(16) +const PROC_LIFECYCLE_MIN_SIZE: usize = 25; // without comm +const PROC_LC_PID_OFF: usize = 1; +const PROC_LC_PPID_OFF: usize = 5; +const PROC_LC_UID_OFF: usize = 9; +const PROC_LC_GID_OFF: usize = 13; +const PROC_LC_TS_OFF: usize = 17; +const PROC_LC_COMM_OFF: usize = 25; + +struct ProcLifecycleEventData { + lifecycle_type: u8, + pid: u32, + parent_pid: u32, + uid: u32, + gid: u32, + timestamp: u64, + comm: Vec, +} + +impl TryFrom<&[u8]> for ProcLifecycleEventData { + type Error = Error; + + fn try_from(raw: &[u8]) -> Result { + if raw.len() < PROC_LIFECYCLE_MIN_SIZE { + return Err(ParseEventData(format!( + "proc_lifecycle event too short: {} < {PROC_LIFECYCLE_MIN_SIZE}", + raw.len() + ))); + } + let comm = if raw.len() > PROC_LC_COMM_OFF { + let slice = &raw[PROC_LC_COMM_OFF..]; + match slice.iter().position(|&b| b == b'\0') { + Some(i) => slice[..i].to_vec(), + None => slice.to_vec(), + } + } else { + vec![] + }; + Ok(Self { + lifecycle_type: raw[0], + pid: read_u32_le(&raw[PROC_LC_PID_OFF..]), + parent_pid: read_u32_le(&raw[PROC_LC_PPID_OFF..]), + uid: read_u32_le(&raw[PROC_LC_UID_OFF..]), + gid: read_u32_le(&raw[PROC_LC_GID_OFF..]), + timestamp: read_u64_le(&raw[PROC_LC_TS_OFF..]), + comm, + }) + } +} + +impl From for metric::ProcLifecycleEventData { + fn from(d: ProcLifecycleEventData) -> Self { + Self { + lifecycle_type: d.lifecycle_type as i32, + pid: d.pid, + parent_pid: d.parent_pid, + uid: d.uid, + gid: d.gid, + timestamp: d.timestamp, + comm: d.comm, + } + } +} + +// ── EventData ────────────────────────────────────────────────────────── enum EventData { OtherEvent, IoEvent(IoEventData), + FileOpEvent(FileOpEventData), + PermOpEvent(PermOpEventData), + ProcLifecycleEvent(ProcLifecycleEventData), } impl Debug for EventData { @@ -122,11 +314,26 @@ impl Debug for EventData { d.latency, d.off_bytes )), + 
EventData::FileOpEvent(d) => f.write_fmt(format_args!( + "FileOpEventData {{ op_type: {}, pid: {}, filename: {} }}", + d.op_type, + d.pid, + str::from_utf8(&d.filename).unwrap_or("") + )), + EventData::PermOpEvent(d) => f.write_fmt(format_args!( + "PermOpEventData {{ op_type: {}, pid: {}, new_uid: {}, new_gid: {} }}", + d.op_type, d.pid, d.new_uid, d.new_gid + )), + EventData::ProcLifecycleEvent(d) => f.write_fmt(format_args!( + "ProcLifecycleEventData {{ type: {}, pid: {}, parent_pid: {} }}", + d.lifecycle_type, d.pid, d.parent_pid + )), _ => f.write_str("other event"), } } } +// ── EventType ────────────────────────────────────────────────────────── #[derive(PartialEq)] pub enum EventType { OtherEvent = 0, @@ -140,6 +347,9 @@ impl From for EventType { fn from(source: u8) -> Self { match source { IO_EVENT => Self::IoEvent, + FILE_OP_EVENT => Self::FileOpEvent, + PERM_OP_EVENT => Self::PermOpEvent, + PROC_LIFECYCLE_EVENT => Self::ProcLifecycleEvent, _ => Self::OtherEvent, } } @@ -163,6 +373,7 @@ impl fmt::Display for EventType { } } +// ── ProcEvent ────────────────────────────────────────────────────────── pub struct ProcEvent { pub pid: u32, pub pod_id: u32, @@ -187,10 +398,25 @@ impl ProcEvent { let mut end_time = 0; match event_type { EventType::IoEvent => { - let io_event_data = IoEventData::try_from(raw_data)?; // Try to parse IoEventData from data.cap_data + let io_event_data = IoEventData::try_from(raw_data)?; end_time = start_time + io_event_data.latency; event_data = EventData::IoEvent(io_event_data); } + EventType::FileOpEvent => { + let d = FileOpEventData::try_from(raw_data)?; + end_time = start_time; + event_data = EventData::FileOpEvent(d); + } + EventType::PermOpEvent => { + let d = PermOpEventData::try_from(raw_data)?; + end_time = start_time; + event_data = EventData::PermOpEvent(d); + } + EventType::ProcLifecycleEvent => { + let d = ProcLifecycleEventData::try_from(raw_data)?; + end_time = start_time; + event_data = 
EventData::ProcLifecycleEvent(d); + } _ => {} } @@ -242,8 +468,17 @@ impl Sendable for BoxedProcEvents { ..Default::default() }; match self.0.event_data { - EventData::IoEvent(io_event_data) => { - pb_proc_event.io_event_data = Some(io_event_data.into()) + EventData::IoEvent(d) => { + pb_proc_event.io_event_data = Some(d.into()); + } + EventData::FileOpEvent(d) => { + pb_proc_event.file_op_event_data = Some(d.into()); + } + EventData::PermOpEvent(d) => { + pb_proc_event.perm_op_event_data = Some(d.into()); + } + EventData::ProcLifecycleEvent(d) => { + pb_proc_event.proc_lifecycle_event_data = Some(d.into()); } _ => {} } diff --git a/agent/src/ebpf/kernel/include/common.h b/agent/src/ebpf/kernel/include/common.h index 9a89b5e7f80..f3e00f40300 100644 --- a/agent/src/ebpf/kernel/include/common.h +++ b/agent/src/ebpf/kernel/include/common.h @@ -113,6 +113,9 @@ enum process_data_extra_source { DATA_SOURCE_RESERVED, DATA_SOURCE_DPDK, DATA_SOURCE_UNIX_SOCKET, + DATA_SOURCE_FILE_OP_EVENT, + DATA_SOURCE_PERM_OP_EVENT, + DATA_SOURCE_PROC_LIFECYCLE_EVENT, }; struct protocol_message_t { diff --git a/agent/src/platform/platform_synchronizer/proc_scan_hook.rs b/agent/src/platform/platform_synchronizer/proc_scan_hook.rs index 82f434090b6..2cc9e1960e4 100644 --- a/agent/src/platform/platform_synchronizer/proc_scan_hook.rs +++ b/agent/src/platform/platform_synchronizer/proc_scan_hook.rs @@ -17,10 +17,14 @@ use super::ProcessData; pub fn proc_scan_hook(_process_datas: &mut Vec) { - // Enterprise: mark AI Agent processes with biz_type + // Enterprise: clean dead AI Agent PIDs and mark alive ones with biz_type #[cfg(feature = "enterprise")] { if let Some(registry) = enterprise_utils::ai_agent::global_registry() { + // Collect alive PIDs from proc scan to clean stale entries + let alive_pids: Vec = _process_datas.iter().map(|pd| pd.pid as u32).collect(); + registry.cleanup_dead_pids(&alive_pids); + for pd in _process_datas.iter_mut() { if registry.is_ai_agent(pd.pid as u32) { 
pd.biz_type = crate::common::flow::BIZ_TYPE_AI_AGENT; diff --git a/server/ingester/event/decoder/decoder.go b/server/ingester/event/decoder/decoder.go index 4745a82d674..1427fab62eb 100644 --- a/server/ingester/event/decoder/decoder.go +++ b/server/ingester/event/decoder/decoder.go @@ -196,6 +196,25 @@ func (d *Decoder) WriteFileEvent(vtapId uint16, e *pb.ProcEvent) { s.MountPoint = string(ioData.MountPoint) s.Bytes = ioData.BytesCount s.Duration = uint64(s.EndTime - s.StartTime) + } else if e.FileOpEventData != nil { + d := e.FileOpEventData + s.EventType = strings.ToLower(d.OpType.String()) + s.ProcessKName = string(e.ProcessKname) + s.FileName = string(d.Filename) + s.SyscallThread = e.ThreadId + s.SyscallCoroutine = e.CoroutineId + } else if e.PermOpEventData != nil { + d := e.PermOpEventData + s.EventType = strings.ToLower(d.OpType.String()) + s.ProcessKName = string(e.ProcessKname) + s.SyscallThread = e.ThreadId + s.SyscallCoroutine = e.CoroutineId + } else if e.ProcLifecycleEventData != nil { + d := e.ProcLifecycleEventData + s.EventType = strings.ToLower(d.LifecycleType.String()) + s.ProcessKName = string(d.Comm) + s.SyscallThread = e.ThreadId + s.SyscallCoroutine = e.CoroutineId } s.VTAPID = vtapId s.L3EpcID = d.platformData.QueryVtapEpc0(s.OrgId, vtapId) From cc6bbd820de5f7c04720001c0f8bbcea95df7b5a Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Tue, 10 Mar 2026 18:01:29 +0800 Subject: [PATCH 13/45] agent/controller: ai agent reassembly and process biz_type --- agent/src/config/handler.rs | 3 + .../ebpf/kernel/include/protocol_inference.h | 12 +++- .../ebpf/kernel/include/socket_trace_common.h | 5 +- agent/src/ebpf/kernel/socket_trace.bpf.c | 25 ++++++++ agent/src/ebpf/mod.rs | 1 + agent/src/ebpf/user/socket.c | 63 +++++++++++++++++++ agent/src/ebpf/user/socket.h | 1 + agent/src/ebpf_dispatcher.rs | 27 +++++++- .../src/flow_generator/protocol_logs/http.rs | 1 + agent/src/plugin/shared_obj/test.rs | 2 + agent/src/plugin/wasm/test.rs | 2 + 
server/controller/cloud/cloud.go | 1 + server/controller/cloud/model/model.go | 1 + .../schema/rawsql/mysql/ddl_create_table.sql | 2 + .../schema/rawsql/mysql/issu/7.1.0.34.sql | 32 ++++++++++ .../rawsql/postgres/ddl_create_table.sql | 2 + .../db/metadb/model/platform_rsc_model.go | 1 + server/controller/genesis/updater/sync.go | 1 + server/controller/model/model.go | 1 + .../recorder/cache/diffbase/process.go | 3 + .../recorder/pubsub/message/update.go | 1 + server/controller/recorder/updater/process.go | 5 ++ 22 files changed, 185 insertions(+), 7 deletions(-) create mode 100644 server/controller/db/metadb/migrator/schema/rawsql/mysql/issu/7.1.0.34.sql diff --git a/agent/src/config/handler.rs b/agent/src/config/handler.rs index ac1419fe54f..8aa7c1bff2a 100755 --- a/agent/src/config/handler.rs +++ b/agent/src/config/handler.rs @@ -1299,6 +1299,7 @@ pub struct EbpfConfig { pub agent_id: u16, pub epc_id: u32, pub l7_log_packet_size: usize, + pub ai_agent_max_payload_size: usize, // 静态配置 pub l7_protocol_inference_max_fail_count: usize, pub l7_protocol_inference_ttl: usize, @@ -1324,6 +1325,7 @@ impl fmt::Debug for EbpfConfig { .field("agent_id", &self.agent_id) .field("epc_id", &self.epc_id) .field("l7_log_packet_size", &self.l7_log_packet_size) + .field("ai_agent_max_payload_size", &self.ai_agent_max_payload_size) .field( "l7_protocol_inference_max_fail_count", &self.l7_protocol_inference_max_fail_count, @@ -2390,6 +2392,7 @@ impl TryFrom<(Config, UserConfig)> for ModuleConfig { epc_id: conf.global.common.vpc_id, l7_log_packet_size: crate::ebpf::CAP_LEN_MAX .min(conf.processors.request_log.tunning.payload_truncation as usize), + ai_agent_max_payload_size: conf.inputs.proc.ai_agent.max_payload_size, l7_log_tap_types: generate_tap_types_array( &conf.outputs.flow_log.filters.l7_capture_network_types, ), diff --git a/agent/src/ebpf/kernel/include/protocol_inference.h b/agent/src/ebpf/kernel/include/protocol_inference.h index 400f2f04bbf..203cec8796c 100644 --- 
a/agent/src/ebpf/kernel/include/protocol_inference.h +++ b/agent/src/ebpf/kernel/include/protocol_inference.h @@ -3942,6 +3942,12 @@ static __inline void check_and_set_data_reassembly(struct conn_info_s tracer_ctx_map__lookup(&k0); if (tracer_ctx == NULL) return; + __u32 data_limit_max = tracer_ctx->data_limit_max; +#ifdef EXTENDED_AI_AGENT_FILE_IO + if (conn_info->socket_info_ptr->is_ai_agent) + data_limit_max = + tracer_ctx->ai_agent_data_limit_max; +#endif /* * Here, the length is checked, and if it has already reached * the configured limit, assembly will not proceed. @@ -3956,9 +3962,9 @@ static __inline void check_and_set_data_reassembly(struct conn_info_s * reassembly is needed (whether to decide to push to the upper layer * for reassembly). */ - if (conn_info->socket_info_ptr->reasm_bytes >= - tracer_ctx->data_limit_max - || conn_info->prev_count > 0) + if ((data_limit_max > 0 && + conn_info->socket_info_ptr->reasm_bytes >= + data_limit_max) || conn_info->prev_count > 0) conn_info->enable_reasm = false; } else { conn_info->enable_reasm = false; diff --git a/agent/src/ebpf/kernel/include/socket_trace_common.h b/agent/src/ebpf/kernel/include/socket_trace_common.h index 9ebfc0efae2..372977959dd 100644 --- a/agent/src/ebpf/kernel/include/socket_trace_common.h +++ b/agent/src/ebpf/kernel/include/socket_trace_common.h @@ -23,6 +23,7 @@ #define DF_BPF_SOCKET_TRACE_COMMON_H #define CAP_DATA_SIZE 1024 // For no-brust send buffer #define BURST_DATA_BUF_SIZE 16384 // For brust send buffer +#define AI_AGENT_DATA_LIMIT_MAX_UNLIMITED 0x7fffffff #include "../config.h" @@ -181,7 +182,8 @@ struct socket_info_s { */ __u16 no_trace:1; __u16 data_source:4; // The source of the stored data, defined in the 'enum process_data_extra_source'. - __u16 unused_bits:7; + __u16 is_ai_agent:1; + __u16 unused_bits:6; __u32 reasm_bytes; // The amount of data bytes that have been reassembled. 
/* @@ -237,6 +239,7 @@ struct tracer_ctx_s { __u64 coroutine_trace_id; /**< Data forwarding association within the same coroutine */ __u64 thread_trace_id; /**< Data forwarding association within the same process/thread, used for multi-transaction scenarios */ __u32 data_limit_max; /**< Maximum number of data transfers */ + __u32 ai_agent_data_limit_max; /**< AI Agent max reassembly limit (0 = unlimited) */ __u32 go_tracing_timeout; /**< Go tracing timeout */ __u32 io_event_collect_mode; /**< IO event collection mode */ __u64 io_event_minimal_duration; /**< Minimum duration for IO events */ diff --git a/agent/src/ebpf/kernel/socket_trace.bpf.c b/agent/src/ebpf/kernel/socket_trace.bpf.c index 291cf8bed06..db4e715c37b 100644 --- a/agent/src/ebpf/kernel/socket_trace.bpf.c +++ b/agent/src/ebpf/kernel/socket_trace.bpf.c @@ -39,6 +39,10 @@ #define __user +#ifdef EXTENDED_AI_AGENT_FILE_IO +static __inline int is_ai_agent_process(__u64 pid_tgid); +#endif + /* *INDENT-OFF* */ /*********************************************************** * map definitions @@ -1364,6 +1368,14 @@ __data_submit(struct pt_regs *ctx, struct conn_info_s *conn_info, * so they are saved here. */ int data_max_sz = tracer_ctx->data_limit_max; +#ifdef EXTENDED_AI_AGENT_FILE_IO + bool is_ai_agent = is_ai_agent_process(pid_tgid); + if (is_ai_agent) { + __u32 ai_limit = tracer_ctx->ai_agent_data_limit_max; + data_max_sz = ai_limit == 0 ? + AI_AGENT_DATA_LIMIT_MAX_UNLIMITED : ai_limit; + } +#endif struct trace_stats *trace_stats = trace_stats_map__lookup(&k0); if (trace_stats == NULL) @@ -1443,6 +1455,9 @@ __data_submit(struct pt_regs *ctx, struct conn_info_s *conn_info, syscall_len > data_max_sz ? 
data_max_sz : syscall_len; } +#ifdef EXTENDED_AI_AGENT_FILE_IO + sk_info->is_ai_agent = is_ai_agent; +#endif sk_info->direction = conn_info->direction; sk_info->pre_direction = conn_info->direction; sk_info->role = conn_info->role; @@ -1525,6 +1540,9 @@ __data_submit(struct pt_regs *ctx, struct conn_info_s *conn_info, if (is_socket_info_valid(socket_info_ptr)) { sk_info->uid = socket_info_ptr->uid; sk_info->allow_reassembly = socket_info_ptr->allow_reassembly; +#ifdef EXTENDED_AI_AGENT_FILE_IO + socket_info_ptr->is_ai_agent = is_ai_agent; +#endif /* * The kernel syscall interface determines that it is the TLS @@ -2870,6 +2888,13 @@ static __inline void __push_close_event(__u64 pid_tgid, __u64 uid, __u64 seq, if (tracer_ctx == NULL) return; int data_max_sz = tracer_ctx->data_limit_max; +#ifdef EXTENDED_AI_AGENT_FILE_IO + if (is_ai_agent_process(pid_tgid)) { + __u32 ai_limit = tracer_ctx->ai_agent_data_limit_max; + data_max_sz = ai_limit == 0 ? + AI_AGENT_DATA_LIMIT_MAX_UNLIMITED : ai_limit; + } +#endif struct __socket_data_buffer *v_buff = bpf_map_lookup_elem(&NAME(data_buf), &k0); if (!v_buff) diff --git a/agent/src/ebpf/mod.rs b/agent/src/ebpf/mod.rs index 7ef46c56ec2..a7efbab0420 100644 --- a/agent/src/ebpf/mod.rs +++ b/agent/src/ebpf/mod.rs @@ -574,6 +574,7 @@ extern "C" { * @return the set maximum buffer size value on success, < 0 on failure. 
*/ pub fn set_data_limit_max(limit_size: c_int) -> c_int; + pub fn set_ai_agent_data_limit_max(limit_size: c_int) -> c_int; pub fn set_go_tracing_timeout(timeout: c_int) -> c_int; pub fn set_io_event_collect_mode(mode: c_int) -> c_int; pub fn set_io_event_minimal_duration(duration: c_ulonglong) -> c_int; diff --git a/agent/src/ebpf/user/socket.c b/agent/src/ebpf/user/socket.c index 3ba3d766fb3..31668a01856 100644 --- a/agent/src/ebpf/user/socket.c +++ b/agent/src/ebpf/user/socket.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include "clib.h" @@ -117,6 +118,7 @@ static pthread_mutex_t datadump_mutex; * Set by set_data_limit_max() */ static uint32_t socket_data_limit_max; +static uint32_t ai_agent_data_limit_max; static uint32_t go_tracing_timeout = GO_TRACING_TIMEOUT_DEFAULT; @@ -2295,6 +2297,25 @@ static inline int __set_data_limit_max(int limit_size) return socket_data_limit_max; } +static inline int __set_ai_agent_data_limit_max(int limit_size) +{ + if (limit_size < 0) { + ebpf_warning("ai_agent limit_size cannot be negative\n"); + return ETR_INVAL; + } else if (limit_size == 0) { + ai_agent_data_limit_max = 0; + } else if (limit_size > INT_MAX) { + ai_agent_data_limit_max = INT_MAX; + } else { + ai_agent_data_limit_max = limit_size; + } + + ebpf_info("Received ai_agent limit_size (%d), the final value is set to '%u'\n", + limit_size, ai_agent_data_limit_max); + + return ai_agent_data_limit_max; +} + /** * Set maximum amount of data passed to the agent by eBPF programe. * @limit_size : The maximum length of data. 
If @limit_size exceeds 16384, @@ -2343,6 +2364,44 @@ int set_data_limit_max(int limit_size) return set_val; } +int set_ai_agent_data_limit_max(int limit_size) +{ + int set_val = __set_ai_agent_data_limit_max(limit_size); + if (set_val < 0) + return set_val; + + struct bpf_tracer *tracer = find_bpf_tracer(SK_TRACER_NAME); + if (tracer == NULL) { + /* + * Called before running_socket_tracer(), + * no need to update config map + */ + return set_val; + } + + int cpu; + int nr_cpus = get_num_possible_cpus(); + struct tracer_ctx_s values[nr_cpus]; + memset(values, 0, sizeof(values)); + + if (!bpf_table_get_value(tracer, MAP_TRACER_CTX_NAME, 0, values)) { + ebpf_warning("Get map '%s' failed.\n", MAP_TRACER_CTX_NAME); + return ETR_NOTEXIST; + } + + for (cpu = 0; cpu < nr_cpus; cpu++) { + values[cpu].ai_agent_data_limit_max = set_val; + } + + if (!bpf_table_set_value + (tracer, MAP_TRACER_CTX_NAME, 0, (void *)&values)) { + ebpf_warning("Set '%s' failed\n", MAP_TRACER_CTX_NAME); + return ETR_UPDATE_MAP_FAILD; + } + + return set_val; +} + int set_go_tracing_timeout(int timeout) { go_tracing_timeout = timeout; @@ -3028,6 +3087,8 @@ int running_socket_tracer(tracer_callback_t handle, // Set default maximum amount of data passed to the agent by eBPF. 
if (socket_data_limit_max == 0) __set_data_limit_max(0); + if (ai_agent_data_limit_max == 0) + __set_ai_agent_data_limit_max(0); uint64_t uid_base = (gettime(CLOCK_REALTIME, TIME_TYPE_NAN) / 100) & 0xffffffffffffffULL; @@ -3042,6 +3103,7 @@ int running_socket_tracer(tracer_callback_t handle, t_conf[cpu].coroutine_trace_id = t_conf[cpu].socket_id; t_conf[cpu].thread_trace_id = t_conf[cpu].socket_id; t_conf[cpu].data_limit_max = socket_data_limit_max; + t_conf[cpu].ai_agent_data_limit_max = ai_agent_data_limit_max; t_conf[cpu].io_event_collect_mode = io_event_collect_mode; t_conf[cpu].io_event_minimal_duration = io_event_minimal_duration; @@ -3056,6 +3118,7 @@ int running_socket_tracer(tracer_callback_t handle, return -EINVAL; ebpf_info("Config socket_data_limit_max: %d\n", socket_data_limit_max); + ebpf_info("Config ai_agent_data_limit_max: %u\n", ai_agent_data_limit_max); ebpf_info("Config io_event_collect_mode: %d\n", io_event_collect_mode); ebpf_info("Config io_event_minimal_duration: %llu ns\n", io_event_minimal_duration); ebpf_info("Config virtual_file_collect_enable: %d\n", virtual_file_collect_enable); diff --git a/agent/src/ebpf/user/socket.h b/agent/src/ebpf/user/socket.h index b09799fce57..fd5e72fcd74 100644 --- a/agent/src/ebpf/user/socket.h +++ b/agent/src/ebpf/user/socket.h @@ -408,6 +408,7 @@ prefetch_and_process_data(struct bpf_tracer *t, int id, int nb_rx, void **datas_ } int set_data_limit_max(int limit_size); +int set_ai_agent_data_limit_max(int limit_size); int set_go_tracing_timeout(int timeout); int set_io_event_collect_mode(uint32_t mode); int set_io_event_minimal_duration(uint64_t duration); diff --git a/agent/src/ebpf_dispatcher.rs b/agent/src/ebpf_dispatcher.rs index 9abdad6bd3c..330761dc285 100644 --- a/agent/src/ebpf_dispatcher.rs +++ b/agent/src/ebpf_dispatcher.rs @@ -1064,7 +1064,7 @@ impl EbpfCollector { return Err(Error::EbpfRunningError); } - Self::ebpf_on_config_change(config.l7_log_packet_size); + 
Self::ebpf_on_config_change(config.l7_log_packet_size, config.ai_agent_max_payload_size); let ebpf_conf = &config.ebpf; let on_cpu = &ebpf_conf.profile.on_cpu; @@ -1298,7 +1298,7 @@ impl EbpfCollector { Ok(handle) } - fn ebpf_on_config_change(l7_log_packet_size: usize) { + fn ebpf_on_config_change(l7_log_packet_size: usize, ai_agent_max_payload_size: usize) { unsafe { let n = ebpf::set_data_limit_max(l7_log_packet_size as c_int); if n < 0 { @@ -1312,6 +1312,24 @@ impl EbpfCollector { l7_log_packet_size, n ); } + + let ai_agent_limit = if ai_agent_max_payload_size == 0 { + 0 + } else { + ai_agent_max_payload_size.min(i32::MAX as usize) as c_int + }; + let n = ebpf::set_ai_agent_data_limit_max(ai_agent_limit); + if n < 0 { + warn!( + "ebpf set ai_agent_max_payload_size({}) failed.", + ai_agent_max_payload_size + ); + } else if ai_agent_limit != 0 && n != ai_agent_limit { + info!( + "ebpf set ai_agent_max_payload_size to {}, actual effective configuration is {}.", + ai_agent_max_payload_size, n + ); + } } } @@ -1520,7 +1538,10 @@ impl EbpfCollector { } } - Self::ebpf_on_config_change(config.l7_log_packet_size); + Self::ebpf_on_config_change( + config.l7_log_packet_size, + config.ai_agent_max_payload_size, + ); #[cfg(feature = "extended_observability")] { diff --git a/agent/src/flow_generator/protocol_logs/http.rs b/agent/src/flow_generator/protocol_logs/http.rs index 17e99c2fab4..803ff63f21e 100644 --- a/agent/src/flow_generator/protocol_logs/http.rs +++ b/agent/src/flow_generator/protocol_logs/http.rs @@ -2840,6 +2840,7 @@ mod tests { iso8583_parse_conf: Iso8583ParseConfig::default(), web_sphere_mq_parse_conf: WebSphereMqParseConfig::default(), icmp_data: None, + process_id: 0, }; //测试长度不正确 diff --git a/agent/src/plugin/shared_obj/test.rs b/agent/src/plugin/shared_obj/test.rs index 79c47ba739e..adf1a19ad52 100644 --- a/agent/src/plugin/shared_obj/test.rs +++ b/agent/src/plugin/shared_obj/test.rs @@ -92,6 +92,7 @@ fn get_req_param<'a>( iso8583_parse_conf: 
Iso8583ParseConfig::default(), web_sphere_mq_parse_conf: WebSphereMqParseConfig::default(), icmp_data: None, + process_id: 0, } } @@ -132,6 +133,7 @@ fn get_resp_param<'a>( iso8583_parse_conf: Iso8583ParseConfig::default(), web_sphere_mq_parse_conf: WebSphereMqParseConfig::default(), icmp_data: None, + process_id: 0, } } diff --git a/agent/src/plugin/wasm/test.rs b/agent/src/plugin/wasm/test.rs index 35e8b659c19..72b2c5a1c8d 100644 --- a/agent/src/plugin/wasm/test.rs +++ b/agent/src/plugin/wasm/test.rs @@ -84,6 +84,7 @@ fn get_req_param<'a>( iso8583_parse_conf: Iso8583ParseConfig::default(), web_sphere_mq_parse_conf: WebSphereMqParseConfig::default(), icmp_data: None, + process_id: 0, } } @@ -125,6 +126,7 @@ fn get_resq_param<'a>( iso8583_parse_conf: Iso8583ParseConfig::default(), web_sphere_mq_parse_conf: WebSphereMqParseConfig::default(), icmp_data: None, + process_id: 0, } } diff --git a/server/controller/cloud/cloud.go b/server/controller/cloud/cloud.go index df42d04ee56..3c3a25ffb4b 100644 --- a/server/controller/cloud/cloud.go +++ b/server/controller/cloud/cloud.go @@ -693,6 +693,7 @@ func (c *Cloud) appendResourceProcess(resource model.Resource) model.Resource { PID: sProcess.PID, NetnsID: sProcess.NetnsID, ProcessName: processName, + BizType: sProcess.BizType, CommandLine: sProcess.CMDLine, UserName: sProcess.UserName, ContainerID: sProcess.ContainerID, diff --git a/server/controller/cloud/model/model.go b/server/controller/cloud/model/model.go index bdf307d4367..0eda6e3e9a7 100644 --- a/server/controller/cloud/model/model.go +++ b/server/controller/cloud/model/model.go @@ -680,6 +680,7 @@ type Process struct { VTapID uint32 `json:"vtap_id" binding:"required"` PID uint64 `json:"pid" binding:"required"` ProcessName string `json:"process_name" binding:"required"` + BizType int `json:"biz_type"` CommandLine string `json:"command_line"` UserName string `json:"user_name"` StartTime time.Time `json:"start_time" binding:"required"` diff --git 
a/server/controller/db/metadb/migrator/schema/rawsql/mysql/ddl_create_table.sql b/server/controller/db/metadb/migrator/schema/rawsql/mysql/ddl_create_table.sql index ffb6aacfd47..b58354a220e 100644 --- a/server/controller/db/metadb/migrator/schema/rawsql/mysql/ddl_create_table.sql +++ b/server/controller/db/metadb/migrator/schema/rawsql/mysql/ddl_create_table.sql @@ -1256,6 +1256,7 @@ CREATE TABLE IF NOT EXISTS process ( vm_id INTEGER, epc_id INTEGER, process_name TEXT, + biz_type INTEGER DEFAULT 0, command_line TEXT, user_name VARCHAR(256) DEFAULT '', start_time DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, @@ -1492,6 +1493,7 @@ CREATE TABLE IF NOT EXISTS genesis_process ( lcuuid CHAR(64) DEFAULT '', name TEXT, process_name TEXT, + biz_type INTEGER DEFAULT 0, cmd_line TEXT, user_name VARCHAR(256) DEFAULT '', container_id CHAR(64) DEFAULT '', diff --git a/server/controller/db/metadb/migrator/schema/rawsql/mysql/issu/7.1.0.34.sql b/server/controller/db/metadb/migrator/schema/rawsql/mysql/issu/7.1.0.34.sql new file mode 100644 index 00000000000..a39082f9a1a --- /dev/null +++ b/server/controller/db/metadb/migrator/schema/rawsql/mysql/issu/7.1.0.34.sql @@ -0,0 +1,32 @@ +DROP PROCEDURE IF EXISTS AddColumnIfNotExists; + +CREATE PROCEDURE AddColumnIfNotExists( + IN tableName VARCHAR(255), + IN colName VARCHAR(255), + IN colType VARCHAR(255), + IN afterCol VARCHAR(255) +) +BEGIN + DECLARE column_count INT; + + SELECT COUNT(*) + INTO column_count + FROM information_schema.columns + WHERE TABLE_SCHEMA = DATABASE() + AND TABLE_NAME = tableName + AND column_name = colName; + + IF column_count = 0 THEN + SET @sql = CONCAT('ALTER TABLE ', tableName, ' ADD COLUMN ', colName, ' ', colType, ' AFTER ', afterCol); + PREPARE stmt FROM @sql; + EXECUTE stmt; + DEALLOCATE PREPARE stmt; + END IF; +END; + +CALL AddColumnIfNotExists('process', 'biz_type', 'INTEGER DEFAULT 0', 'process_name'); +CALL AddColumnIfNotExists('genesis_process', 'biz_type', 'INTEGER DEFAULT 0', 'process_name'); + 
+DROP PROCEDURE AddColumnIfNotExists; + +UPDATE db_version SET version='7.1.0.34'; diff --git a/server/controller/db/metadb/migrator/schema/rawsql/postgres/ddl_create_table.sql b/server/controller/db/metadb/migrator/schema/rawsql/postgres/ddl_create_table.sql index f0965b29919..e86049e201b 100644 --- a/server/controller/db/metadb/migrator/schema/rawsql/postgres/ddl_create_table.sql +++ b/server/controller/db/metadb/migrator/schema/rawsql/postgres/ddl_create_table.sql @@ -1369,6 +1369,7 @@ CREATE TABLE IF NOT EXISTS process ( vm_id INTEGER, epc_id INTEGER, process_name TEXT, + biz_type INTEGER DEFAULT 0, command_line TEXT, user_name VARCHAR(256) DEFAULT '', start_time TIMESTAMP NOT NULL DEFAULT NOW(), @@ -1615,6 +1616,7 @@ CREATE TABLE IF NOT EXISTS genesis_process ( lcuuid VARCHAR(64) DEFAULT '', name TEXT, process_name TEXT, + biz_type INTEGER DEFAULT 0, cmd_line TEXT, user_name VARCHAR(256) DEFAULT '', container_id VARCHAR(64) DEFAULT '', diff --git a/server/controller/db/metadb/model/platform_rsc_model.go b/server/controller/db/metadb/model/platform_rsc_model.go index 3f9e007d919..033a0128ddb 100644 --- a/server/controller/db/metadb/model/platform_rsc_model.go +++ b/server/controller/db/metadb/model/platform_rsc_model.go @@ -94,6 +94,7 @@ type Process struct { VMID int `gorm:"column:vm_id;type:int;default:null" json:"VM_ID" mapstructure:"VM_ID"` VPCID int `gorm:"column:epc_id;type:int;default:null" json:"EPC_ID" mapstructure:"EPC_ID"` ProcessName string `gorm:"column:process_name;type:varchar(256);default:''" json:"PROCESS_NAME" mapstructure:"PROCESS_NAME"` + BizType int `gorm:"column:biz_type;type:int;default:0" json:"BIZ_TYPE" mapstructure:"BIZ_TYPE"` CommandLine string `gorm:"column:command_line;type:text" json:"COMMAND_LINE" mapstructure:"CMD_LINE"` UserName string `gorm:"column:user_name;type:varchar(256);default:''" json:"USER_NAME" mapstructure:"USER_NAME"` StartTime time.Time `gorm:"autoCreateTime;column:start_time;type:datetime" json:"START_TIME" 
mapstructure:"START_TIME"` diff --git a/server/controller/genesis/updater/sync.go b/server/controller/genesis/updater/sync.go index b546aea0a90..485d604ff67 100644 --- a/server/controller/genesis/updater/sync.go +++ b/server/controller/genesis/updater/sync.go @@ -539,6 +539,7 @@ func (v *GenesisSyncRpcUpdater) ParseProcessInfo(orgID int, vtapID uint32, messa NetnsID: p.GetNetnsId(), Name: p.GetName(), ProcessName: p.GetProcessName(), + BizType: int(p.GetBizType()), CMDLine: p.GetCmdline(), UserName: p.GetUser(), ContainerID: p.GetContainerId(), diff --git a/server/controller/model/model.go b/server/controller/model/model.go index 8dd7d4ab8da..a8957356514 100644 --- a/server/controller/model/model.go +++ b/server/controller/model/model.go @@ -752,6 +752,7 @@ type GenesisProcess struct { Lcuuid string `gorm:"primaryKey;column:lcuuid;type:char(64)" json:"LCUUID"` Name string `gorm:"column:name;type:text;default:null" json:"NAME"` ProcessName string `gorm:"column:process_name;type:text;default:null" json:"PROCESS_NAME"` + BizType int `gorm:"column:biz_type;type:int;default:0" json:"BIZ_TYPE"` CMDLine string `gorm:"column:cmd_line;type:text;default:null" json:"CMD_LINE"` ContainerID string `gorm:"column:container_id;type:char(64);default:''" json:"CONTAINER_ID"` UserName string `gorm:"column:user_name;type:varchar(256);default:null" json:"USER"` diff --git a/server/controller/recorder/cache/diffbase/process.go b/server/controller/recorder/cache/diffbase/process.go index 91c7afa7c99..5b2ba9c6049 100644 --- a/server/controller/recorder/cache/diffbase/process.go +++ b/server/controller/recorder/cache/diffbase/process.go @@ -34,6 +34,7 @@ func (b *DataSet) AddProcess(dbItem *metadbmodel.Process, seq int) { ContainerID: dbItem.ContainerID, DeviceType: dbItem.DeviceType, DeviceID: dbItem.DeviceID, + BizType: dbItem.BizType, } b.GetLogFunc()(addDiffBase(ctrlrcommon.RESOURCE_TYPE_PROCESS_EN, b.Process[dbItem.Lcuuid]), b.metadata.LogPrefixes) } @@ -50,12 +51,14 @@ type Process 
struct { ContainerID string `json:"container_id"` DeviceType int `json:"device_type"` DeviceID int `json:"device_id"` + BizType int `json:"biz_type"` } func (p *Process) Update(cloudItem *cloudmodel.Process, toolDataSet *tool.DataSet) { p.Name = cloudItem.Name p.OSAPPTags = cloudItem.OSAPPTags p.ContainerID = cloudItem.ContainerID + p.BizType = cloudItem.BizType deviceType, deviceID := toolDataSet.GetProcessDeviceTypeAndID(cloudItem.ContainerID, cloudItem.VTapID) if p.DeviceType != deviceType || p.DeviceID != deviceID { p.DeviceType = deviceType diff --git a/server/controller/recorder/pubsub/message/update.go b/server/controller/recorder/pubsub/message/update.go index ef32538d5d7..f8301ab673b 100644 --- a/server/controller/recorder/pubsub/message/update.go +++ b/server/controller/recorder/pubsub/message/update.go @@ -665,6 +665,7 @@ type UpdatedProcessFields struct { ProcessName fieldDetail[string] ContainerID fieldDetail[string] OSAPPTags fieldDetail[string] + BizType fieldDetail[int] VMID fieldDetail[int] VPCID fieldDetail[int] GID fieldDetail[uint32] diff --git a/server/controller/recorder/updater/process.go b/server/controller/recorder/updater/process.go index 945eae749df..270ad05d807 100644 --- a/server/controller/recorder/updater/process.go +++ b/server/controller/recorder/updater/process.go @@ -121,6 +121,7 @@ func (p *Process) generateDBItemToAdd(cloudItem *cloudmodel.Process) (*metadbmod VTapID: cloudItem.VTapID, PID: cloudItem.PID, ProcessName: cloudItem.ProcessName, + BizType: cloudItem.BizType, CommandLine: cloudItem.CommandLine, StartTime: cloudItem.StartTime, UserName: cloudItem.UserName, @@ -156,6 +157,10 @@ func (p *Process) generateUpdateInfo(diffBase *diffbase.Process, cloudItem *clou mapInfo["os_app_tags"] = cloudItem.OSAPPTags structInfo.OSAPPTags.Set(diffBase.OSAPPTags, cloudItem.OSAPPTags) } + if diffBase.BizType != cloudItem.BizType { + mapInfo["biz_type"] = cloudItem.BizType + structInfo.BizType.Set(diffBase.BizType, cloudItem.BizType) + } 
if diffBase.ContainerID != cloudItem.ContainerID { mapInfo["container_id"] = cloudItem.ContainerID structInfo.ContainerID.Set(diffBase.ContainerID, cloudItem.ContainerID) From 2d70889bd3c90ffd8e8671f76e3131d9b264b812 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Tue, 10 Mar 2026 20:47:11 +0800 Subject: [PATCH 14/45] server: add ai_agent config and bump db version --- server/agent_config/template.yaml | 53 +++++++++++++++++++ .../db/metadb/migrator/schema/const.go | 2 +- 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/server/agent_config/template.yaml b/server/agent_config/template.yaml index c1c4aae0f0e..5eacb6b6ade 100644 --- a/server/agent_config/template.yaml +++ b/server/agent_config/template.yaml @@ -1666,6 +1666,59 @@ inputs: enabled_features: [proc.gprocess_info] # type: section # name: + # en: AI Agent + # ch: 智能体治理 + # description: + ai_agent: + # type: string + # name: + # en: HTTP Endpoints + # ch: HTTP 端点 + # unit: + # range: [] + # enum_options: [] + # modification: hot_update + # ee_feature: true + # description: + # en: |- + # HTTP endpoints for AI agent recognition. Requests that match any prefix will mark the process as AI Agent. + # ch: |- + # 用于识别智能体的 HTTP 端点前缀,命中后会标记进程为 AI Agent。 + http_endpoints: + - /v1/chat/completions + - /v1/embeddings + # type: int + # name: + # en: Max Payload Size + # ch: 最大载荷大小 + # unit: byte + # range: [0, 2147483647] + # enum_options: [] + # modification: hot_update + # ee_feature: true + # description: + # en: |- + # Maximum payload size for AI agent reassembly. 0 means unlimited. + # ch: |- + # AI Agent 流重组最大载荷大小,0 表示不限。 + max_payload_size: 0 + # type: bool + # name: + # en: File IO Enabled + # ch: 文件 IO 事件 + # unit: + # range: [] + # enum_options: [] + # modification: hot_update + # ee_feature: true + # description: + # en: |- + # Whether to enable AI Agent file IO event collection. 
+ # ch: |- + # 是否开启 AI Agent 文件 IO 事件采集。 + file_io_enabled: true + # type: section + # name: # en: Symbol Table # ch: 符号表 # description: diff --git a/server/controller/db/metadb/migrator/schema/const.go b/server/controller/db/metadb/migrator/schema/const.go index 4e0e57afb2e..43171fa1733 100644 --- a/server/controller/db/metadb/migrator/schema/const.go +++ b/server/controller/db/metadb/migrator/schema/const.go @@ -20,5 +20,5 @@ const ( RAW_SQL_ROOT_DIR = "/etc/metadb/schema/rawsql" DB_VERSION_TABLE = "db_version" - DB_VERSION_EXPECTED = "7.1.0.33" + DB_VERSION_EXPECTED = "7.1.0.34" ) From 4ea530f8317e2c0056182f686862338ba18c843b Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Wed, 11 Mar 2026 10:10:59 +0800 Subject: [PATCH 15/45] agent: extend endpoints for agent judgement --- agent/src/config/config.rs | 1 + server/agent_config/template.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/agent/src/config/config.rs b/agent/src/config/config.rs index abb9f6766f2..aa633524d68 100644 --- a/agent/src/config/config.rs +++ b/agent/src/config/config.rs @@ -572,6 +572,7 @@ impl Default for AiAgentConfig { http_endpoints: vec![ "/v1/chat/completions".to_string(), "/v1/embeddings".to_string(), + "/v1/responses".to_string(), ], max_payload_size: 0, // 0 means unlimited file_io_enabled: true, diff --git a/server/agent_config/template.yaml b/server/agent_config/template.yaml index 5eacb6b6ade..76b4da85189 100644 --- a/server/agent_config/template.yaml +++ b/server/agent_config/template.yaml @@ -1687,6 +1687,7 @@ inputs: http_endpoints: - /v1/chat/completions - /v1/embeddings + - /v1/responses # type: int # name: # en: Max Payload Size From e45c6b07f085517b64fa73e154cd478a158f0a57 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Wed, 11 Mar 2026 12:21:16 +0800 Subject: [PATCH 16/45] agent: fix ai-agent tracepoint hooks --- agent/src/ebpf/Makefile | 1 + agent/src/ebpf/kernel/socket_trace.bpf.c | 15 ++++++++++++++ agent/src/ebpf/kernel/uprobe_base.bpf.c | 12 ++++++++++++ 
agent/src/ebpf/user/socket.c | 25 ++++++++++++++++++++++++ 4 files changed, 53 insertions(+) diff --git a/agent/src/ebpf/Makefile b/agent/src/ebpf/Makefile index 687c5a7bb45..d377e8afdcc 100644 --- a/agent/src/ebpf/Makefile +++ b/agent/src/ebpf/Makefile @@ -117,6 +117,7 @@ ifeq ($(GCC_VER_GTE71),1) CFLAGS += -Wformat-truncation=0 endif CFLAGS += $(JAVA_AGENT_MACROS) +CFLAGS += $(EXTRA_CFLAGS) CTLSRC := user/utils.c user/ctrl_tracer.c user/ctrl.c user/log.c CTLBIN := deepflow-ebpfctl diff --git a/agent/src/ebpf/kernel/socket_trace.bpf.c b/agent/src/ebpf/kernel/socket_trace.bpf.c index db4e715c37b..8af68f6c983 100644 --- a/agent/src/ebpf/kernel/socket_trace.bpf.c +++ b/agent/src/ebpf/kernel/socket_trace.bpf.c @@ -40,7 +40,22 @@ #define __user #ifdef EXTENDED_AI_AGENT_FILE_IO +#ifndef AI_AGENT_PROC_FORK +#define AI_AGENT_PROC_FORK 1 +#define AI_AGENT_PROC_EXEC 2 +#define AI_AGENT_PROC_EXIT 3 +#endif + static __inline int is_ai_agent_process(__u64 pid_tgid); +static __inline int ai_agent_submit_event(void *ctx, __u8 source, + void *event, __u32 event_sz, + __u64 pid_tgid); +static __inline int ai_agent_emit_proc_event(void *ctx, __u8 event_type, + __u32 pid, __u32 parent_pid, + __u64 pid_tgid); +static __inline void ai_agent_cleanup_proc_pid(__u32 tgid); +static __inline int ai_agent_on_fork(void *ctx, __u32 parent_tgid, + __u32 child_tgid); #endif /* *INDENT-OFF* */ diff --git a/agent/src/ebpf/kernel/uprobe_base.bpf.c b/agent/src/ebpf/kernel/uprobe_base.bpf.c index 102c2117dc4..60157454ab0 100644 --- a/agent/src/ebpf/kernel/uprobe_base.bpf.c +++ b/agent/src/ebpf/kernel/uprobe_base.bpf.c @@ -636,6 +636,13 @@ static __inline int do_process_exit(void *ctx) bpf_get_current_comm(data.name, sizeof(data.name)); bpf_perf_event_output(ctx, &NAME(socket_data), BPF_F_CURRENT_CPU, &data, sizeof(data)); +#ifdef EXTENDED_AI_AGENT_FILE_IO + if (is_ai_agent_process(id)) { + ai_agent_emit_proc_event(ctx, AI_AGENT_PROC_EXIT, + pid, 0, id); + ai_agent_cleanup_proc_pid(pid); + } +#endif 
} bpf_map_delete_elem(&goroutines_map, &id); @@ -747,6 +754,11 @@ static __inline int __process_exec(void *ctx) bpf_perf_event_output(ctx, &NAME(socket_data), BPF_F_CURRENT_CPU, &data, sizeof(data)); } +#ifdef EXTENDED_AI_AGENT_FILE_IO + if (is_ai_agent_process(id)) { + ai_agent_emit_proc_event(ctx, AI_AGENT_PROC_EXEC, pid, 0, id); + } +#endif return 0; } diff --git a/agent/src/ebpf/user/socket.c b/agent/src/ebpf/user/socket.c index 31668a01856..da9f1f710cc 100644 --- a/agent/src/ebpf/user/socket.c +++ b/agent/src/ebpf/user/socket.c @@ -274,6 +274,28 @@ static inline void config_probes_for_proc_event(struct tracer_probes_conf *tps) } } +#ifdef EXTENDED_AI_AGENT_FILE_IO +static inline void config_probes_for_ai_agent(struct tracer_probes_conf *tps) +{ + tps_set_symbol(tps, "tracepoint/syscalls/sys_enter_openat"); + tps_set_symbol(tps, "tracepoint/syscalls/sys_enter_unlinkat"); + tps_set_symbol(tps, "tracepoint/syscalls/sys_enter_fchmodat"); + tps_set_symbol(tps, "tracepoint/syscalls/sys_enter_fchownat"); + tps_set_symbol(tps, "tracepoint/syscalls/sys_enter_setuid"); + tps_set_symbol(tps, "tracepoint/syscalls/sys_enter_setgid"); + tps_set_symbol(tps, "tracepoint/syscalls/sys_enter_setreuid"); + tps_set_symbol(tps, "tracepoint/syscalls/sys_enter_setregid"); + tps_set_symbol(tps, "tracepoint/sched/sched_process_fork"); + tps_set_symbol(tps, "tracepoint/sched/sched_process_exec"); + tps_set_symbol(tps, "tracepoint/sched/sched_process_exit"); +} +#else +static inline void config_probes_for_ai_agent(struct tracer_probes_conf *tps) +{ + (void)tps; +} +#endif + static void config_probes_for_kfunc(struct tracer_probes_conf *tps) { kfunc_set_sym_for_entry_and_exit(tps, "ksys_write"); @@ -294,6 +316,7 @@ static void config_probes_for_kfunc(struct tracer_probes_conf *tps) kfunc_set_symbol(tps, "__sys_accept4", true); kfunc_set_symbol(tps, "__sys_connect", false); config_probes_for_proc_event(tps); + config_probes_for_ai_agent(tps); /* * On certain kernels, such as 
5.15.0-127-generic and 5.10.134-18.al8.x86_64, @@ -404,6 +427,7 @@ static void config_probes_for_kprobe_and_tracepoint(struct tracer_probes_conf tps_set_symbol(tps, "tracepoint/syscalls/sys_enter_pwritev2"); tps_set_symbol(tps, "tracepoint/syscalls/sys_exit_pwritev2"); } + config_probes_for_ai_agent(tps); } static inline void __config_kprobe(struct tracer_probes_conf *tps, @@ -467,6 +491,7 @@ static void config_probes_for_kprobe(struct tracer_probes_conf *tps) probes_set_enter_symbol(tps, "__close_fd"); probes_set_exit_symbol(tps, "__sys_socket"); probes_set_enter_symbol(tps, "__sys_connect"); + config_probes_for_ai_agent(tps); } static void socket_tracer_set_probes(struct tracer_probes_conf *tps) From c0d332936e075b2ffae539f9df11ea36522bac55 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Wed, 11 Mar 2026 12:58:34 +0800 Subject: [PATCH 17/45] agent: add ai-agent chmod/chown/unlink tracepoints --- agent/src/ebpf/user/socket.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/agent/src/ebpf/user/socket.c b/agent/src/ebpf/user/socket.c index da9f1f710cc..ecab0967583 100644 --- a/agent/src/ebpf/user/socket.c +++ b/agent/src/ebpf/user/socket.c @@ -279,8 +279,11 @@ static inline void config_probes_for_ai_agent(struct tracer_probes_conf *tps) { tps_set_symbol(tps, "tracepoint/syscalls/sys_enter_openat"); tps_set_symbol(tps, "tracepoint/syscalls/sys_enter_unlinkat"); + tps_set_symbol(tps, "tracepoint/syscalls/sys_enter_unlink"); tps_set_symbol(tps, "tracepoint/syscalls/sys_enter_fchmodat"); + tps_set_symbol(tps, "tracepoint/syscalls/sys_enter_chmod"); tps_set_symbol(tps, "tracepoint/syscalls/sys_enter_fchownat"); + tps_set_symbol(tps, "tracepoint/syscalls/sys_enter_chown"); tps_set_symbol(tps, "tracepoint/syscalls/sys_enter_setuid"); tps_set_symbol(tps, "tracepoint/syscalls/sys_enter_setgid"); tps_set_symbol(tps, "tracepoint/syscalls/sys_enter_setreuid"); From 2aea671a0bc303923214b3678de340f6d7f36faa Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Wed, 11 Mar 2026 13:43:07 
+0800 Subject: [PATCH 18/45] Fix AI Agent cleanup using full proc scan --- .../platform_synchronizer/linux_process.rs | 2 +- .../platform_synchronizer/proc_scan_hook.rs | 14 +++++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/agent/src/platform/platform_synchronizer/linux_process.rs b/agent/src/platform/platform_synchronizer/linux_process.rs index 7c2ccede5ef..5a5865ceaed 100644 --- a/agent/src/platform/platform_synchronizer/linux_process.rs +++ b/agent/src/platform/platform_synchronizer/linux_process.rs @@ -368,7 +368,7 @@ pub(crate) fn get_all_process_in(conf: &OsProcScanConfig, ret: &mut Vec ProcResult { diff --git a/agent/src/platform/platform_synchronizer/proc_scan_hook.rs b/agent/src/platform/platform_synchronizer/proc_scan_hook.rs index 2cc9e1960e4..2eee1f343bb 100644 --- a/agent/src/platform/platform_synchronizer/proc_scan_hook.rs +++ b/agent/src/platform/platform_synchronizer/proc_scan_hook.rs @@ -16,13 +16,21 @@ use super::ProcessData; -pub fn proc_scan_hook(_process_datas: &mut Vec) { +pub fn proc_scan_hook(proc_root: &str, _process_datas: &mut Vec) { // Enterprise: clean dead AI Agent PIDs and mark alive ones with biz_type #[cfg(feature = "enterprise")] { if let Some(registry) = enterprise_utils::ai_agent::global_registry() { - // Collect alive PIDs from proc scan to clean stale entries - let alive_pids: Vec = _process_datas.iter().map(|pd| pd.pid as u32).collect(); + // Use a full /proc scan for cleanup to avoid filtering out short-lived processes + // that are not yet eligible for os_proc_socket_min_lifetime. 
+ let alive_pids: Vec = match procfs::process::all_processes_with_root(proc_root) { + Ok(procs) => procs + .into_iter() + .filter_map(|p| p.ok()) + .map(|p| p.pid as u32) + .collect(), + Err(_) => _process_datas.iter().map(|pd| pd.pid as u32).collect(), + }; registry.cleanup_dead_pids(&alive_pids); for pd in _process_datas.iter_mut() { From 321af1ce5a477a7497cc1a0b0d2c0ba16007efe1 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Wed, 11 Mar 2026 14:12:05 +0800 Subject: [PATCH 19/45] Fix AI agent pid_tgid usage in socket trace --- agent/src/ebpf/kernel/socket_trace.bpf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/agent/src/ebpf/kernel/socket_trace.bpf.c b/agent/src/ebpf/kernel/socket_trace.bpf.c index 8af68f6c983..cd165abd35d 100644 --- a/agent/src/ebpf/kernel/socket_trace.bpf.c +++ b/agent/src/ebpf/kernel/socket_trace.bpf.c @@ -1357,7 +1357,8 @@ __data_submit(struct pt_regs *ctx, struct conn_info_s *conn_info, if (tracer_ctx == NULL) return SUBMIT_INVALID; - __u32 tgid = (__u32) (bpf_get_current_pid_tgid() >> 32); + __u64 pid_tgid = bpf_get_current_pid_tgid(); + __u32 tgid = (__u32) (pid_tgid >> 32); __u64 conn_key = gen_conn_key_id((__u64) tgid, (__u64) conn_info->fd); if (conn_info->message_type == MSG_CLEAR) { delete_socket_info(conn_key, conn_info->socket_info_ptr); From ba6a9bbc4975565b811efa57d2cfbcc921c700a4 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Wed, 11 Mar 2026 14:20:16 +0800 Subject: [PATCH 20/45] Reduce AI agent stack usage in data submit --- agent/src/ebpf/kernel/socket_trace.bpf.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/agent/src/ebpf/kernel/socket_trace.bpf.c b/agent/src/ebpf/kernel/socket_trace.bpf.c index cd165abd35d..dfcbfb470cb 100644 --- a/agent/src/ebpf/kernel/socket_trace.bpf.c +++ b/agent/src/ebpf/kernel/socket_trace.bpf.c @@ -1357,8 +1357,7 @@ __data_submit(struct pt_regs *ctx, struct conn_info_s *conn_info, if (tracer_ctx == NULL) return SUBMIT_INVALID; - __u64 pid_tgid = 
bpf_get_current_pid_tgid(); - __u32 tgid = (__u32) (pid_tgid >> 32); + __u32 tgid = (__u32) (bpf_get_current_pid_tgid() >> 32); __u64 conn_key = gen_conn_key_id((__u64) tgid, (__u64) conn_info->fd); if (conn_info->message_type == MSG_CLEAR) { delete_socket_info(conn_key, conn_info->socket_info_ptr); @@ -1385,8 +1384,7 @@ __data_submit(struct pt_regs *ctx, struct conn_info_s *conn_info, */ int data_max_sz = tracer_ctx->data_limit_max; #ifdef EXTENDED_AI_AGENT_FILE_IO - bool is_ai_agent = is_ai_agent_process(pid_tgid); - if (is_ai_agent) { + if (is_ai_agent_process(((__u64)tgid) << 32)) { __u32 ai_limit = tracer_ctx->ai_agent_data_limit_max; data_max_sz = ai_limit == 0 ? AI_AGENT_DATA_LIMIT_MAX_UNLIMITED : ai_limit; From c6b50f2baf5a8dfdb26911d7f1d10e5881f8a117 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Wed, 11 Mar 2026 15:06:29 +0800 Subject: [PATCH 21/45] Sync biz_type for gprocess in multi-controller --- message/controller.proto | 1 + .../db/metadb/migrator/schema/const.go | 2 +- .../schema/rawsql/mysql/ddl_create_table.sql | 1 + .../schema/rawsql/mysql/issu/7.1.0.35.sql | 31 +++++++++++++++++++ .../rawsql/postgres/ddl_create_table.sql | 1 + server/controller/db/metadb/model/ch_model.go | 1 + server/controller/genesis/grpc/server.go | 2 ++ .../genesis/store/sync/mysql/run.go | 1 + server/controller/tagrecorder/ch_gprocess.go | 1 + server/controller/tagrecorder/const_sql.go | 1 + 10 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 server/controller/db/metadb/migrator/schema/rawsql/mysql/issu/7.1.0.35.sql diff --git a/message/controller.proto b/message/controller.proto index 217db023e37..68eabe9f4ac 100644 --- a/message/controller.proto +++ b/message/controller.proto @@ -136,6 +136,7 @@ message GenesisSyncProcess { optional string start_time = 10; optional uint32 netns_id = 11; optional string container_id = 12; + optional uint32 biz_type = 13; } message GenesisSyncData{ diff --git a/server/controller/db/metadb/migrator/schema/const.go 
b/server/controller/db/metadb/migrator/schema/const.go index 43171fa1733..13815e41a29 100644 --- a/server/controller/db/metadb/migrator/schema/const.go +++ b/server/controller/db/metadb/migrator/schema/const.go @@ -20,5 +20,5 @@ const ( RAW_SQL_ROOT_DIR = "/etc/metadb/schema/rawsql" DB_VERSION_TABLE = "db_version" - DB_VERSION_EXPECTED = "7.1.0.34" + DB_VERSION_EXPECTED = "7.1.0.35" ) diff --git a/server/controller/db/metadb/migrator/schema/rawsql/mysql/ddl_create_table.sql b/server/controller/db/metadb/migrator/schema/rawsql/mysql/ddl_create_table.sql index b58354a220e..9bec663a20c 100644 --- a/server/controller/db/metadb/migrator/schema/rawsql/mysql/ddl_create_table.sql +++ b/server/controller/db/metadb/migrator/schema/rawsql/mysql/ddl_create_table.sql @@ -1761,6 +1761,7 @@ CREATE TABLE IF NOT EXISTS ch_gprocess ( icon_id INTEGER, chost_id INTEGER, l3_epc_id INTEGER, + biz_type INTEGER, team_id INTEGER, domain_id INTEGER, sub_domain_id INTEGER, diff --git a/server/controller/db/metadb/migrator/schema/rawsql/mysql/issu/7.1.0.35.sql b/server/controller/db/metadb/migrator/schema/rawsql/mysql/issu/7.1.0.35.sql new file mode 100644 index 00000000000..9f9aea5a6dc --- /dev/null +++ b/server/controller/db/metadb/migrator/schema/rawsql/mysql/issu/7.1.0.35.sql @@ -0,0 +1,31 @@ +DROP PROCEDURE IF EXISTS AddColumnIfNotExists; + +CREATE PROCEDURE AddColumnIfNotExists( + IN tableName VARCHAR(255), + IN colName VARCHAR(255), + IN colType VARCHAR(255), + IN afterCol VARCHAR(255) +) +BEGIN + DECLARE column_count INT; + + SELECT COUNT(*) + INTO column_count + FROM information_schema.columns + WHERE TABLE_SCHEMA = DATABASE() + AND TABLE_NAME = tableName + AND column_name = colName; + + IF column_count = 0 THEN + SET @sql = CONCAT('ALTER TABLE ', tableName, ' ADD COLUMN ', colName, ' ', colType, ' AFTER ', afterCol); + PREPARE stmt FROM @sql; + EXECUTE stmt; + DEALLOCATE PREPARE stmt; + END IF; +END; + +CALL AddColumnIfNotExists('ch_gprocess', 'biz_type', 'INTEGER DEFAULT 0', 
'l3_epc_id'); + +DROP PROCEDURE AddColumnIfNotExists; + +UPDATE db_version SET version='7.1.0.35'; diff --git a/server/controller/db/metadb/migrator/schema/rawsql/postgres/ddl_create_table.sql b/server/controller/db/metadb/migrator/schema/rawsql/postgres/ddl_create_table.sql index e86049e201b..f18ea0ba762 100644 --- a/server/controller/db/metadb/migrator/schema/rawsql/postgres/ddl_create_table.sql +++ b/server/controller/db/metadb/migrator/schema/rawsql/postgres/ddl_create_table.sql @@ -1924,6 +1924,7 @@ CREATE TABLE IF NOT EXISTS ch_gprocess ( icon_id INTEGER, chost_id INTEGER, l3_epc_id INTEGER, + biz_type INTEGER, team_id INTEGER, domain_id INTEGER, sub_domain_id INTEGER, diff --git a/server/controller/db/metadb/model/ch_model.go b/server/controller/db/metadb/model/ch_model.go index b05b5cb9ee5..020d2e059ac 100644 --- a/server/controller/db/metadb/model/ch_model.go +++ b/server/controller/db/metadb/model/ch_model.go @@ -469,6 +469,7 @@ type ChGProcess struct { IconID int `gorm:"column:icon_id;type:int;default:null" json:"ICON_ID"` CHostID int `gorm:"column:chost_id;type:int;not null" json:"CHOST_ID"` L3EPCID int `gorm:"column:l3_epc_id;type:int" json:"L3_EPC_ID"` + BizType int `gorm:"column:biz_type;type:int;default:0" json:"BIZ_TYPE"` TeamID int `gorm:"column:team_id;type:int;not null" json:"TEAM_ID"` DomainID int `gorm:"column:domain_id;type:int;not null" json:"DOMAIN_ID"` SubDomainID int `gorm:"column:sub_domain_id;type:int;default:0" json:"SUB_DOMAIN_ID"` diff --git a/server/controller/genesis/grpc/server.go b/server/controller/genesis/grpc/server.go index 008ddc168cc..5107bda547e 100644 --- a/server/controller/genesis/grpc/server.go +++ b/server/controller/genesis/grpc/server.go @@ -622,6 +622,7 @@ func (g *SynchronizerServer) GenesisSharingSync(ctx context.Context, request *co for _, p := range gSyncData.Processes { pData := p pStartTime := pData.StartTime.Format(controllercommon.GO_BIRTHDAY) + bizType := uint32(pData.BizType) gProcess := 
&controller.GenesisSyncProcess{ VtapId: &pData.VtapID, Pid: &pData.PID, @@ -629,6 +630,7 @@ func (g *SynchronizerServer) GenesisSharingSync(ctx context.Context, request *co NetnsId: &pData.NetnsID, Name: &pData.Name, ProcessName: &pData.ProcessName, + BizType: &bizType, CmdLine: &pData.CMDLine, User: &pData.UserName, ContainerId: &pData.ContainerID, diff --git a/server/controller/genesis/store/sync/mysql/run.go b/server/controller/genesis/store/sync/mysql/run.go index ce1c1916d9d..0341ff35cdc 100644 --- a/server/controller/genesis/store/sync/mysql/run.go +++ b/server/controller/genesis/store/sync/mysql/run.go @@ -384,6 +384,7 @@ func (g *GenesisSync) GetGenesisSyncResponse(orgID int) (common.GenesisSyncDataR NetnsID: p.GetNetnsId(), Name: p.GetName(), ProcessName: p.GetProcessName(), + BizType: int(p.GetBizType()), CMDLine: p.GetCmdLine(), ContainerID: p.GetContainerId(), UserName: p.GetUser(), diff --git a/server/controller/tagrecorder/ch_gprocess.go b/server/controller/tagrecorder/ch_gprocess.go index 3508d7de741..c7a2f724842 100644 --- a/server/controller/tagrecorder/ch_gprocess.go +++ b/server/controller/tagrecorder/ch_gprocess.go @@ -81,6 +81,7 @@ func (c *ChGProcess) sourceToTarget(md *message.Metadata, source *metadbmodel.Pr Name: sourceName, CHostID: source.VMID, L3EPCID: source.VPCID, + BizType: source.BizType, IconID: iconID, TeamID: md.GetTeamID(), DomainID: md.GetDomainID(), diff --git a/server/controller/tagrecorder/const_sql.go b/server/controller/tagrecorder/const_sql.go index a1f8eb4642c..2a8fb016cfa 100644 --- a/server/controller/tagrecorder/const_sql.go +++ b/server/controller/tagrecorder/const_sql.go @@ -222,6 +222,7 @@ const ( " `icon_id` Int64,\n" + " `chost_id` Int64,\n" + " `l3_epc_id` Int64,\n" + + " `biz_type` UInt64,\n" + " `team_id` UInt64,\n" + " `domain_id` UInt64,\n" + " `sub_domain_id` UInt64\n" + From 95fe6928f7832ed1b1917e8367ab81a285930e9d Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Wed, 11 Mar 2026 15:25:08 +0800 Subject: [PATCH 
22/45] Fix missing is_ai_agent in socket_trace --- agent/src/ebpf/kernel/socket_trace.bpf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/agent/src/ebpf/kernel/socket_trace.bpf.c b/agent/src/ebpf/kernel/socket_trace.bpf.c index dfcbfb470cb..b851d5646b5 100644 --- a/agent/src/ebpf/kernel/socket_trace.bpf.c +++ b/agent/src/ebpf/kernel/socket_trace.bpf.c @@ -1384,10 +1384,12 @@ __data_submit(struct pt_regs *ctx, struct conn_info_s *conn_info, */ int data_max_sz = tracer_ctx->data_limit_max; #ifdef EXTENDED_AI_AGENT_FILE_IO + __u8 is_ai_agent = 0; if (is_ai_agent_process(((__u64)tgid) << 32)) { __u32 ai_limit = tracer_ctx->ai_agent_data_limit_max; data_max_sz = ai_limit == 0 ? AI_AGENT_DATA_LIMIT_MAX_UNLIMITED : ai_limit; + is_ai_agent = 1; } #endif From 5b78c97cc5ce6e74fdf7be6b0cd1a03317256d42 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Wed, 11 Mar 2026 15:35:44 +0800 Subject: [PATCH 23/45] Avoid BPF stack usage for ai_agent flag --- agent/src/ebpf/kernel/socket_trace.bpf.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/agent/src/ebpf/kernel/socket_trace.bpf.c b/agent/src/ebpf/kernel/socket_trace.bpf.c index b851d5646b5..20a7a1fd540 100644 --- a/agent/src/ebpf/kernel/socket_trace.bpf.c +++ b/agent/src/ebpf/kernel/socket_trace.bpf.c @@ -1384,12 +1384,11 @@ __data_submit(struct pt_regs *ctx, struct conn_info_s *conn_info, */ int data_max_sz = tracer_ctx->data_limit_max; #ifdef EXTENDED_AI_AGENT_FILE_IO - __u8 is_ai_agent = 0; if (is_ai_agent_process(((__u64)tgid) << 32)) { __u32 ai_limit = tracer_ctx->ai_agent_data_limit_max; data_max_sz = ai_limit == 0 ? AI_AGENT_DATA_LIMIT_MAX_UNLIMITED : ai_limit; - is_ai_agent = 1; + sk_info->is_ai_agent = 1; } #endif @@ -1471,9 +1470,6 @@ __data_submit(struct pt_regs *ctx, struct conn_info_s *conn_info, syscall_len > data_max_sz ? 
data_max_sz : syscall_len; } -#ifdef EXTENDED_AI_AGENT_FILE_IO - sk_info->is_ai_agent = is_ai_agent; -#endif sk_info->direction = conn_info->direction; sk_info->pre_direction = conn_info->direction; sk_info->role = conn_info->role; @@ -1557,7 +1553,7 @@ __data_submit(struct pt_regs *ctx, struct conn_info_s *conn_info, sk_info->uid = socket_info_ptr->uid; sk_info->allow_reassembly = socket_info_ptr->allow_reassembly; #ifdef EXTENDED_AI_AGENT_FILE_IO - socket_info_ptr->is_ai_agent = is_ai_agent; + socket_info_ptr->is_ai_agent = sk_info->is_ai_agent; #endif /* From d37714dd9823996da528d8666c2200e3c1bfa51c Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Wed, 11 Mar 2026 17:21:42 +0800 Subject: [PATCH 24/45] Fix AI agent governance review issues - proc_scan_hook: inject AI agent PIDs not matched by process_matcher so they appear in MySQL process table (not just l7_flow_log) - handler.rs: add /v1/responses to default ai_agent_endpoints - perf/mod.rs: remove redundant register() with empty endpoint - http.rs: borrow path instead of cloning on every HTTP parse - socket.c: change __set_ai_agent_data_limit_max param to unsigned int to fix dead code branch (limit_size > INT_MAX unreachable with int) - server: decode access_permission from IoEventData into ClickHouse file_event table (column constant, EventStore field, column block) Co-Authored-By: Claude Opus 4.6 --- agent/src/config/handler.rs | 1 + agent/src/ebpf/user/socket.c | 7 ++----- agent/src/flow_generator/perf/mod.rs | 6 ------ .../src/flow_generator/protocol_logs/http.rs | 11 ++++++----- .../platform_synchronizer/proc_scan_hook.rs | 19 +++++++++++++++++++ server/ingester/event/dbwriter/event.go | 2 ++ .../event/dbwriter/event_column_block.go | 4 ++++ server/ingester/event/decoder/decoder.go | 1 + server/libs/ckdb/column.go | 1 + 9 files changed, 36 insertions(+), 16 deletions(-) diff --git a/agent/src/config/handler.rs b/agent/src/config/handler.rs index 8aa7c1bff2a..a9d7d63acbc 100755 --- 
a/agent/src/config/handler.rs +++ b/agent/src/config/handler.rs @@ -1175,6 +1175,7 @@ impl Default for LogParserConfig { ai_agent_endpoints: vec![ "/v1/chat/completions".to_string(), "/v1/embeddings".to_string(), + "/v1/responses".to_string(), ], ai_agent_max_payload_size: usize::MAX, // default: unlimited (config 0 → usize::MAX) ai_agent_file_io_enabled: true, diff --git a/agent/src/ebpf/user/socket.c b/agent/src/ebpf/user/socket.c index ecab0967583..fbf759e3904 100644 --- a/agent/src/ebpf/user/socket.c +++ b/agent/src/ebpf/user/socket.c @@ -2325,12 +2325,9 @@ static inline int __set_data_limit_max(int limit_size) return socket_data_limit_max; } -static inline int __set_ai_agent_data_limit_max(int limit_size) +static inline int __set_ai_agent_data_limit_max(unsigned int limit_size) { - if (limit_size < 0) { - ebpf_warning("ai_agent limit_size cannot be negative\n"); - return ETR_INVAL; - } else if (limit_size == 0) { + if (limit_size == 0) { ai_agent_data_limit_max = 0; } else if (limit_size > INT_MAX) { ai_agent_data_limit_max = INT_MAX; diff --git a/agent/src/flow_generator/perf/mod.rs b/agent/src/flow_generator/perf/mod.rs index 9cb5b6f94ab..6ff9ba828aa 100644 --- a/agent/src/flow_generator/perf/mod.rs +++ b/agent/src/flow_generator/perf/mod.rs @@ -339,12 +339,6 @@ impl FlowLog { if let Ok(ref result) = ret { if result.has_biz_type(BIZ_TYPE_AI_AGENT) { self.is_ai_agent = true; - // Register PID in the global AI Agent registry - if packet.process_id > 0 { - if let Some(registry) = enterprise_utils::ai_agent::global_registry() { - registry.register(packet.process_id, "", get_timestamp(0)); - } - } } } } diff --git a/agent/src/flow_generator/protocol_logs/http.rs b/agent/src/flow_generator/protocol_logs/http.rs index 803ff63f21e..f780247911c 100644 --- a/agent/src/flow_generator/protocol_logs/http.rs +++ b/agent/src/flow_generator/protocol_logs/http.rs @@ -1256,16 +1256,16 @@ impl HttpLog { if !config.http_endpoint_disabled && info.path.len() > 0 { // Priority use 
of info.endpoint, because info.endpoint may be set by the wasm plugin let _endpoint_already_set = matches!(info.endpoint.as_ref(), Some(p) if !p.is_empty()); - let path = match info.endpoint.as_ref() { - Some(p) if !p.is_empty() => p.clone(), - _ => info.path.clone(), + let path_ref = match info.endpoint.as_ref() { + Some(p) if !p.is_empty() => p.as_str(), + _ => info.path.as_str(), }; // Priority chain: WASM/biz_field > AI Agent detection > http_endpoint Trie #[cfg(feature = "enterprise")] let ai_agent_matched = if !_endpoint_already_set { if let Some(matched_path) = match_ai_agent_endpoint( &config.ai_agent_endpoints, - &path, + path_ref, param.process_id, std::time::Duration::from_micros(param.time), ) { @@ -1282,7 +1282,8 @@ impl HttpLog { #[cfg(not(feature = "enterprise"))] let ai_agent_matched = false; if !ai_agent_matched { - info.endpoint = Some(handle_endpoint(config, &path)); + let path_owned = path_ref.to_string(); + info.endpoint = Some(handle_endpoint(config, &path_owned)); } } diff --git a/agent/src/platform/platform_synchronizer/proc_scan_hook.rs b/agent/src/platform/platform_synchronizer/proc_scan_hook.rs index 2eee1f343bb..8c3ebe5fc76 100644 --- a/agent/src/platform/platform_synchronizer/proc_scan_hook.rs +++ b/agent/src/platform/platform_synchronizer/proc_scan_hook.rs @@ -20,6 +20,8 @@ pub fn proc_scan_hook(proc_root: &str, _process_datas: &mut Vec) { // Enterprise: clean dead AI Agent PIDs and mark alive ones with biz_type #[cfg(feature = "enterprise")] { + use std::collections::HashSet; + if let Some(registry) = enterprise_utils::ai_agent::global_registry() { // Use a full /proc scan for cleanup to avoid filtering out short-lived processes // that are not yet eligible for os_proc_socket_min_lifetime. @@ -38,6 +40,23 @@ pub fn proc_scan_hook(proc_root: &str, _process_datas: &mut Vec) { pd.biz_type = crate::common::flow::BIZ_TYPE_AI_AGENT; } } + + // Inject AI agent processes that weren't matched by process_matcher. 
+ // Without this, identified AI agents appear in l7_flow_log but NOT in the + // MySQL process table because process_matcher only matches on socket/regex. + let existing_pids: HashSet = + _process_datas.iter().map(|pd| pd.pid as u32).collect(); + for pid in registry.get_all_pids() { + if existing_pids.contains(&pid) { + continue; + } + if let Ok(proc) = procfs::process::Process::new(pid as i32) { + if let Ok(mut pd) = ProcessData::try_from(&proc) { + pd.biz_type = crate::common::flow::BIZ_TYPE_AI_AGENT; + _process_datas.push(pd); + } + } + } } } } diff --git a/server/ingester/event/dbwriter/event.go b/server/ingester/event/dbwriter/event.go index 5f921048095..7bd2ab121b8 100644 --- a/server/ingester/event/dbwriter/event.go +++ b/server/ingester/event/dbwriter/event.go @@ -113,6 +113,7 @@ type EventStore struct { MountSource string `json:"mount_source" category:"$tag" sub:"event_info"` MountPoint string `json:"mount_point" category:"$tag" sub:"event_info"` FileDir string `json:"file_dir" category:"$tag" sub:"event_info"` + AccessPermission uint32 `json:"access_permission" category:"$tag" sub:"event_info"` } func (e *EventStore) NativeTagVersion() uint32 { @@ -238,6 +239,7 @@ func EventColumns(isFileEvent bool) []*ckdb.Column { ckdb.NewColumn("mount_source", ckdb.LowCardinalityString).SetGroupBy(), ckdb.NewColumn("mount_point", ckdb.LowCardinalityString).SetGroupBy(), ckdb.NewColumn("file_dir", ckdb.String).SetGroupBy(), + ckdb.NewColumn("access_permission", ckdb.UInt32).SetComment("文件权限位").SetIgnoredInAggrTable(), ) } return columns diff --git a/server/ingester/event/dbwriter/event_column_block.go b/server/ingester/event/dbwriter/event_column_block.go index ac86e916cb5..243031b5fc3 100644 --- a/server/ingester/event/dbwriter/event_column_block.go +++ b/server/ingester/event/dbwriter/event_column_block.go @@ -72,6 +72,7 @@ type EventBlock struct { ColMountSource *proto.ColLowCardinality[string] ColMountPoint *proto.ColLowCardinality[string] ColFileDir proto.ColStr + 
ColAccessPermission proto.ColUInt32 *nativetag.NativeTagsBlock } @@ -122,6 +123,7 @@ func (b *EventBlock) Reset() { b.ColMountSource.Reset() b.ColMountPoint.Reset() b.ColFileDir.Reset() + b.ColAccessPermission.Reset() if b.NativeTagsBlock != nil { b.NativeTagsBlock.Reset() } @@ -177,6 +179,7 @@ func (b *EventBlock) ToInput(input proto.Input) proto.Input { proto.InputColumn{Name: ckdb.COLUMN_MOUNT_SOURCE, Data: b.ColMountSource}, proto.InputColumn{Name: ckdb.COLUMN_MOUNT_POINT, Data: b.ColMountPoint}, proto.InputColumn{Name: ckdb.COLUMN_FILE_DIR, Data: &b.ColFileDir}, + proto.InputColumn{Name: ckdb.COLUMN_ACCESS_PERMISSION, Data: &b.ColAccessPermission}, ) } if b.NativeTagsBlock != nil { @@ -249,6 +252,7 @@ func (n *EventStore) AppendToColumnBlock(b ckdb.CKColumnBlock) { block.ColMountSource.Append(n.MountSource) block.ColMountPoint.Append(n.MountPoint) block.ColFileDir.Append(n.FileDir) + block.ColAccessPermission.Append(n.AccessPermission) if block.NativeTagsBlock != nil { block.NativeTagsBlock.AppendToColumnBlock(n.AttributeNames, n.AttributeValues, nil, nil) diff --git a/server/ingester/event/decoder/decoder.go b/server/ingester/event/decoder/decoder.go index 1427fab62eb..d6836f990d7 100644 --- a/server/ingester/event/decoder/decoder.go +++ b/server/ingester/event/decoder/decoder.go @@ -195,6 +195,7 @@ func (d *Decoder) WriteFileEvent(vtapId uint16, e *pb.ProcEvent) { s.MountSource = string(ioData.MountSource) s.MountPoint = string(ioData.MountPoint) s.Bytes = ioData.BytesCount + s.AccessPermission = ioData.AccessPermission s.Duration = uint64(s.EndTime - s.StartTime) } else if e.FileOpEventData != nil { d := e.FileOpEventData diff --git a/server/libs/ckdb/column.go b/server/libs/ckdb/column.go index 4e52d2d82ac..5c66b03da6d 100644 --- a/server/libs/ckdb/column.go +++ b/server/libs/ckdb/column.go @@ -32,6 +32,7 @@ func init() { const ( COLUMN_ACL_GID = "acl_gid" COLUMN_ACL_GIDS = "acl_gids" + COLUMN_ACCESS_PERMISSION = "access_permission" COLUMN_AGENT_ID = 
"agent_id" COLUMN_ALERT_POLICY = "alert_policy" COLUMN_APP_INSTANCE = "app_instance" From ac183599f8b815a7eac062f97fc6bda692fb2774 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Wed, 11 Mar 2026 17:49:41 +0800 Subject: [PATCH 25/45] Fix proc scan hook warning and HTTP endpoint borrow --- agent/src/flow_generator/protocol_logs/http.rs | 10 +++++----- .../platform/platform_synchronizer/proc_scan_hook.rs | 5 +++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/agent/src/flow_generator/protocol_logs/http.rs b/agent/src/flow_generator/protocol_logs/http.rs index f780247911c..6c80dbfb8a0 100644 --- a/agent/src/flow_generator/protocol_logs/http.rs +++ b/agent/src/flow_generator/protocol_logs/http.rs @@ -1256,16 +1256,17 @@ impl HttpLog { if !config.http_endpoint_disabled && info.path.len() > 0 { // Priority use of info.endpoint, because info.endpoint may be set by the wasm plugin let _endpoint_already_set = matches!(info.endpoint.as_ref(), Some(p) if !p.is_empty()); - let path_ref = match info.endpoint.as_ref() { - Some(p) if !p.is_empty() => p.as_str(), - _ => info.path.as_str(), + let path_owned = if let Some(p) = info.endpoint.as_ref().filter(|p| !p.is_empty()) { + p.clone() + } else { + info.path.clone() }; // Priority chain: WASM/biz_field > AI Agent detection > http_endpoint Trie #[cfg(feature = "enterprise")] let ai_agent_matched = if !_endpoint_already_set { if let Some(matched_path) = match_ai_agent_endpoint( &config.ai_agent_endpoints, - path_ref, + path_owned.as_str(), param.process_id, std::time::Duration::from_micros(param.time), ) { @@ -1282,7 +1283,6 @@ impl HttpLog { #[cfg(not(feature = "enterprise"))] let ai_agent_matched = false; if !ai_agent_matched { - let path_owned = path_ref.to_string(); info.endpoint = Some(handle_endpoint(config, &path_owned)); } } diff --git a/agent/src/platform/platform_synchronizer/proc_scan_hook.rs b/agent/src/platform/platform_synchronizer/proc_scan_hook.rs index 8c3ebe5fc76..1b8b5d39b86 100644 --- 
a/agent/src/platform/platform_synchronizer/proc_scan_hook.rs +++ b/agent/src/platform/platform_synchronizer/proc_scan_hook.rs @@ -16,7 +16,7 @@ use super::ProcessData; -pub fn proc_scan_hook(proc_root: &str, _process_datas: &mut Vec) { +pub fn proc_scan_hook(_proc_root: &str, _process_datas: &mut Vec) { // Enterprise: clean dead AI Agent PIDs and mark alive ones with biz_type #[cfg(feature = "enterprise")] { @@ -25,7 +25,8 @@ pub fn proc_scan_hook(proc_root: &str, _process_datas: &mut Vec) { if let Some(registry) = enterprise_utils::ai_agent::global_registry() { // Use a full /proc scan for cleanup to avoid filtering out short-lived processes // that are not yet eligible for os_proc_socket_min_lifetime. - let alive_pids: Vec = match procfs::process::all_processes_with_root(proc_root) { + let alive_pids: Vec = + match procfs::process::all_processes_with_root(_proc_root) { Ok(procs) => procs .into_iter() .filter_map(|p| p.ok()) From 0ebe43656c553072ec132c296590ca67bdc467c6 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Wed, 11 Mar 2026 19:08:56 +0800 Subject: [PATCH 26/45] agent: auto sync AI agent gprocess_info --- .../platform_synchronizer/proc_scan_hook.rs | 3 +- agent/src/utils/process/linux.rs | 118 +++++++++++++++++- 2 files changed, 116 insertions(+), 5 deletions(-) diff --git a/agent/src/platform/platform_synchronizer/proc_scan_hook.rs b/agent/src/platform/platform_synchronizer/proc_scan_hook.rs index 1b8b5d39b86..3a150496a29 100644 --- a/agent/src/platform/platform_synchronizer/proc_scan_hook.rs +++ b/agent/src/platform/platform_synchronizer/proc_scan_hook.rs @@ -25,8 +25,7 @@ pub fn proc_scan_hook(_proc_root: &str, _process_datas: &mut Vec) { if let Some(registry) = enterprise_utils::ai_agent::global_registry() { // Use a full /proc scan for cleanup to avoid filtering out short-lived processes // that are not yet eligible for os_proc_socket_min_lifetime. 
- let alive_pids: Vec = - match procfs::process::all_processes_with_root(_proc_root) { + let alive_pids: Vec = match procfs::process::all_processes_with_root(_proc_root) { Ok(procs) => procs .into_iter() .filter_map(|p| p.ok()) diff --git a/agent/src/utils/process/linux.rs b/agent/src/utils/process/linux.rs index 1778ceb89b1..82a27b91990 100644 --- a/agent/src/utils/process/linux.rs +++ b/agent/src/utils/process/linux.rs @@ -476,9 +476,14 @@ impl ProcessListener { process_data_cache.retain(|pid, _| alive_pids.contains(pid)); for (key, value) in features.iter_mut() { - if (value.process_matcher.is_empty() && value.pids.is_empty()) - || value.callback.is_none() - { + let ai_agent_pids = fetch_ai_agent_pids(key.as_str()); + + if should_skip_feature( + value.process_matcher.is_empty(), + value.pids.is_empty(), + ai_agent_pids.is_empty(), + value.callback.is_none(), + ) { continue; } @@ -502,6 +507,15 @@ impl ProcessListener { } } + if !ai_agent_pids.is_empty() { + merge_ai_agent_processes( + process_data_cache, + &ai_agent_pids, + &mut pids, + &mut process_datas, + ); + } + pids.sort(); pids.dedup(); process_datas.sort_by_key(|x| x.pid); @@ -563,3 +577,101 @@ impl ProcessListener { self.thread_handle.lock().unwrap().take() } } + +fn should_skip_feature( + process_matcher_empty: bool, + previous_pids_empty: bool, + ai_agent_pids_empty: bool, + callback_missing: bool, +) -> bool { + if callback_missing { + return true; + } + !(!process_matcher_empty || !previous_pids_empty || !ai_agent_pids_empty) +} + +#[cfg(feature = "enterprise")] +fn fetch_ai_agent_pids(feature: &str) -> Vec { + if feature == "proc.gprocess_info" { + if let Some(registry) = enterprise_utils::ai_agent::global_registry() { + return registry.get_all_pids(); + } + } + Vec::new() +} + +#[cfg(not(feature = "enterprise"))] +fn fetch_ai_agent_pids(_feature: &str) -> Vec { + Vec::new() +} + +fn merge_ai_agent_processes( + process_data_cache: &HashMap, + ai_agent_pids: &[u32], + pids: &mut Vec, + 
process_datas: &mut Vec, +) { + let mut existing_pids: HashSet = pids.iter().copied().collect(); + for pid in ai_agent_pids { + if existing_pids.contains(pid) { + continue; + } + if let Some(process_data) = process_data_cache.get(&(*pid as i32)) { + pids.push(*pid); + process_datas.push(process_data.clone()); + existing_pids.insert(*pid); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::time::Duration; + + fn make_process_data(pid: u64) -> ProcessData { + ProcessData { + name: format!("proc-{pid}"), + pid, + ppid: 1, + process_name: format!("proc-{pid}"), + cmd: format!("/proc/{pid}"), + cmd_with_args: vec![format!("/proc/{pid}")], + user_id: 0, + user: "root".to_string(), + start_time: Duration::from_secs(0), + os_app_tags: vec![], + netns_id: 0, + container_id: String::new(), + biz_type: 0, + } + } + + #[test] + fn merge_ai_agent_processes_adds_missing_pids() { + let mut process_data_cache = HashMap::new(); + process_data_cache.insert(1001, make_process_data(1001)); + process_data_cache.insert(1002, make_process_data(1002)); + + let ai_agent_pids = vec![1002]; + let mut pids = vec![1001]; + let mut process_datas = vec![make_process_data(1001)]; + + merge_ai_agent_processes( + &process_data_cache, + &ai_agent_pids, + &mut pids, + &mut process_datas, + ); + + pids.sort(); + assert_eq!(pids, vec![1001, 1002]); + assert!(process_datas.iter().any(|pd| pd.pid == 1002)); + } + + #[test] + fn should_skip_feature_allows_ai_agent_without_matcher() { + let skip = should_skip_feature(true, true, false, false); + assert!(!skip); + } +} From b45fa06cc8cfe31784c4f7a6a27570bc06159043 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Wed, 11 Mar 2026 19:30:57 +0800 Subject: [PATCH 27/45] agent: mark ai agent biz_type in gprocess --- agent/src/utils/process/linux.rs | 37 +++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/agent/src/utils/process/linux.rs b/agent/src/utils/process/linux.rs index 82a27b91990..2c13c3fd9ad 100644 
--- a/agent/src/utils/process/linux.rs +++ b/agent/src/utils/process/linux.rs @@ -34,6 +34,7 @@ use log::{debug, error, info, trace}; use nix::sys::utsname::uname; use procfs::process::all_processes_with_root; +use crate::common::flow::BIZ_TYPE_AI_AGENT; use crate::config::ProcessMatcher; use crate::platform::{get_os_app_tag_by_exec, ProcessData, ProcessDataOp}; @@ -612,13 +613,21 @@ fn merge_ai_agent_processes( process_datas: &mut Vec, ) { let mut existing_pids: HashSet = pids.iter().copied().collect(); + let ai_agent_set: HashSet = ai_agent_pids.iter().copied().collect(); + for process_data in process_datas.iter_mut() { + if ai_agent_set.contains(&(process_data.pid as u32)) { + process_data.biz_type = BIZ_TYPE_AI_AGENT; + } + } for pid in ai_agent_pids { if existing_pids.contains(pid) { continue; } if let Some(process_data) = process_data_cache.get(&(*pid as i32)) { + let mut process_data = process_data.clone(); + process_data.biz_type = BIZ_TYPE_AI_AGENT; pids.push(*pid); - process_datas.push(process_data.clone()); + process_datas.push(process_data); existing_pids.insert(*pid); } } @@ -669,6 +678,32 @@ mod tests { assert!(process_datas.iter().any(|pd| pd.pid == 1002)); } + #[test] + fn merge_ai_agent_processes_sets_biz_type() { + use crate::common::flow::BIZ_TYPE_AI_AGENT; + + let mut process_data_cache = HashMap::new(); + process_data_cache.insert(2001, make_process_data(2001)); + process_data_cache.insert(2002, make_process_data(2002)); + + let ai_agent_pids = vec![2002]; + let mut pids = vec![2002]; + let mut process_datas = vec![make_process_data(2002)]; + + merge_ai_agent_processes( + &process_data_cache, + &ai_agent_pids, + &mut pids, + &mut process_datas, + ); + + let ai_agent = process_datas + .iter() + .find(|pd| pd.pid == 2002) + .expect("ai agent process missing"); + assert_eq!(ai_agent.biz_type, BIZ_TYPE_AI_AGENT); + } + #[test] fn should_skip_feature_allows_ai_agent_without_matcher() { let skip = should_skip_feature(true, true, false, false); From 
aa921356068f949760fdbb4096a320e9e71b390d Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Thu, 12 Mar 2026 09:54:00 +0800 Subject: [PATCH 28/45] server: support gprocess.biz_type tag query --- .../clickhouse/tag/enum/biz_type.ch | 3 +++ .../clickhouse/tag/enum/biz_type.en | 3 +++ .../clickhouse/tag/flow_log/l7_flow_log | 2 +- .../clickhouse/tag/flow_metrics/application | 2 +- .../tag/flow_metrics/application_map | 2 +- .../engine/clickhouse/tag/translation.go | 23 ++++++++++++++++++- 6 files changed, 31 insertions(+), 4 deletions(-) create mode 100644 server/querier/db_descriptions/clickhouse/tag/enum/biz_type.ch create mode 100644 server/querier/db_descriptions/clickhouse/tag/enum/biz_type.en diff --git a/server/querier/db_descriptions/clickhouse/tag/enum/biz_type.ch b/server/querier/db_descriptions/clickhouse/tag/enum/biz_type.ch new file mode 100644 index 00000000000..f3cadb9174c --- /dev/null +++ b/server/querier/db_descriptions/clickhouse/tag/enum/biz_type.ch @@ -0,0 +1,3 @@ +# Value , DisplayName , Description +0 , 默认 , 默认业务类型 +1 , 智能体 , AI Agent diff --git a/server/querier/db_descriptions/clickhouse/tag/enum/biz_type.en b/server/querier/db_descriptions/clickhouse/tag/enum/biz_type.en new file mode 100644 index 00000000000..6d224d3aba4 --- /dev/null +++ b/server/querier/db_descriptions/clickhouse/tag/enum/biz_type.en @@ -0,0 +1,3 @@ +# Value , DisplayName , Description +0 , Default , Default business type +1 , AI-Agent , AI Agent diff --git a/server/querier/db_descriptions/clickhouse/tag/flow_log/l7_flow_log b/server/querier/db_descriptions/clickhouse/tag/flow_log/l7_flow_log index b9b0888ca52..03bd824990b 100644 --- a/server/querier/db_descriptions/clickhouse/tag/flow_log/l7_flow_log +++ b/server/querier/db_descriptions/clickhouse/tag/flow_log/l7_flow_log @@ -127,7 +127,7 @@ capture_nic_type , capture_nic_type , capture_nic_type tap_side , tap_side , tap_side , string_enum , observation_point , Capture Info , 111 , 1 , observation_point , observation_point , 
observation_point , string_enum , observation_point , Capture Info , 111 , 0 , -biz_type , biz_type , biz_type , int , , Business Info , 111 , 0 , +biz_type , biz_type , biz_type , int_enum , biz_type , Business Info , 111 , 0 , biz_code , biz_code , biz_code , string , , Business Info , 111 , 0 , biz_scenario , biz_scenario , biz_scenario , string , , Business Info , 111 , 0 , biz_response_code , biz_response_code , biz_response_code , string , , Business Info , 111 , 0 , diff --git a/server/querier/db_descriptions/clickhouse/tag/flow_metrics/application b/server/querier/db_descriptions/clickhouse/tag/flow_metrics/application index 34a63f00b74..9ffbbf87a60 100644 --- a/server/querier/db_descriptions/clickhouse/tag/flow_metrics/application +++ b/server/querier/db_descriptions/clickhouse/tag/flow_metrics/application @@ -60,4 +60,4 @@ agent , agent , agent signal_source , signal_source , signal_source , int_enum , l7_signal_source , Capture Info , 111 , 0 role , role , role , int_enum , role , Capture Info , 111 , 0 -biz_type , biz_type , biz_type , int , , Business Info , 111 , 0 +biz_type , biz_type , biz_type , int_enum , biz_type , Business Info , 111 , 0 diff --git a/server/querier/db_descriptions/clickhouse/tag/flow_metrics/application_map b/server/querier/db_descriptions/clickhouse/tag/flow_metrics/application_map index 0e65926dd18..e1720a08268 100644 --- a/server/querier/db_descriptions/clickhouse/tag/flow_metrics/application_map +++ b/server/querier/db_descriptions/clickhouse/tag/flow_metrics/application_map @@ -77,4 +77,4 @@ tap_side , tap_side , tap_side observation_point , observation_point , observation_point , string_enum , observation_point , Capture Info , 111 , 0 signal_source , signal_source , signal_source , int_enum , l7_signal_source , Capture Info , 111 , 0 -biz_type , biz_type , biz_type , int , , Business Info , 111 , 0 +biz_type , biz_type , biz_type , int_enum , biz_type , Business Info , 111 , 0 diff --git 
a/server/querier/engine/clickhouse/tag/translation.go b/server/querier/engine/clickhouse/tag/translation.go index 198d7aee49d..40d82aca72c 100644 --- a/server/querier/engine/clickhouse/tag/translation.go +++ b/server/querier/engine/clickhouse/tag/translation.go @@ -63,7 +63,7 @@ var HOSTNAME_IP_DEVICE_MAP = map[string]struct { var INT_ENUM_TAG = []string{ "close_type", "eth_type", "signal_source", "is_ipv4", "l7_ip_protocol", "type", "l7_protocol", "protocol", "response_status", "server_port", "status", "capture_nic_type", "tunnel_tier", - "tunnel_type", "instance_type", "nat_source", "role", "event_level", "policy_level", + "tunnel_type", "instance_type", "nat_source", "role", "event_level", "policy_level", "biz_type", "policy_app_type", "is_tls", "is_async", "is_reversed", "severity_number", "file_type", } var INT_ENUM_PEER_TAG = []string{"tcp_flags_bit", "auto_instance_type", "auto_service_type"} @@ -851,6 +851,27 @@ func GenerateTagResoureMap() map[string]map[string]*Tag { ), } } + // gprocess.biz_type + for _, suffix := range []string{"", "_0", "_1"} { + bizTypeSuffix := "gprocess.biz_type" + suffix + processIDSuffix := "gprocess_id" + suffix + tagResourceMap[bizTypeSuffix] = map[string]*Tag{ + "default": NewTag( + "dictGet('flow_tag.gprocess_map', 'biz_type', (toUInt64("+processIDSuffix+")))", + processIDSuffix+"!=0", + "toUInt64("+processIDSuffix+") GLOBAL IN (SELECT id FROM flow_tag.gprocess_map WHERE biz_type GLOBAL IN (SELECT value FROM flow_tag.int_enum_map WHERE tag_name='biz_type' AND (name_en %[1]s %[2]s OR name_zh %[1]s %[2]s)))", + "toUInt64("+processIDSuffix+") GLOBAL IN (SELECT id FROM flow_tag.gprocess_map WHERE biz_type GLOBAL IN (SELECT value FROM flow_tag.int_enum_map WHERE tag_name='biz_type' AND (%[1]s(name_en,%[2]s) OR %[1]s(name_zh,%[2]s))))", + processIDSuffix, + ), + "enum": NewTag( + "dictGetOrDefault('flow_tag.int_enum_map', '%s', ('%s',toUInt64(dictGet('flow_tag.gprocess_map', 'biz_type', (toUInt64("+processIDSuffix+"))))), 
dictGet('flow_tag.gprocess_map', 'biz_type', (toUInt64("+processIDSuffix+"))))", + "", + "toUInt64(dictGet('flow_tag.gprocess_map', 'biz_type', (toUInt64("+processIDSuffix+")))) GLOBAL IN (SELECT value FROM flow_tag.int_enum_map WHERE %s %s %s and tag_name='%s')", + "toUInt64(dictGet('flow_tag.gprocess_map', 'biz_type', (toUInt64("+processIDSuffix+")))) GLOBAL IN (SELECT value FROM flow_tag.int_enum_map WHERE %s(%s,%s) and tag_name='%s')", + "", + ), + } + } // 单个外部字段-ext_metrics tagResourceMap["tag."] = map[string]*Tag{ From a63adf3ef6ee41b14a0d8c003b645f21c9c7881b Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Thu, 12 Mar 2026 10:18:03 +0800 Subject: [PATCH 29/45] agent: log ai agent pids for gprocess sync --- agent/src/utils/process/linux.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/agent/src/utils/process/linux.rs b/agent/src/utils/process/linux.rs index 2c13c3fd9ad..9b74812b070 100644 --- a/agent/src/utils/process/linux.rs +++ b/agent/src/utils/process/linux.rs @@ -595,8 +595,11 @@ fn should_skip_feature( fn fetch_ai_agent_pids(feature: &str) -> Vec { if feature == "proc.gprocess_info" { if let Some(registry) = enterprise_utils::ai_agent::global_registry() { - return registry.get_all_pids(); + let pids = registry.get_all_pids(); + debug!("AI Agent: proc.gprocess_info fetch {} pids", pids.len()); + return pids; } + debug!("AI Agent: proc.gprocess_info registry not initialized"); } Vec::new() } @@ -629,6 +632,11 @@ fn merge_ai_agent_processes( pids.push(*pid); process_datas.push(process_data); existing_pids.insert(*pid); + } else { + debug!( + "AI Agent: pid {} not found in process cache, skip gprocess sync", + pid + ); } } } From 5df64cce70891678ea4cbd27a08afba46290b9b6 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Thu, 12 Mar 2026 14:57:21 +0800 Subject: [PATCH 30/45] ai-agent: inherit child proc lifecycle --- agent/src/common/proc_event/linux.rs | 55 ++++++++++++++++++++++++++++ agent/src/ebpf_dispatcher.rs | 17 ++++++++- 2 
files changed, 71 insertions(+), 1 deletion(-) diff --git a/agent/src/common/proc_event/linux.rs b/agent/src/common/proc_event/linux.rs index 8742e439b2c..ee3f96e8fcf 100644 --- a/agent/src/common/proc_event/linux.rs +++ b/agent/src/common/proc_event/linux.rs @@ -238,6 +238,9 @@ const PROC_LC_UID_OFF: usize = 9; const PROC_LC_GID_OFF: usize = 13; const PROC_LC_TS_OFF: usize = 17; const PROC_LC_COMM_OFF: usize = 25; +pub const PROC_LIFECYCLE_FORK: u8 = 1; +pub const PROC_LIFECYCLE_EXEC: u8 = 2; +pub const PROC_LIFECYCLE_EXIT: u8 = 3; struct ProcLifecycleEventData { lifecycle_type: u8, @@ -249,6 +252,13 @@ struct ProcLifecycleEventData { comm: Vec, } +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct ProcLifecycleInfo { + pub lifecycle_type: u8, + pub pid: u32, + pub parent_pid: u32, +} + impl TryFrom<&[u8]> for ProcLifecycleEventData { type Error = Error; @@ -440,6 +450,17 @@ impl ProcEvent { Ok(BoxedProcEvents(Box::new(proc_event))) } + + pub fn proc_lifecycle_info(&self) -> Option { + match &self.event_data { + EventData::ProcLifecycleEvent(data) => Some(ProcLifecycleInfo { + lifecycle_type: data.lifecycle_type, + pid: data.pid, + parent_pid: data.parent_pid, + }), + _ => None, + } + } } #[derive(Debug)] @@ -491,3 +512,37 @@ impl Sendable for BoxedProcEvents { SendMessageType::ProcEvents } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_proc_lifecycle_info_extracts_fields() { + let event_data = ProcLifecycleEventData { + lifecycle_type: 1, + pid: 4321, + parent_pid: 1234, + uid: 0, + gid: 0, + timestamp: 42, + comm: b"sleep".to_vec(), + }; + let proc_event = ProcEvent { + pid: 1234, + pod_id: 0, + thread_id: 0, + coroutine_id: 0, + process_kname: b"python3".to_vec(), + start_time: 42, + end_time: 43, + event_type: EventType::ProcLifecycleEvent, + event_data: EventData::ProcLifecycleEvent(event_data), + }; + + let info = proc_event.proc_lifecycle_info().expect("missing info"); + assert_eq!(info.lifecycle_type, 1); + assert_eq!(info.pid, 
4321); + assert_eq!(info.parent_pid, 1234); + } +} diff --git a/agent/src/ebpf_dispatcher.rs b/agent/src/ebpf_dispatcher.rs index 330761dc285..26a272a8de5 100644 --- a/agent/src/ebpf_dispatcher.rs +++ b/agent/src/ebpf_dispatcher.rs @@ -61,7 +61,7 @@ use crate::common::l7_protocol_log::{ get_all_protocol, L7ProtocolBitmap, L7ProtocolParserInterface, }; use crate::common::meta_packet::{MetaPacket, SegmentFlags}; -use crate::common::proc_event::{BoxedProcEvents, EventType, ProcEvent}; +use crate::common::proc_event::{BoxedProcEvents, EventType, ProcEvent, PROC_LIFECYCLE_FORK}; use crate::common::{FlowAclListener, FlowAclListenerId}; use crate::config::handler::{CollectorAccess, EbpfAccess, EbpfConfig, LogParserAccess}; use crate::config::FlowAccess; @@ -118,6 +118,19 @@ pub struct SyncEbpfCounter { counter: Arc, } +#[cfg(feature = "enterprise")] +fn register_ai_agent_child(event: &BoxedProcEvents) { + if let Some(info) = event.0.proc_lifecycle_info() { + if info.lifecycle_type != PROC_LIFECYCLE_FORK { + return; + } + if let Some(registry) = enterprise_utils::ai_agent::global_registry() { + let now = Duration::from_nanos(event.0.start_time); + registry.register_child(info.parent_pid, info.pid, now); + } + } +} + impl OwnedCountable for SyncEbpfCounter { fn get_counters(&self) -> Vec { let rx = self.counter.rx.swap(0, Ordering::Relaxed); @@ -644,6 +657,8 @@ impl EbpfCollector { if let Some(policy) = POLICY_GETTER.as_ref() { event.0.pod_id = policy.lookup_pod_id(&container_id); } + #[cfg(feature = "enterprise")] + register_ai_agent_child(&event); if let Err(e) = PROC_EVENT_SENDER.as_mut().unwrap().send(event) { warn!("event send ebpf error: {:?}", e); } From b1be1b4a0360848d8910e6935793af7352b54e3d Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Thu, 12 Mar 2026 15:06:10 +0800 Subject: [PATCH 31/45] ai-agent: expose proc event start time --- agent/src/common/proc_event/linux.rs | 4 ++++ agent/src/ebpf_dispatcher.rs | 6 ++++-- 2 files changed, 8 insertions(+), 2 
deletions(-) diff --git a/agent/src/common/proc_event/linux.rs b/agent/src/common/proc_event/linux.rs index ee3f96e8fcf..0a4ce4c6190 100644 --- a/agent/src/common/proc_event/linux.rs +++ b/agent/src/common/proc_event/linux.rs @@ -451,6 +451,10 @@ impl ProcEvent { Ok(BoxedProcEvents(Box::new(proc_event))) } + pub fn start_time(&self) -> u64 { + self.start_time + } + pub fn proc_lifecycle_info(&self) -> Option { match &self.event_data { EventData::ProcLifecycleEvent(data) => Some(ProcLifecycleInfo { diff --git a/agent/src/ebpf_dispatcher.rs b/agent/src/ebpf_dispatcher.rs index 26a272a8de5..10d01d0f73d 100644 --- a/agent/src/ebpf_dispatcher.rs +++ b/agent/src/ebpf_dispatcher.rs @@ -61,7 +61,9 @@ use crate::common::l7_protocol_log::{ get_all_protocol, L7ProtocolBitmap, L7ProtocolParserInterface, }; use crate::common::meta_packet::{MetaPacket, SegmentFlags}; -use crate::common::proc_event::{BoxedProcEvents, EventType, ProcEvent, PROC_LIFECYCLE_FORK}; +#[cfg(feature = "enterprise")] +use crate::common::proc_event::PROC_LIFECYCLE_FORK; +use crate::common::proc_event::{BoxedProcEvents, EventType, ProcEvent}; use crate::common::{FlowAclListener, FlowAclListenerId}; use crate::config::handler::{CollectorAccess, EbpfAccess, EbpfConfig, LogParserAccess}; use crate::config::FlowAccess; @@ -125,7 +127,7 @@ fn register_ai_agent_child(event: &BoxedProcEvents) { return; } if let Some(registry) = enterprise_utils::ai_agent::global_registry() { - let now = Duration::from_nanos(event.0.start_time); + let now = Duration::from_nanos(event.0.start_time()); registry.register_child(info.parent_pid, info.pid, now); } } From 3a4ef94efb9ab3ed1a6b70db182a7a2fcb6dedd5 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Thu, 12 Mar 2026 16:56:16 +0800 Subject: [PATCH 32/45] Fix proc lifecycle gprocess fallback and captured bytes --- agent/src/common/l7_protocol_log.rs | 29 ++++++++++++++-- agent/src/common/meta_packet.rs | 15 +++++++++ .../ebpf/kernel/include/socket_trace_common.h | 1 + 
agent/src/ebpf/kernel/socket_trace.bpf.c | 1 + agent/src/ebpf/mod.rs | 1 + agent/src/ebpf/user/socket.c | 1 + agent/src/ebpf/user/socket.h | 1 + agent/src/flow_generator/protocol_logs.rs | 12 +++---- server/ingester/event/decoder/decoder.go | 33 ++++++++++++++++++- server/ingester/event/decoder/decoder_test.go | 33 +++++++++++++++++++ 10 files changed, 116 insertions(+), 11 deletions(-) create mode 100644 server/ingester/event/decoder/decoder_test.go diff --git a/agent/src/common/l7_protocol_log.rs b/agent/src/common/l7_protocol_log.rs index a86865e2835..979576f5f0d 100644 --- a/agent/src/common/l7_protocol_log.rs +++ b/agent/src/common/l7_protocol_log.rs @@ -697,7 +697,7 @@ pub struct ParseParam<'a> { // the config of `l7_log_packet_size`, must set in parse_payload and check_payload pub buf_size: u16, - pub captured_byte: u16, + pub captured_byte: u32, pub oracle_parse_conf: OracleConfig, pub iso8583_parse_conf: Iso8583ParseConfig, @@ -837,7 +837,7 @@ impl<'a> ParseParam<'a> { } pub fn set_captured_byte(&mut self, captured_byte: usize) { - self.captured_byte = captured_byte as u16; + self.captured_byte = u32::try_from(captured_byte).unwrap_or(u32::MAX); } pub fn set_rrt_timeout(&mut self, t: usize) { @@ -961,3 +961,28 @@ impl fmt::Debug for L7ProtocolBitmap { f.write_str(format!("{:#?}", p).as_str()) } } + +#[cfg(test)] +mod tests { + use super::*; + use std::cell::RefCell; + use std::rc::Rc; + + #[test] + fn captured_byte_should_not_truncate_large_payloads() { + let packet = MetaPacket::default(); + let mut param = ParseParam::new( + &packet, + None, + Rc::new(RefCell::new(None)), + #[cfg(any(target_os = "linux", target_os = "android"))] + Rc::new(RefCell::new(None)), + false, + false, + ); + + let captured: u32 = 200_000; + param.set_captured_byte(captured as usize); + assert_eq!(param.captured_byte as u32, captured); + } +} diff --git a/agent/src/common/meta_packet.rs b/agent/src/common/meta_packet.rs index 76f00de7e31..ba0ddf950bf 100644 --- 
a/agent/src/common/meta_packet.rs +++ b/agent/src/common/meta_packet.rs @@ -245,6 +245,7 @@ pub struct MetaPacket<'a> { pub socket_id: u64, pub cap_start_seq: u64, pub cap_end_seq: u64, + pub reasm_bytes: u32, pub l7_protocol_from_ebpf: L7Protocol, // 流结束标识, 目前只有 go http2 uprobe 用到 pub is_request_end: bool, @@ -1035,6 +1036,9 @@ impl<'a> MetaPacket<'a> { #[inline] pub fn get_captured_byte(&self) -> usize { if self.tap_port.is_from(TapPort::FROM_EBPF) { + if self.reasm_bytes > 0 { + return self.reasm_bytes as usize; + } return self.packet_len as usize - 54; } @@ -1147,6 +1151,7 @@ impl<'a> MetaPacket<'a> { packet.signal_source = SignalSource::EBPF; packet.cap_start_seq = data.cap_seq; packet.cap_end_seq = data.cap_seq; + packet.reasm_bytes = data.reasm_bytes; packet.process_id = data.process_id; packet.thread_id = data.thread_id; packet.coroutine_id = data.coroutine_id; @@ -1597,4 +1602,14 @@ mod tests { pkt ); } + + #[test] + fn get_captured_byte_prefers_reasm_bytes_for_ebpf() { + let mut pkt = MetaPacket::default(); + pkt.tap_port = TapPort::from_ebpf(1, 0); + pkt.packet_len = 54 + 16; + pkt.reasm_bytes = 200_000; + + assert_eq!(pkt.get_captured_byte(), 200_000); + } } diff --git a/agent/src/ebpf/kernel/include/socket_trace_common.h b/agent/src/ebpf/kernel/include/socket_trace_common.h index 372977959dd..f0df8793373 100644 --- a/agent/src/ebpf/kernel/include/socket_trace_common.h +++ b/agent/src/ebpf/kernel/include/socket_trace_common.h @@ -129,6 +129,7 @@ struct __socket_data { __u32 fd; __u16 data_type; // HTTP, DNS, MySQL ... 
__u16 data_len; // 数据长度 + __u32 reasm_bytes; // 重组后的累计字节数 __u8 socket_role; // this message is created by: 0:unkonwn 1:client(connect) 2:server(accept) char data[BURST_DATA_BUF_SIZE]; } __attribute__ ((packed)); diff --git a/agent/src/ebpf/kernel/socket_trace.bpf.c b/agent/src/ebpf/kernel/socket_trace.bpf.c index 20a7a1fd540..1ba59b4e819 100644 --- a/agent/src/ebpf/kernel/socket_trace.bpf.c +++ b/agent/src/ebpf/kernel/socket_trace.bpf.c @@ -1674,6 +1674,7 @@ __data_submit(struct pt_regs *ctx, struct conn_info_s *conn_info, v->cap_timestamp = bpf_ktime_get_ns(); v->direction = conn_info->direction; v->syscall_len = syscall_len; + v->reasm_bytes = socket_info_ptr->reasm_bytes; v->msg_type = MSG_COMMON; // Reassembly modification type diff --git a/agent/src/ebpf/mod.rs b/agent/src/ebpf/mod.rs index a7efbab0420..4ae106da1c1 100644 --- a/agent/src/ebpf/mod.rs +++ b/agent/src/ebpf/mod.rs @@ -369,6 +369,7 @@ pub struct SK_BPF_DATA { */ pub syscall_len: u64, // 本次系统调用读、写数据的总长度 pub cap_len: u32, // 返回的cap_data长度 + pub reasm_bytes: u32, // 重组后的累计字节数 pub cap_seq: u64, // cap_data在Socket中的相对顺序号,在所在socket下从0开始自增,用于数据乱序排序 pub socket_role: u8, // this message is created by: 0:unkonwn 1:client(connect) 2:server(accept) pub fd: u32, // File descriptor for an open file or socket. 
diff --git a/agent/src/ebpf/user/socket.c b/agent/src/ebpf/user/socket.c index fbf759e3904..d218e0b26d7 100644 --- a/agent/src/ebpf/user/socket.c +++ b/agent/src/ebpf/user/socket.c @@ -1402,6 +1402,7 @@ static void reader_raw_cb(void *cookie, void *raw, int raw_size) } submit_data->syscall_len += offset; submit_data->cap_len = len + offset; + submit_data->reasm_bytes = sd->reasm_bytes; burst_data[i] = submit_data; start += diff --git a/agent/src/ebpf/user/socket.h b/agent/src/ebpf/user/socket.h index fd5e72fcd74..5f1a42f6de0 100644 --- a/agent/src/ebpf/user/socket.h +++ b/agent/src/ebpf/user/socket.h @@ -89,6 +89,7 @@ struct socket_bpf_data { uint8_t direction; // 数据的收发方向,枚举如下: 1 SOCK_DIR_SND, 2 SOCK_DIR_RCV uint64_t syscall_len; // 本次系统调用读、写数据的总长度 uint32_t cap_len; // 返回的cap_data长度 + uint32_t reasm_bytes; // 重组后的累计字节数 uint64_t cap_seq; // cap_data在Socket中的相对顺序号,从启动时的时钟开始自增1,用于数据乱序排序 uint8_t socket_role; // this message is created by: 0:unkonwn 1:client(connect) 2:server(accept) uint32_t fd; // File descriptor for an open file or socket. diff --git a/agent/src/flow_generator/protocol_logs.rs b/agent/src/flow_generator/protocol_logs.rs index 094fc4d13d8..84ae6c2ea64 100644 --- a/agent/src/flow_generator/protocol_logs.rs +++ b/agent/src/flow_generator/protocol_logs.rs @@ -570,16 +570,12 @@ macro_rules! swap_if { macro_rules! 
set_captured_byte { ($this:expr, $param:expr) => { match $this.msg_type { - LogMessageType::Request => $this.captured_request_byte = $param.captured_byte as u32, - LogMessageType::Response => $this.captured_response_byte = $param.captured_byte as u32, + LogMessageType::Request => $this.captured_request_byte = $param.captured_byte, + LogMessageType::Response => $this.captured_response_byte = $param.captured_byte, _ => { match LogMessageType::from($param.direction) { - LogMessageType::Request => { - $this.captured_request_byte = $param.captured_byte as u32 - } - LogMessageType::Response => { - $this.captured_response_byte = $param.captured_byte as u32 - } + LogMessageType::Request => $this.captured_request_byte = $param.captured_byte, + LogMessageType::Response => $this.captured_response_byte = $param.captured_byte, _ => unimplemented!(), }; } diff --git a/server/ingester/event/decoder/decoder.go b/server/ingester/event/decoder/decoder.go index d6836f990d7..aa576d75b25 100644 --- a/server/ingester/event/decoder/decoder.go +++ b/server/ingester/event/decoder/decoder.go @@ -181,7 +181,12 @@ func (d *Decoder) WriteFileEvent(vtapId uint16, e *pb.ProcEvent) { s.SignalSource = uint8(e.EventType) } - s.GProcessID = d.platformData.QueryProcessInfo(s.OrgId, vtapId, e.Pid) + s.GProcessID = resolveGProcessID( + func(pid uint32) uint32 { + return d.platformData.QueryProcessInfo(s.OrgId, vtapId, pid) + }, + e, + ) if e.IoEventData != nil { ioData := e.IoEventData s.EventType = strings.ToLower(ioData.Operation.String()) @@ -282,6 +287,32 @@ func (d *Decoder) WriteFileEvent(vtapId uint16, e *pb.ProcEvent) { d.eventWriter.Write(s) } +func resolveGProcessID(queryProcessInfo func(pid uint32) uint32, e *pb.ProcEvent) uint32 { + if e == nil { + return 0 + } + + gprocessID := queryProcessInfo(e.Pid) + if gprocessID != 0 { + return gprocessID + } + + // Proc lifecycle (fork/exec/exit) events may arrive before the controller + // has synchronized the child process into the `process` table. 
In that + // window, QueryProcessInfo(child_pid) returns 0 even though the parent is + // already mapped. Falling back to the parent_pid keeps the lifecycle event + // attached to the correct AI Agent gprocess_id until the child entry is + // eventually synced. + if e.EventType != pb.EventType_ProcLifecycleEvent || e.ProcLifecycleEventData == nil { + return 0 + } + parentPid := e.ProcLifecycleEventData.ParentPid + if parentPid == 0 || parentPid == e.Pid { + return 0 + } + return queryProcessInfo(parentPid) +} + func (d *Decoder) export(item exporterscommon.ExportItem) { if d.exporters == nil { return diff --git a/server/ingester/event/decoder/decoder_test.go b/server/ingester/event/decoder/decoder_test.go new file mode 100644 index 00000000000..b4a8e45b7b4 --- /dev/null +++ b/server/ingester/event/decoder/decoder_test.go @@ -0,0 +1,33 @@ +package decoder + +import ( + "testing" + + "github.com/deepflowio/deepflow/server/libs/flow-metrics/pb" +) + +func TestResolveGProcessIDProcLifecycleFallback(t *testing.T) { + parentPid := uint32(2000) + childPid := uint32(3000) + parentGpid := uint32(42) + + query := func(pid uint32) uint32 { + if pid == parentPid { + return parentGpid + } + return 0 + } + + event := &pb.ProcEvent{ + Pid: childPid, + EventType: pb.EventType_ProcLifecycleEvent, + ProcLifecycleEventData: &pb.ProcLifecycleEventData{ + ParentPid: parentPid, + }, + } + + got := resolveGProcessID(query, event) + if got != parentGpid { + t.Fatalf("expected gprocess_id fallback to parent %d, got %d", parentGpid, got) + } +} From a8c70bb71786b3be82d7dcd0e33c6b912e4a374b Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Thu, 12 Mar 2026 19:36:50 +0800 Subject: [PATCH 33/45] fix: guard ai reasm bytes on invalid socket info --- agent/src/ebpf/kernel/socket_trace.bpf.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/agent/src/ebpf/kernel/socket_trace.bpf.c b/agent/src/ebpf/kernel/socket_trace.bpf.c index 1ba59b4e819..e73f86764ad 100644 --- 
a/agent/src/ebpf/kernel/socket_trace.bpf.c +++ b/agent/src/ebpf/kernel/socket_trace.bpf.c @@ -1674,7 +1674,11 @@ __data_submit(struct pt_regs *ctx, struct conn_info_s *conn_info, v->cap_timestamp = bpf_ktime_get_ns(); v->direction = conn_info->direction; v->syscall_len = syscall_len; - v->reasm_bytes = socket_info_ptr->reasm_bytes; + __u32 reasm_bytes = sk_info->reasm_bytes; + if (is_socket_info_valid(socket_info_ptr)) { + reasm_bytes = socket_info_ptr->reasm_bytes; + } + v->reasm_bytes = reasm_bytes; v->msg_type = MSG_COMMON; // Reassembly modification type From 6d8c0cefa3a7831cf1d6503a3c0a2c92b19021ae Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Thu, 12 Mar 2026 20:06:07 +0800 Subject: [PATCH 34/45] Fix proc.gprocess_info refresh on process change --- agent/src/utils/process/linux.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agent/src/utils/process/linux.rs b/agent/src/utils/process/linux.rs index 9b74812b070..0e49c63c946 100644 --- a/agent/src/utils/process/linux.rs +++ b/agent/src/utils/process/linux.rs @@ -522,7 +522,7 @@ impl ProcessListener { process_datas.sort_by_key(|x| x.pid); process_datas.merge_and_dedup(); - if pids != value.pids { + if pids != value.pids || process_datas != value.process_datas { debug!("Feature {} update {} pids {:?}.", key, pids.len(), pids); value.callback.as_ref().unwrap()(&pids, &process_datas); value.pids = pids; From aaf3a3bfb837566407b8540925027932180ca1cf Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Thu, 12 Mar 2026 23:19:36 +0800 Subject: [PATCH 35/45] fix: enable reassembly after protocol inference --- agent/src/ebpf/kernel/socket_trace.bpf.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/agent/src/ebpf/kernel/socket_trace.bpf.c b/agent/src/ebpf/kernel/socket_trace.bpf.c index e73f86764ad..0fe6503d106 100644 --- a/agent/src/ebpf/kernel/socket_trace.bpf.c +++ b/agent/src/ebpf/kernel/socket_trace.bpf.c @@ -1903,6 +1903,16 @@ static __inline int process_data(struct pt_regs *ctx, __u64 
id, if (act == INFER_TERMINATE) return -1; + + if (act == INFER_FINISH && + conn_info->protocol != PROTO_UNKNOWN && + is_socket_info_valid(conn_info->socket_info_ptr) && + !conn_info->socket_info_ptr->allow_reassembly && + is_proto_reasm_enabled(conn_info->protocol)) { + conn_info->socket_info_ptr->allow_reassembly = true; + conn_info->socket_info_ptr->reasm_bytes = 0; + check_and_set_data_reassembly(conn_info); + } #if !defined(LINUX_VER_KFUNC) && !defined(LINUX_VER_5_2_PLUS) if (disable_kprobe && extra->source == DATA_SOURCE_SYSCALL) return -1; From 9bde4c6a56fa5b642c6232c7d99924fb786b2f23 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Thu, 12 Mar 2026 23:58:46 +0800 Subject: [PATCH 36/45] fix: enable reassembly on inferred protocol for existing sockets --- agent/src/ebpf/kernel/socket_trace.bpf.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/agent/src/ebpf/kernel/socket_trace.bpf.c b/agent/src/ebpf/kernel/socket_trace.bpf.c index 0fe6503d106..bb167bd7446 100644 --- a/agent/src/ebpf/kernel/socket_trace.bpf.c +++ b/agent/src/ebpf/kernel/socket_trace.bpf.c @@ -1550,6 +1550,12 @@ __data_submit(struct pt_regs *ctx, struct conn_info_s *conn_info, #endif __u32 send_reasm_bytes = 0; if (is_socket_info_valid(socket_info_ptr)) { + if (!socket_info_ptr->allow_reassembly && + is_proto_reasm_enabled(conn_info->protocol)) { + socket_info_ptr->allow_reassembly = true; + socket_info_ptr->finish_reasm = false; + socket_info_ptr->reasm_bytes = 0; + } sk_info->uid = socket_info_ptr->uid; sk_info->allow_reassembly = socket_info_ptr->allow_reassembly; #ifdef EXTENDED_AI_AGENT_FILE_IO From 195751500c48cefed18aa8ca79caf10b46d5182f Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Fri, 13 Mar 2026 10:28:34 +0800 Subject: [PATCH 37/45] =?UTF-8?q?fix:=20ai-agent=20=E5=AD=90=E5=AD=99?= =?UTF-8?q?=E8=BF=9B=E7=A8=8B=E7=BB=A7=E6=89=BF=20gprocess=5Fid?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- server/ingester/event/decoder/decoder.go | 
160 +++++++++++++++--- server/ingester/event/decoder/decoder_test.go | 60 ++++++- server/ingester/event/event/event.go | 7 + 3 files changed, 204 insertions(+), 23 deletions(-) diff --git a/server/ingester/event/decoder/decoder.go b/server/ingester/event/decoder/decoder.go index aa576d75b25..d4bb85574c1 100644 --- a/server/ingester/event/decoder/decoder.go +++ b/server/ingester/event/decoder/decoder.go @@ -20,6 +20,7 @@ import ( "net" "strconv" "strings" + "sync" "time" logging "github.com/op/go-logging" @@ -56,15 +57,81 @@ type Counter struct { ErrorCount int64 `statsd:"err-count"` } +type AiAgentRootPidCache struct { + mu sync.RWMutex + rootPidByKey map[uint64]uint32 +} + +func NewAiAgentRootPidCache() *AiAgentRootPidCache { + return &AiAgentRootPidCache{ + rootPidByKey: make(map[uint64]uint32), + } +} + +func aiAgentRootPidKey(orgId, vtapId uint16, pid uint32) uint64 { + return uint64(orgId)<<48 | uint64(vtapId)<<32 | uint64(pid) +} + +func (c *AiAgentRootPidCache) Get(orgId, vtapId uint16, pid uint32) (uint32, bool) { + if c == nil || pid == 0 { + return 0, false + } + key := aiAgentRootPidKey(orgId, vtapId, pid) + c.mu.RLock() + defer c.mu.RUnlock() + rootPid, ok := c.rootPidByKey[key] + return rootPid, ok +} + +func (c *AiAgentRootPidCache) Set(orgId, vtapId uint16, pid, rootPid uint32) { + if c == nil || pid == 0 { + return + } + key := aiAgentRootPidKey(orgId, vtapId, pid) + c.mu.Lock() + c.rootPidByKey[key] = rootPid + c.mu.Unlock() +} + +func (c *AiAgentRootPidCache) Delete(orgId, vtapId uint16, pid uint32) { + if c == nil || pid == 0 { + return + } + key := aiAgentRootPidKey(orgId, vtapId, pid) + c.mu.Lock() + delete(c.rootPidByKey, key) + c.mu.Unlock() +} + +func (c *AiAgentRootPidCache) ResolveRootPid(orgId, vtapId uint16, pid, parentPid uint32) uint32 { + if c == nil || pid == 0 { + return 0 + } + if rootPid, ok := c.Get(orgId, vtapId, pid); ok && rootPid != 0 { + return rootPid + } + if parentPid != 0 && parentPid != pid { + if rootPid, ok := 
c.Get(orgId, vtapId, parentPid); ok && rootPid != 0 { + c.Set(orgId, vtapId, pid, rootPid) + return rootPid + } + c.Set(orgId, vtapId, pid, parentPid) + return parentPid + } + c.Set(orgId, vtapId, pid, pid) + return pid +} + type Decoder struct { - index int - eventType common.EventType - platformData *grpc.PlatformInfoTable - inQueue queue.QueueReader - eventWriter *dbwriter.EventWriter - exporters *exporters.Exporters - debugEnabled bool - config *config.Config + index int + eventType common.EventType + platformData *grpc.PlatformInfoTable + inQueue queue.QueueReader + eventWriter *dbwriter.EventWriter + exporters *exporters.Exporters + debugEnabled bool + config *config.Config + aiAgentRootPidCache *AiAgentRootPidCache orgId, teamId uint16 @@ -80,6 +147,7 @@ func NewDecoder( platformData *grpc.PlatformInfoTable, exporters *exporters.Exporters, config *config.Config, + aiAgentRootPidCache *AiAgentRootPidCache, ) *Decoder { controllers := make([]net.IP, len(config.Base.ControllerIPs)) for i, ipString := range config.Base.ControllerIPs { @@ -89,15 +157,16 @@ func NewDecoder( } } return &Decoder{ - index: index, - eventType: eventType, - platformData: platformData, - inQueue: inQueue, - debugEnabled: log.IsEnabledFor(logging.DEBUG), - eventWriter: eventWriter, - exporters: exporters, - config: config, - counter: &Counter{}, + index: index, + eventType: eventType, + platformData: platformData, + inQueue: inQueue, + debugEnabled: log.IsEnabledFor(logging.DEBUG), + eventWriter: eventWriter, + exporters: exporters, + config: config, + aiAgentRootPidCache: aiAgentRootPidCache, + counter: &Counter{}, } } @@ -185,6 +254,9 @@ func (d *Decoder) WriteFileEvent(vtapId uint16, e *pb.ProcEvent) { func(pid uint32) uint32 { return d.platformData.QueryProcessInfo(s.OrgId, vtapId, pid) }, + d.aiAgentRootPidCache, + s.OrgId, + vtapId, e, ) if e.IoEventData != nil { @@ -287,14 +359,51 @@ func (d *Decoder) WriteFileEvent(vtapId uint16, e *pb.ProcEvent) { d.eventWriter.Write(s) } -func 
resolveGProcessID(queryProcessInfo func(pid uint32) uint32, e *pb.ProcEvent) uint32 { +func resolveGProcessID(queryProcessInfo func(pid uint32) uint32, rootPidCache *AiAgentRootPidCache, orgId, vtapId uint16, e *pb.ProcEvent) uint32 { if e == nil { return 0 } - gprocessID := queryProcessInfo(e.Pid) - if gprocessID != 0 { - return gprocessID + pid := e.Pid + if pid == 0 { + return 0 + } + + rootPid := pid + if rootPidCache != nil { + if e.EventType == pb.EventType_ProcLifecycleEvent && e.ProcLifecycleEventData != nil { + lifecyclePid := e.ProcLifecycleEventData.Pid + if lifecyclePid != 0 { + pid = lifecyclePid + } + rootPid = rootPidCache.ResolveRootPid(orgId, vtapId, pid, e.ProcLifecycleEventData.ParentPid) + } else if cachedRoot, ok := rootPidCache.Get(orgId, vtapId, pid); ok && cachedRoot != 0 { + rootPid = cachedRoot + } + } + + if rootPid != 0 { + gprocessID := queryProcessInfo(rootPid) + if gprocessID != 0 { + if rootPidCache != nil && + e.EventType == pb.EventType_ProcLifecycleEvent && e.ProcLifecycleEventData != nil && + e.ProcLifecycleEventData.LifecycleType == pb.ProcLifecycleType_ProcLifecycleExit { + rootPidCache.Delete(orgId, vtapId, pid) + } + return gprocessID + } + } + + if rootPid == pid { + gprocessID := queryProcessInfo(pid) + if gprocessID != 0 { + if rootPidCache != nil && + e.EventType == pb.EventType_ProcLifecycleEvent && e.ProcLifecycleEventData != nil && + e.ProcLifecycleEventData.LifecycleType == pb.ProcLifecycleType_ProcLifecycleExit { + rootPidCache.Delete(orgId, vtapId, pid) + } + return gprocessID + } } // Proc lifecycle (fork/exec/exit) events may arrive before the controller @@ -310,7 +419,14 @@ func resolveGProcessID(queryProcessInfo func(pid uint32) uint32, e *pb.ProcEvent if parentPid == 0 || parentPid == e.Pid { return 0 } - return queryProcessInfo(parentPid) + gprocessID := queryProcessInfo(parentPid) + if gprocessID == 0 { + return 0 + } + if rootPidCache != nil && e.ProcLifecycleEventData.LifecycleType == 
pb.ProcLifecycleType_ProcLifecycleExit { + rootPidCache.Delete(orgId, vtapId, pid) + } + return gprocessID } func (d *Decoder) export(item exporterscommon.ExportItem) { diff --git a/server/ingester/event/decoder/decoder_test.go b/server/ingester/event/decoder/decoder_test.go index b4a8e45b7b4..1eb0a05750b 100644 --- a/server/ingester/event/decoder/decoder_test.go +++ b/server/ingester/event/decoder/decoder_test.go @@ -10,6 +10,9 @@ func TestResolveGProcessIDProcLifecycleFallback(t *testing.T) { parentPid := uint32(2000) childPid := uint32(3000) parentGpid := uint32(42) + orgId := uint16(1) + vtapId := uint16(2) + cache := NewAiAgentRootPidCache() query := func(pid uint32) uint32 { if pid == parentPid { @@ -23,11 +26,66 @@ func TestResolveGProcessIDProcLifecycleFallback(t *testing.T) { EventType: pb.EventType_ProcLifecycleEvent, ProcLifecycleEventData: &pb.ProcLifecycleEventData{ ParentPid: parentPid, + Pid: childPid, }, } - got := resolveGProcessID(query, event) + got := resolveGProcessID(query, cache, orgId, vtapId, event) if got != parentGpid { t.Fatalf("expected gprocess_id fallback to parent %d, got %d", parentGpid, got) } } + +func TestResolveGProcessIDChildInheritsRoot(t *testing.T) { + orgId := uint16(1) + vtapId := uint16(2) + parentPid := uint32(4000) + childPid := uint32(5000) + grandPid := uint32(6000) + parentGpid := uint32(77) + cache := NewAiAgentRootPidCache() + + query := func(pid uint32) uint32 { + if pid == parentPid { + return parentGpid + } + return 0 + } + + forkChild := &pb.ProcEvent{ + Pid: childPid, + EventType: pb.EventType_ProcLifecycleEvent, + ProcLifecycleEventData: &pb.ProcLifecycleEventData{ + LifecycleType: pb.ProcLifecycleType_ProcLifecycleFork, + ParentPid: parentPid, + Pid: childPid, + }, + } + if got := resolveGProcessID(query, cache, orgId, vtapId, forkChild); got != parentGpid { + t.Fatalf("expected child inherit gprocess_id %d, got %d", parentGpid, got) + } + + forkGrand := &pb.ProcEvent{ + Pid: grandPid, + EventType: 
pb.EventType_ProcLifecycleEvent, + ProcLifecycleEventData: &pb.ProcLifecycleEventData{ + LifecycleType: pb.ProcLifecycleType_ProcLifecycleFork, + ParentPid: childPid, + Pid: grandPid, + }, + } + if got := resolveGProcessID(query, cache, orgId, vtapId, forkGrand); got != parentGpid { + t.Fatalf("expected grandchild inherit gprocess_id %d, got %d", parentGpid, got) + } + + fileOp := &pb.ProcEvent{ + Pid: grandPid, + EventType: pb.EventType_FileOpEvent, + FileOpEventData: &pb.FileOpEventData{ + OpType: pb.FileOpType_FileOpCreate, + }, + } + if got := resolveGProcessID(query, cache, orgId, vtapId, fileOp); got != parentGpid { + t.Fatalf("expected file event inherit gprocess_id %d, got %d", parentGpid, got) + } +} diff --git a/server/ingester/event/event/event.go b/server/ingester/event/event/event.go index 2aaca632288..e0fd49b6b99 100644 --- a/server/ingester/event/event/event.go +++ b/server/ingester/event/event/event.go @@ -96,6 +96,7 @@ func NewResouceEventor(eventQueue *queue.OverwriteQueue, config *config.Config, platformTable, nil, config, + nil, ) return &Eventor{ Config: config, @@ -126,6 +127,7 @@ func NewAlertEventor(config *config.Config, recv *receiver.Receiver, manager *dr platformTable, nil, config, + nil, ) return &Eventor{ Config: config, @@ -161,6 +163,10 @@ func NewEventor(eventType common.EventType, config *config.Config, recv *receive decoders := make([]*decoder.Decoder, queueCount) platformDatas := make([]*grpc.PlatformInfoTable, queueCount) + var aiAgentRootPidCache *decoder.AiAgentRootPidCache + if eventType == common.FILE_EVENT { + aiAgentRootPidCache = decoder.NewAiAgentRootPidCache() + } for i := 0; i < queueCount; i++ { eventWriter, err := dbwriter.NewEventWriter(eventType, i, config) if err != nil { @@ -178,6 +184,7 @@ func NewEventor(eventType common.EventType, config *config.Config, recv *receive platformDatas[i], exporters, config, + aiAgentRootPidCache, ) } return &Eventor{ From c1283848bdd473b1e649956b69d5840e229e20dd Mon Sep 17 
00:00:00 2001 From: Kai WANG Date: Fri, 13 Mar 2026 11:00:05 +0800 Subject: [PATCH 38/45] =?UTF-8?q?fix:=20proc=20exec=20=E4=BF=9D=E6=8C=81?= =?UTF-8?q?=20gprocess=20=E7=BB=A7=E6=89=BF=E4=B8=8E=E5=8D=95=E6=B5=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- server/ingester/event/decoder/decoder.go | 20 ++++++++++++- server/ingester/event/decoder/decoder_test.go | 29 +++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/server/ingester/event/decoder/decoder.go b/server/ingester/event/decoder/decoder.go index d4bb85574c1..e84ba942927 100644 --- a/server/ingester/event/decoder/decoder.go +++ b/server/ingester/event/decoder/decoder.go @@ -376,7 +376,25 @@ func resolveGProcessID(queryProcessInfo func(pid uint32) uint32, rootPidCache *A if lifecyclePid != 0 { pid = lifecyclePid } - rootPid = rootPidCache.ResolveRootPid(orgId, vtapId, pid, e.ProcLifecycleEventData.ParentPid) + parentPid := e.ProcLifecycleEventData.ParentPid + if e.ProcLifecycleEventData.LifecycleType == pb.ProcLifecycleType_ProcLifecycleExec { + if cachedRoot, ok := rootPidCache.Get(orgId, vtapId, pid); ok && cachedRoot != 0 { + rootPid = cachedRoot + } else if parentPid != 0 { + if parentRoot, ok := rootPidCache.Get(orgId, vtapId, parentPid); ok && parentRoot != 0 { + rootPidCache.Set(orgId, vtapId, pid, parentRoot) + rootPid = parentRoot + } else { + rootPidCache.Set(orgId, vtapId, pid, pid) + rootPid = pid + } + } else { + rootPidCache.Set(orgId, vtapId, pid, pid) + rootPid = pid + } + } else { + rootPid = rootPidCache.ResolveRootPid(orgId, vtapId, pid, parentPid) + } } else if cachedRoot, ok := rootPidCache.Get(orgId, vtapId, pid); ok && cachedRoot != 0 { rootPid = cachedRoot } diff --git a/server/ingester/event/decoder/decoder_test.go b/server/ingester/event/decoder/decoder_test.go index 1eb0a05750b..548152f38a1 100644 --- a/server/ingester/event/decoder/decoder_test.go +++ b/server/ingester/event/decoder/decoder_test.go @@ 
-89,3 +89,32 @@ func TestResolveGProcessIDChildInheritsRoot(t *testing.T) { t.Fatalf("expected file event inherit gprocess_id %d, got %d", parentGpid, got) } } + +func TestResolveGProcessIDExecKeepsSelfWhenUncached(t *testing.T) { + orgId := uint16(1) + vtapId := uint16(2) + pid := uint32(7000) + parentPid := uint32(8000) + pidGpid := uint32(88) + cache := NewAiAgentRootPidCache() + + query := func(id uint32) uint32 { + if id == pid { + return pidGpid + } + return 0 + } + + execEvent := &pb.ProcEvent{ + Pid: pid, + EventType: pb.EventType_ProcLifecycleEvent, + ProcLifecycleEventData: &pb.ProcLifecycleEventData{ + LifecycleType: pb.ProcLifecycleType_ProcLifecycleExec, + ParentPid: parentPid, + Pid: pid, + }, + } + if got := resolveGProcessID(query, cache, orgId, vtapId, execEvent); got != pidGpid { + t.Fatalf("expected exec event use self gprocess_id %d, got %d", pidGpid, got) + } +} From 96fa37fcb5a3728fa8f34cbaff42f42da662b6ca Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Fri, 13 Mar 2026 15:41:03 +0800 Subject: [PATCH 39/45] Ensure AI agent pids included in socket list sync --- agent/src/utils/process/linux.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/agent/src/utils/process/linux.rs b/agent/src/utils/process/linux.rs index 0e49c63c946..6343e62f1b6 100644 --- a/agent/src/utils/process/linux.rs +++ b/agent/src/utils/process/linux.rs @@ -593,13 +593,17 @@ fn should_skip_feature( #[cfg(feature = "enterprise")] fn fetch_ai_agent_pids(feature: &str) -> Vec { - if feature == "proc.gprocess_info" { + // AI Agent processes must participate in both gprocess_info and socket_list. + // - gprocess_info ensures process metadata sync (MySQL process table, biz_type tagging) + // - socket_list ensures GPID sync for EBPF flows (gprocess_id injection) + // Without socket_list, GPID sync remains empty, leading to gprocess_id=0 in L7/proc events. 
+ if feature == "proc.gprocess_info" || feature == "proc.socket_list" { if let Some(registry) = enterprise_utils::ai_agent::global_registry() { let pids = registry.get_all_pids(); - debug!("AI Agent: proc.gprocess_info fetch {} pids", pids.len()); + debug!("AI Agent: {} fetch {} pids", feature, pids.len()); return pids; } - debug!("AI Agent: proc.gprocess_info registry not initialized"); + debug!("AI Agent: {} registry not initialized", feature); } Vec::new() } From d0ab245c37178e4a35144c81a4a04c80fb5b4944 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Fri, 13 Mar 2026 21:11:13 +0800 Subject: [PATCH 40/45] fix: propagate reasm_bytes on ebpf merge --- agent/src/common/meta_packet.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/agent/src/common/meta_packet.rs b/agent/src/common/meta_packet.rs index ba0ddf950bf..db85d03bb0c 100644 --- a/agent/src/common/meta_packet.rs +++ b/agent/src/common/meta_packet.rs @@ -1087,6 +1087,13 @@ impl<'a> MetaPacket<'a> { self.payload_len += packet.payload_len; self.l4_payload_len += packet.l4_payload_len; self.cap_end_seq = packet.cap_start_seq; + // eBPF reassembly: propagate the latest cumulative reassembly bytes. + // `reasm_bytes` reflects the total bytes reassembled in the kernel for + // this flow. When we merge multiple MSG_REASM_* segments, we must keep + // the newest cumulative value, otherwise `get_captured_byte()` will + // stay at the first segment's size (e.g., HTTP headers only) and + // `captured_request_byte` will be incorrect for large bodies. 
+ self.reasm_bytes = self.reasm_bytes.max(packet.reasm_bytes); } #[cfg(all(unix, feature = "libtrace"))] From 932234d0442d521e48f49fad30593af0ef195532 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Mon, 16 Mar 2026 23:44:49 +0800 Subject: [PATCH 41/45] fix: remove record_endpoint_hit stub from AI Agent registry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 移除开源版 AiAgentRegistry stub 中的 record_endpoint_hit() 方法, 与企业版删除 endpoint 唯一性约束保持一致。 Co-Authored-By: Claude Opus 4.6 (1M context) --- agent/crates/enterprise-utils/src/lib.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/agent/crates/enterprise-utils/src/lib.rs b/agent/crates/enterprise-utils/src/lib.rs index 0b5e70ffc77..c0231a77847 100644 --- a/agent/crates/enterprise-utils/src/lib.rs +++ b/agent/crates/enterprise-utils/src/lib.rs @@ -486,10 +486,6 @@ pub mod ai_agent { pub fn set_bpf_map_fd(&self, _fd: i32) {} pub fn set_file_io_enabled(&self, _enabled: bool) {} - - pub fn record_endpoint_hit(&self, _pid: u32, _endpoint: &str, _now: Duration) -> bool { - false - } } /// Check if a URL path matches an AI Agent endpoint pattern. 
From 94e4ea51773349ce1e9b689171e78eddf86c4a62 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Tue, 17 Mar 2026 18:46:10 +0800 Subject: [PATCH 42/45] feat: add ai_agent_root_pid to resolve gprocess_id for child processes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit **复现步骤** AI Agent子进程(fork/exec产生的)的file_event和proc_lifecycle_event 的gprocess_id为0,因为子进程在controller同步到process表之前就已 经产生了事件。 **原因和解决方案** 子进程的gprocess_id依赖server端通过QueryProcessInfo查询,但新 fork的子进程可能还未同步到process表。 解决方案:在Agent端维护root_pid(最初通过endpoint识别的根AI Agent 进程PID),通过protobuf的ai_agent_root_pid字段传递到server端。 Server端在cache和直接PID查询都失败时,使用ai_agent_root_pid作为 fallback查询gprocess_id。 变更内容: - metric.proto: ProcEvent新增ai_agent_root_pid字段(tag=14) - proc_event/linux.rs: ProcEvent结构体新增ai_agent_root_pid字段 - ebpf_dispatcher.rs: 新增fill_ai_agent_root_pid()从registry查询 root_pid并填充到事件中 - decoder.go: resolveGProcessID()新增ai_agent_root_pid fallback - enterprise-utils/lib.rs: 开源stub新增get_root_pid/register_child **影响范围** 仅影响AI Agent治理数据采集功能的gprocess_id解析 **验证方案** - 单元测试:TestResolveGProcessIDAiAgentRootPidFallback - 部署后验证fork事件的gprocess_id不再为0 **涉及分支** * support-agent-governance **检查项** - [x] 需要更新依赖 - [ ] 是共性问题(代码中存在类似问题) - [ ] 编译通过 - [ ] 单元测试通过 Co-Authored-By: Claude Opus 4.6 --- agent/crates/enterprise-utils/src/lib.rs | 9 ++++ agent/src/common/proc_event/linux.rs | 4 ++ agent/src/ebpf_dispatcher.rs | 12 +++++ message/metric.proto | 1 + server/ingester/event/decoder/decoder.go | 40 +++++++++++----- server/ingester/event/decoder/decoder_test.go | 47 +++++++++++++++++++ 6 files changed, 100 insertions(+), 13 deletions(-) diff --git a/agent/crates/enterprise-utils/src/lib.rs b/agent/crates/enterprise-utils/src/lib.rs index c0231a77847..655dcd90c54 100644 --- a/agent/crates/enterprise-utils/src/lib.rs +++ b/agent/crates/enterprise-utils/src/lib.rs @@ -444,6 +444,7 @@ pub mod ai_agent { pub first_seen: Duration, pub last_seen: Duration, pub matched_endpoint: String, + pub root_pid: u32, } 
#[derive(Debug, Clone, Default)] @@ -462,6 +463,14 @@ pub mod ai_agent { false } + pub fn get_root_pid(&self, _pid: u32) -> u32 { + 0 + } + + pub fn register_child(&self, _parent_pid: u32, _child_pid: u32, _now: Duration) -> bool { + false + } + pub fn get_all_pids(&self) -> Vec { vec![] } diff --git a/agent/src/common/proc_event/linux.rs b/agent/src/common/proc_event/linux.rs index 0a4ce4c6190..6b7604a3bb7 100644 --- a/agent/src/common/proc_event/linux.rs +++ b/agent/src/common/proc_event/linux.rs @@ -387,6 +387,7 @@ impl fmt::Display for EventType { pub struct ProcEvent { pub pid: u32, pub pod_id: u32, + pub ai_agent_root_pid: u32, thread_id: u32, coroutine_id: u64, // optional process_kname: Vec, @@ -446,6 +447,7 @@ impl ProcEvent { event_type, event_data, pod_id: 0, + ai_agent_root_pid: 0, }; Ok(BoxedProcEvents(Box::new(proc_event))) @@ -490,6 +492,7 @@ impl Sendable for BoxedProcEvents { end_time: self.0.end_time, event_type: self.0.event_type.into(), pod_id: self.0.pod_id, + ai_agent_root_pid: self.0.ai_agent_root_pid, ..Default::default() }; match self.0.event_data { @@ -535,6 +538,7 @@ mod tests { let proc_event = ProcEvent { pid: 1234, pod_id: 0, + ai_agent_root_pid: 0, thread_id: 0, coroutine_id: 0, process_kname: b"python3".to_vec(), diff --git a/agent/src/ebpf_dispatcher.rs b/agent/src/ebpf_dispatcher.rs index 10d01d0f73d..4f97c0adaa5 100644 --- a/agent/src/ebpf_dispatcher.rs +++ b/agent/src/ebpf_dispatcher.rs @@ -133,6 +133,16 @@ fn register_ai_agent_child(event: &BoxedProcEvents) { } } +#[cfg(feature = "enterprise")] +fn fill_ai_agent_root_pid(event: &mut BoxedProcEvents) { + if let Some(registry) = enterprise_utils::ai_agent::global_registry() { + let root_pid = registry.get_root_pid(event.0.pid); + if root_pid != 0 { + event.0.ai_agent_root_pid = root_pid; + } + } +} + impl OwnedCountable for SyncEbpfCounter { fn get_counters(&self) -> Vec { let rx = self.counter.rx.swap(0, Ordering::Relaxed); @@ -661,6 +671,8 @@ impl EbpfCollector { } #[cfg(feature 
= "enterprise")] register_ai_agent_child(&event); + #[cfg(feature = "enterprise")] + fill_ai_agent_root_pid(&mut event); if let Err(e) = PROC_EVENT_SENDER.as_mut().unwrap().send(event) { warn!("event send ebpf error: {:?}", e); } diff --git a/message/metric.proto b/message/metric.proto index e6b4e468348..99377d4d5d7 100644 --- a/message/metric.proto +++ b/message/metric.proto @@ -335,6 +335,7 @@ message ProcEvent { FileOpEventData file_op_event_data = 11; PermOpEventData perm_op_event_data = 12; ProcLifecycleEventData proc_lifecycle_event_data = 13; + uint32 ai_agent_root_pid = 14; } message PrometheusMetric { diff --git a/server/ingester/event/decoder/decoder.go b/server/ingester/event/decoder/decoder.go index e84ba942927..c8b2dd2d29c 100644 --- a/server/ingester/event/decoder/decoder.go +++ b/server/ingester/event/decoder/decoder.go @@ -400,26 +400,42 @@ func resolveGProcessID(queryProcessInfo func(pid uint32) uint32, rootPidCache *A } } + cleanupOnExit := func() { + if rootPidCache != nil && + e.EventType == pb.EventType_ProcLifecycleEvent && e.ProcLifecycleEventData != nil && + e.ProcLifecycleEventData.LifecycleType == pb.ProcLifecycleType_ProcLifecycleExit { + rootPidCache.Delete(orgId, vtapId, pid) + } + } + if rootPid != 0 { gprocessID := queryProcessInfo(rootPid) if gprocessID != 0 { - if rootPidCache != nil && - e.EventType == pb.EventType_ProcLifecycleEvent && e.ProcLifecycleEventData != nil && - e.ProcLifecycleEventData.LifecycleType == pb.ProcLifecycleType_ProcLifecycleExit { - rootPidCache.Delete(orgId, vtapId, pid) - } + cleanupOnExit() return gprocessID } } - if rootPid == pid { + if rootPid != pid { gprocessID := queryProcessInfo(pid) if gprocessID != 0 { - if rootPidCache != nil && - e.EventType == pb.EventType_ProcLifecycleEvent && e.ProcLifecycleEventData != nil && - e.ProcLifecycleEventData.LifecycleType == pb.ProcLifecycleType_ProcLifecycleExit { - rootPidCache.Delete(orgId, vtapId, pid) + cleanupOnExit() + return gprocessID + } + } + + // 
Fallback: use ai_agent_root_pid sent by the agent. + // The agent tracks root AI Agent PIDs in its registry and attaches + // the root PID to every event from AI Agent processes. This resolves + // gprocess_id for child/grandchild processes that haven't been + // synchronized to the process table yet. + if e.AiAgentRootPid != 0 && e.AiAgentRootPid != pid && e.AiAgentRootPid != rootPid { + gprocessID := queryProcessInfo(e.AiAgentRootPid) + if gprocessID != 0 { + if rootPidCache != nil { + rootPidCache.Set(orgId, vtapId, pid, e.AiAgentRootPid) } + cleanupOnExit() return gprocessID } } @@ -441,9 +457,7 @@ func resolveGProcessID(queryProcessInfo func(pid uint32) uint32, rootPidCache *A if gprocessID == 0 { return 0 } - if rootPidCache != nil && e.ProcLifecycleEventData.LifecycleType == pb.ProcLifecycleType_ProcLifecycleExit { - rootPidCache.Delete(orgId, vtapId, pid) - } + cleanupOnExit() return gprocessID } diff --git a/server/ingester/event/decoder/decoder_test.go b/server/ingester/event/decoder/decoder_test.go index 548152f38a1..dd9b45de834 100644 --- a/server/ingester/event/decoder/decoder_test.go +++ b/server/ingester/event/decoder/decoder_test.go @@ -118,3 +118,50 @@ func TestResolveGProcessIDExecKeepsSelfWhenUncached(t *testing.T) { t.Fatalf("expected exec event use self gprocess_id %d, got %d", pidGpid, got) } } + +func TestResolveGProcessIDAiAgentRootPidFallback(t *testing.T) { + orgId := uint16(1) + vtapId := uint16(2) + rootPid := uint32(1000) + childPid := uint32(2000) + rootGpid := uint32(55) + cache := NewAiAgentRootPidCache() + + query := func(pid uint32) uint32 { + if pid == rootPid { + return rootGpid + } + return 0 + } + + // File event from a child process that isn't in the process table yet, + // but carries the root AI Agent PID from the agent registry. 
+ fileEvent := &pb.ProcEvent{ + Pid: childPid, + EventType: pb.EventType_FileOpEvent, + AiAgentRootPid: rootPid, + FileOpEventData: &pb.FileOpEventData{OpType: pb.FileOpType_FileOpCreate}, + } + got := resolveGProcessID(query, cache, orgId, vtapId, fileEvent) + if got != rootGpid { + t.Fatalf("expected ai_agent_root_pid fallback to gprocess_id %d, got %d", rootGpid, got) + } + + // After the first resolution, the cache should be populated so + // subsequent events for the same child resolve without the fallback. + query2 := func(pid uint32) uint32 { + if pid == rootPid { + return rootGpid + } + return 0 + } + fileEvent2 := &pb.ProcEvent{ + Pid: childPid, + EventType: pb.EventType_IoEvent, + IoEventData: &pb.IoEventData{}, + } + got2 := resolveGProcessID(query2, cache, orgId, vtapId, fileEvent2) + if got2 != rootGpid { + t.Fatalf("expected cached root_pid resolution to gprocess_id %d, got %d", rootGpid, got2) + } +} From 647d2d2a9ff5c06c570c22c0141ac1202184eb54 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Wed, 18 Mar 2026 13:25:45 +0800 Subject: [PATCH 43/45] fix: normalize file_op event output to match IoEvent format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Strip "FileOp" prefix from event_type (fileopcreate→create), split full file path into file_dir + file_name, and populate access_permission for chmod events. 
Co-Authored-By: Claude Opus 4.6 --- server/ingester/event/decoder/decoder.go | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/server/ingester/event/decoder/decoder.go b/server/ingester/event/decoder/decoder.go index c8b2dd2d29c..9bab6861dad 100644 --- a/server/ingester/event/decoder/decoder.go +++ b/server/ingester/event/decoder/decoder.go @@ -276,9 +276,25 @@ func (d *Decoder) WriteFileEvent(vtapId uint16, e *pb.ProcEvent) { s.Duration = uint64(s.EndTime - s.StartTime) } else if e.FileOpEventData != nil { d := e.FileOpEventData - s.EventType = strings.ToLower(d.OpType.String()) + // Strip "FileOp" prefix: FileOpCreate→create, FileOpDelete→delete, etc. + opStr := d.OpType.String() + if strings.HasPrefix(opStr, "FileOp") { + opStr = opStr[len("FileOp"):] + } + s.EventType = strings.ToLower(opStr) s.ProcessKName = string(e.ProcessKname) - s.FileName = string(d.Filename) + // Split full path into file_dir and file_name to match IoEvent format + fullPath := string(d.Filename) + if idx := strings.LastIndex(fullPath, "/"); idx >= 0 { + s.FileDir = fullPath[:idx+1] + s.FileName = fullPath[idx+1:] + } else { + s.FileName = fullPath + } + // For chmod, mode contains the actual permission bits + if d.OpType == pb.FileOpType_FileOpChmod { + s.AccessPermission = d.Mode + } s.SyscallThread = e.ThreadId s.SyscallCoroutine = e.CoroutineId } else if e.PermOpEventData != nil { From abff528d71b893439cbf62b07b3074067f259b40 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Wed, 18 Mar 2026 14:21:02 +0800 Subject: [PATCH 44/45] =?UTF-8?q?fix:=20AI=20agent=E5=AD=90=E8=BF=9B?= =?UTF-8?q?=E7=A8=8B=E6=96=87=E4=BB=B6=E8=AF=BB=E5=86=99=E4=BA=8B=E4=BB=B6?= =?UTF-8?q?=E8=A2=ABcollect=5Fmode=E8=BF=87=E6=BB=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AI Agent进程的文件read/write事件之前仅绕过了latency过滤, 但仍被io_event_collect_mode过滤(默认mode=1要求trace关联)。 fork的子进程exec后执行独立的文件操作没有trace_id,导致事件 被丢弃。现在AI 
Agent进程同时绕过collect_mode和latency过滤。 Co-Authored-By: Claude Opus 4.6 --- agent/src/ebpf/kernel/files_rw.bpf.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/agent/src/ebpf/kernel/files_rw.bpf.c b/agent/src/ebpf/kernel/files_rw.bpf.c index 55df6d89b80..5006df7a81a 100644 --- a/agent/src/ebpf/kernel/files_rw.bpf.c +++ b/agent/src/ebpf/kernel/files_rw.bpf.c @@ -334,7 +334,14 @@ static __inline int trace_io_event_common(void *ctx, return -1; } +#ifdef EXTENDED_AI_AGENT_FILE_IO + int __ai_agent = is_ai_agent_process(pid_tgid); +#endif + if (tracer_ctx->io_event_collect_mode == 0) { +#ifdef EXTENDED_AI_AGENT_FILE_IO + if (!__ai_agent) +#endif return -1; } @@ -346,6 +353,9 @@ static __inline int trace_io_event_common(void *ctx, } if (trace_id == 0 && tracer_ctx->io_event_collect_mode == 1) { +#ifdef EXTENDED_AI_AGENT_FILE_IO + if (!__ai_agent) +#endif return -1; } @@ -370,21 +380,13 @@ static __inline int trace_io_event_common(void *ctx, latency = TIME_ROLLBACK_DEFAULT_LATENCY_NS; } + if (latency < tracer_ctx->io_event_minimal_duration) { #ifdef EXTENDED_AI_AGENT_FILE_IO - if (is_ai_agent_process(pid_tgid)) { - goto skip_latency_filter; - } + if (!__ai_agent) #endif - - if (latency < tracer_ctx->io_event_minimal_duration) { return -1; } -#ifdef EXTENDED_AI_AGENT_FILE_IO -skip_latency_filter: - ; /* null statement - labels cannot be followed by declarations in C */ -#endif - struct __io_event_buffer *buffer = io_event_buffer__lookup(&k0); if (!buffer) { return -1; From 6ddee0a7262f3459af93d89f0da7e41bcd8f1d1a Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Wed, 18 Mar 2026 18:54:47 +0800 Subject: [PATCH 45/45] chore: support deepflow-ctl for ai-agent process show --- cli/ctl/genesis.go | 9 ++++++++- .../db_descriptions/clickhouse/tag/enum/biz_type.ch | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/cli/ctl/genesis.go b/cli/ctl/genesis.go index 77c2afdc4e5..4a92cfe595c 100644 --- a/cli/ctl/genesis.go +++ 
b/cli/ctl/genesis.go @@ -352,7 +352,7 @@ func tableVinterface(response *simplejson.Json, table *tablewriter.Table) { } func tableProcess(response *simplejson.Json, table *tablewriter.Table) { - table.SetHeader([]string{"PID", "AGENT_ID", "NETNS_ID", "NAME", "PROCESS_NAME", "USER", "START_TIME"}) + table.SetHeader([]string{"PID", "AGENT_ID", "NETNS_ID", "NAME", "PROCESS_NAME", "BIZ_TYPE", "USER", "START_TIME"}) tableItems := [][]string{} for i := range response.Get("DATA").MustArray() { @@ -363,6 +363,13 @@ func tableProcess(response *simplejson.Json, table *tablewriter.Table) { tableItem = append(tableItem, strconv.Itoa(data.Get("NETNS_ID").MustInt())) tableItem = append(tableItem, data.Get("NAME").MustString()) tableItem = append(tableItem, data.Get("PROCESS_NAME").MustString()) + + bizType := "DEFAULT" + if data.Get("BIZ_TYPE").MustInt() == 1 { + bizType = "AI_AGENT" + } + tableItem = append(tableItem, bizType) + tableItem = append(tableItem, data.Get("USER").MustString()) tableItem = append(tableItem, data.Get("START_TIME").MustString()) tableItems = append(tableItems, tableItem) diff --git a/server/querier/db_descriptions/clickhouse/tag/enum/biz_type.ch b/server/querier/db_descriptions/clickhouse/tag/enum/biz_type.ch index f3cadb9174c..98d872e9258 100644 --- a/server/querier/db_descriptions/clickhouse/tag/enum/biz_type.ch +++ b/server/querier/db_descriptions/clickhouse/tag/enum/biz_type.ch @@ -1,3 +1,3 @@ # Value , DisplayName , Description 0 , 默认 , 默认业务类型 -1 , 智能体 , AI Agent +1 , 智能体 , 智能体业务类型