From 47b8e6165029ee63291f044d4d4ed645e833b577 Mon Sep 17 00:00:00 2001 From: Bjorn Melin Date: Thu, 7 May 2026 03:01:54 -0600 Subject: [PATCH 1/5] feat(automation): suggest starter rules --- README.md | 3 +- .../automation-rules-and-bulk-actions.md | 24 +- docs/operations/verification-and-hardening.md | 1 + docs/roadmap/v1-search-triage-draft-queue.md | 1 + src/automation/mod.rs | 12 +- src/automation/model.rs | 50 ++ src/automation/output.rs | 72 ++- src/automation/service.rs | 56 +- src/automation/suggestions.rs | 601 ++++++++++++++++++ src/cli.rs | 18 + src/cli_output/errors.rs | 7 + src/cli_output/tests.rs | 89 +++ src/handlers/automation.rs | 20 + src/lib.rs | 6 + 14 files changed, 952 insertions(+), 8 deletions(-) create mode 100644 src/automation/suggestions.rs diff --git a/README.md b/README.md index 9eb66e1..8727973 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,7 @@ cargo run -- attachment fetch m-1:1.2 --json cargo run -- attachment export m-1:1.2 --json cargo run -- attachment export m-1:1.2 --to ./exports/statement.pdf --json cargo run -- automation rules validate --json +cargo run -- automation rules suggest --json cargo run -- automation rollout --limit 10 --json cargo run -- automation run --json cargo run -- automation run --rule archive-newsletters --limit 25 --json @@ -219,7 +220,7 @@ Advanced manual overrides still work: ## Near-term build plan -1. Use the verification and hardening runbook to canonicalize labels, deepen the local audit corpus, and stage the first real personal ruleset. +1. Use the verification and hardening runbook to canonicalize labels, deepen the local audit corpus, and generate disabled starter rules with `automation rules suggest`. 2. Expand automation ergonomics only after a few low-surprise micro-batch archive/label runs land cleanly. 3. Expand unsubscribe assistance only after the deeper sync proves out list-header coverage in the local cache. 4. Build a TUI over the existing command core, audit surfaces, and SQLite workflow model. diff --git a/docs/operations/automation-rules-and-bulk-actions.md b/docs/operations/automation-rules-and-bulk-actions.md index 1dd4218..80a38fb 100644 --- a/docs/operations/automation-rules-and-bulk-actions.md +++ b/docs/operations/automation-rules-and-bulk-actions.md @@ -7,6 +7,7 @@ This runbook covers Mailroom’s review-first automation surface. The current automation slice owns: - a local typed rules file at `.mailroom/automation.toml` +- read-only starter rule suggestions from recurring local mailbox evidence - rule validation and preview snapshots - read-only rollout checks for first-wave micro-batch readiness - persisted automation runs and append-only run events @@ -33,12 +34,15 @@ cargo run -- audit labels --json cp config/automation.example.toml .mailroom/automation.toml ``` -Automation commands require: +Automation commands other than `automation rules suggest` require: - an authenticated active Gmail account - a locally synced mailbox - an existing `.mailroom/automation.toml` file +`automation rules suggest` still requires an authenticated active account and +local sync evidence, but it does not require an existing rules file. + If you use label actions, the referenced label names must already exist in the local Gmail label cache. If you created labels recently, run `mailroom sync run` again first. @@ -51,6 +55,18 @@ Mailroom reads only one active rule file: Use `config/automation.example.toml` as the tracked template. +You can generate disabled starter rules from recurring older `INBOX` list or +bulk sender evidence in the local cache: + +```bash +cargo run -- automation rules suggest --json +cargo run -- automation rules suggest --limit 5 --min-thread-count 4 --older-than-days 21 --json +``` + +Suggestions are read-only. They do not write `.mailroom/automation.toml` and do +not mutate Gmail. Review the disabled TOML snippets, copy only low-surprise +rules into `.mailroom/automation.toml`, then enable one rule at a time. + Supported match fields: - `from_address` @@ -107,6 +123,12 @@ Validate the active file: cargo run -- automation rules validate --json ``` +Generate disabled starter rules before editing the active file: + +```bash +cargo run -- automation rules suggest --json +``` + Check first-wave rollout readiness without saving a run: ```bash diff --git a/docs/operations/verification-and-hardening.md b/docs/operations/verification-and-hardening.md index fbc2a65..d720690 100644 --- a/docs/operations/verification-and-hardening.md +++ b/docs/operations/verification-and-hardening.md @@ -159,6 +159,7 @@ Validate the rules file and preview a bounded run: ```bash cp config/automation.example.toml .mailroom/automation.toml +cargo run -- automation rules suggest --json $EDITOR .mailroom/automation.toml cargo run -- automation rules validate --json diff --git a/docs/roadmap/v1-search-triage-draft-queue.md b/docs/roadmap/v1-search-triage-draft-queue.md index c54147b..b46bde1 100644 --- a/docs/roadmap/v1-search-triage-draft-queue.md +++ b/docs/roadmap/v1-search-triage-draft-queue.md @@ -61,6 +61,7 @@ The attachment catalog/export foundation is now in place too: The review-first automation slice is now in place too: - typed TOML rules under `.mailroom/automation.toml` +- disabled starter rule suggestions from recurring local mailbox evidence - persisted automation run snapshots and append-only run events - thread-first archive, label, and trash bulk actions gated behind `--execute` - unsubscribe assistance through list headers in candidate inspection output diff --git a/src/automation/mod.rs b/src/automation/mod.rs index 9d04114..0312239 100644 --- a/src/automation/mod.rs +++ b/src/automation/mod.rs @@ -2,10 +2,16 @@ mod model; mod output; mod rules; mod service; +mod suggestions; pub use model::{ - AutomationPruneRequest, AutomationPruneStatus, AutomationRolloutRequest, AutomationRunRequest, - DEFAULT_AUTOMATION_ROLLOUT_LIMIT, DEFAULT_AUTOMATION_RUN_LIMIT, + AutomationPruneRequest, AutomationPruneStatus, AutomationRolloutRequest, + AutomationRulesSuggestRequest, AutomationRunRequest, DEFAULT_AUTOMATION_ROLLOUT_LIMIT, + DEFAULT_AUTOMATION_RUN_LIMIT, DEFAULT_AUTOMATION_SUGGESTION_LIMIT, + DEFAULT_AUTOMATION_SUGGESTION_MIN_THREAD_COUNT, DEFAULT_AUTOMATION_SUGGESTION_OLDER_THAN_DAYS, + DEFAULT_AUTOMATION_SUGGESTION_SAMPLE_LIMIT, }; pub(crate) use service::AutomationServiceError; -pub use service::{apply_run, prune_runs, rollout, run_preview, show_run, validate_rules}; +pub use service::{ + apply_run, prune_runs, rollout, run_preview, show_run, suggest_rules, validate_rules, +}; diff --git a/src/automation/model.rs b/src/automation/model.rs index ab81c34..085670f 100644 --- a/src/automation/model.rs +++ b/src/automation/model.rs @@ -6,6 +6,10 @@ use std::path::PathBuf; pub const DEFAULT_AUTOMATION_RUN_LIMIT: usize = 250; pub const DEFAULT_AUTOMATION_ROLLOUT_LIMIT: usize = 10; +pub const DEFAULT_AUTOMATION_SUGGESTION_LIMIT: usize = 10; +pub const DEFAULT_AUTOMATION_SUGGESTION_MIN_THREAD_COUNT: usize = 3; +pub const DEFAULT_AUTOMATION_SUGGESTION_OLDER_THAN_DAYS: u32 = 14; +pub const DEFAULT_AUTOMATION_SUGGESTION_SAMPLE_LIMIT: usize = 3; #[derive(Debug, Clone)] pub struct AutomationRunRequest { @@ -26,6 +30,14 @@ pub struct AutomationPruneRequest { pub execute: bool, } +#[derive(Debug, Clone)] +pub struct AutomationRulesSuggestRequest { + pub limit: usize, + pub min_thread_count: usize, + pub older_than_days: u32, + pub sample_limit: usize, +} + #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "snake_case")] pub enum AutomationPruneStatus { @@ -53,6 +65,44 @@ pub struct AutomationRulesValidateReport { pub rules: Vec, } +#[derive(Debug, Clone, Serialize)] +pub struct AutomationRulesSuggestReport { + pub account_id: String, + pub rules_path: PathBuf, + pub inspected_thread_count: usize, + pub eligible_thread_count: usize, + pub suggestion_count: usize, + pub min_thread_count: usize, + pub older_than_days: u32, + pub suggestions: Vec, + pub warnings: Vec, + pub next_steps: Vec, + pub command_plan: Vec, +} + +#[derive(Debug, Clone, Serialize)] +pub struct AutomationRuleSuggestion { + pub rule_id: String, + pub description: String, + pub confidence: String, + pub source: String, + pub matched_thread_count: usize, + pub match_fields: Vec, + pub sample_threads: Vec, + pub rule: AutomationRule, + pub toml: String, +} + +#[derive(Debug, Clone, Serialize)] +pub struct AutomationRuleSuggestionSample { + pub thread_id: String, + pub message_id: String, + pub subject: String, + pub from_address: Option, + pub list_id_header: Option, + pub internal_date_epoch_ms: i64, +} + #[derive(Debug, Clone, Serialize)] pub struct AutomationRuleSummary { pub id: String, diff --git a/src/automation/output.rs b/src/automation/output.rs index d31d549..02b1857 100644 --- a/src/automation/output.rs +++ b/src/automation/output.rs @@ -1,6 +1,7 @@ use super::model::{ AutomationApplyReport, AutomationPruneReport, AutomationRolloutReport, - AutomationRulesValidateReport, AutomationRunPreviewReport, AutomationShowReport, + AutomationRulesSuggestReport, AutomationRulesValidateReport, AutomationRunPreviewReport, + AutomationShowReport, }; use anyhow::Result; use std::io::{self, Write}; @@ -51,6 +52,75 @@ impl AutomationRulesValidateReport { } } +impl AutomationRulesSuggestReport { + pub fn print(&self, json: bool) -> Result<()> { + route_output_to_stdout(json, |json, stdout| self.write(json, stdout)) + } + + fn render_plain(&self) -> String { + let mut lines = vec![ + String::from("operation=rules_suggest"), + format!("account_id={}", sanitize(&self.account_id)), + format!( + "rules_path={}", + sanitize(&self.rules_path.display().to_string()) + ), + format!("inspected_thread_count={}", self.inspected_thread_count), + format!("eligible_thread_count={}", self.eligible_thread_count), + format!("suggestion_count={}", self.suggestion_count), + format!("min_thread_count={}", self.min_thread_count), + format!("older_than_days={}", self.older_than_days), + ]; + if !self.suggestions.is_empty() { + lines.push(String::from("suggestions_format=tsv")); + lines.push(String::from( + "rule_id\tconfidence\tmatched_thread_count\tsource\tmatch_fields\tsample_subjects", + )); + lines.extend(self.suggestions.iter().map(|suggestion| { + let sample_subjects = suggestion + .sample_threads + .iter() + .map(|sample| sample.subject.as_str()) + .collect::>() + .join(" | "); + format!( + "{}\t{}\t{}\t{}\t{}\t{}", + sanitize(&suggestion.rule_id), + sanitize(&suggestion.confidence), + suggestion.matched_thread_count, + sanitize(&suggestion.source), + sanitize(&suggestion.match_fields.join(", ")), + sanitize(&sample_subjects), + ) + })); + lines.push(String::from("toml_snippets_format=toml")); + for suggestion in &self.suggestions { + lines.push(format!("# rule_id={}", sanitize(&suggestion.rule_id))); + lines.push(suggestion.toml.trim_end().to_owned()); + } + } + for warning in &self.warnings { + lines.push(format!("warning={}", sanitize(warning))); + } + for next_step in &self.next_steps { + lines.push(format!("next_step={}", sanitize(next_step))); + } + for command in &self.command_plan { + lines.push(format!("command={}", sanitize(command))); + } + lines.join("\n") + "\n" + } + + fn write(&self, json: bool, writer: &mut W) -> Result<()> { + if json { + crate::cli_output::write_json_success(writer, self)?; + } else { + writer.write_all(self.render_plain().as_bytes())?; + } + Ok(()) + } +} + impl AutomationRunPreviewReport { pub fn print(&self, json: bool) -> Result<()> { route_output_to_stdout(json, |json, stdout| self.write(json, stdout)) diff --git a/src/automation/service.rs b/src/automation/service.rs index 547a1fd..86d9bbd 100644 --- a/src/automation/service.rs +++ b/src/automation/service.rs @@ -1,10 +1,12 @@ use super::model::{ AutomationApplyReport, AutomationPruneReport, AutomationPruneRequest, AutomationPruneStatus, AutomationRolloutCandidateSummary, AutomationRolloutReport, AutomationRolloutRequest, - AutomationRule, AutomationRuleAction, AutomationRulesValidateReport, - AutomationRunPreviewReport, AutomationRunRequest, AutomationShowReport, + AutomationRule, AutomationRuleAction, AutomationRulesSuggestReport, + AutomationRulesSuggestRequest, AutomationRulesValidateReport, AutomationRunPreviewReport, + AutomationRunRequest, AutomationShowReport, }; use super::rules::{ResolvedAutomationRules, resolve_rule_selection, validate_rule_file}; +use super::suggestions::suggest_rules_from_candidates; use crate::config::ConfigReport; use crate::gmail::GmailClient; use crate::store; @@ -42,6 +44,14 @@ pub enum AutomationServiceError { InvalidRolloutLimit, #[error("automation prune --older-than-days must be greater than zero")] InvalidPruneWindow, + #[error("automation rules suggest --limit must be greater than zero")] + InvalidSuggestionLimit, + #[error("automation rules suggest --min-thread-count must be greater than zero")] + InvalidSuggestionMinThreadCount, + #[error("automation rules suggest --older-than-days must be greater than zero")] + InvalidSuggestionOlderThanDays, + #[error("automation rules suggest --sample-limit must be greater than zero")] + InvalidSuggestionSampleLimit, #[error("re-run with --execute to apply automation changes")] ExecuteRequired, #[error( @@ -78,6 +88,11 @@ pub enum AutomationServiceError { }, #[error("{message}")] RuleValidation { message: String }, + #[error("failed to render suggested automation rule TOML: {source}")] + RuleTomlSerialize { + #[source] + source: toml::ser::Error, + }, #[error("failed to join automation task: {source}")] TaskPanic { #[source] @@ -115,6 +130,25 @@ pub async fn validate_rules(config_report: &ConfigReport) -> Result Result { + validate_suggest_request(&request)?; + ensure_runtime_dirs_task(configured_paths(config_report)?).await?; + init_store_task(config_report).await?; + let account_id = resolve_automation_account_id_task(config_report).await?; + let thread_candidates = list_latest_thread_candidates_task(config_report, &account_id).await?; + let now_epoch_ms = current_epoch_seconds()?.saturating_mul(1_000); + Ok(suggest_rules_from_candidates( + config_report, + account_id, + &thread_candidates, + &request, + now_epoch_ms, + )?) +} + pub async fn run_preview( config_report: &ConfigReport, request: AutomationRunRequest, @@ -643,6 +677,24 @@ fn normalize_prune_statuses(statuses: Vec) -> Vec Result<(), AutomationServiceError> { + if request.limit == 0 { + return Err(AutomationServiceError::InvalidSuggestionLimit); + } + if request.min_thread_count == 0 { + return Err(AutomationServiceError::InvalidSuggestionMinThreadCount); + } + if request.older_than_days == 0 { + return Err(AutomationServiceError::InvalidSuggestionOlderThanDays); + } + if request.sample_limit == 0 { + return Err(AutomationServiceError::InvalidSuggestionSampleLimit); + } + Ok(()) +} + fn prune_status_to_run_status(status: AutomationPruneStatus) -> AutomationRunStatus { match status { AutomationPruneStatus::Previewed => AutomationRunStatus::Previewed, diff --git a/src/automation/suggestions.rs b/src/automation/suggestions.rs new file mode 100644 index 0000000..7c5d48d --- /dev/null +++ b/src/automation/suggestions.rs @@ -0,0 +1,601 @@ +use super::AutomationServiceError; +use super::model::{ + AutomationMatchRule, AutomationRule, AutomationRuleAction, AutomationRuleSet, + AutomationRuleSuggestion, AutomationRuleSuggestionSample, AutomationRulesSuggestReport, + AutomationRulesSuggestRequest, +}; +use super::rules::active_rules_path; +use crate::config::ConfigReport; +use crate::store::automation::AutomationThreadCandidate; +use std::collections::{BTreeMap, BTreeSet}; + +const INBOX_LABEL: &str = "INBOX"; +const RULE_ID_SOURCE_LIMIT: usize = 48; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +enum SuggestionKind { + ListId, + Sender, +} + +impl SuggestionKind { + const fn confidence(self) -> &'static str { + match self { + Self::ListId => "high", + Self::Sender => "medium", + } + } + + const fn source(self) -> &'static str { + match self { + Self::ListId => "list_id", + Self::Sender => "sender", + } + } + + const fn priority(self) -> i64 { + match self { + Self::ListId => 200, + Self::Sender => 150, + } + } + + const fn rank(self) -> usize { + match self { + Self::ListId => 2, + Self::Sender => 1, + } + } + + const fn rule_prefix(self) -> &'static str { + match self { + Self::ListId => "archive-list", + Self::Sender => "archive-sender", + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +struct SuggestionKey { + kind: SuggestionKind, + value: String, +} + +#[derive(Debug, Clone)] +struct SuggestionGroup { + key: SuggestionKey, + matched_thread_count: usize, + all_have_list_unsubscribe: bool, + precedence_values: BTreeSet, + samples: Vec, +} + +pub(crate) fn suggest_rules_from_candidates( + config_report: &ConfigReport, + account_id: String, + candidates: &[AutomationThreadCandidate], + request: &AutomationRulesSuggestRequest, + now_epoch_ms: i64, +) -> Result { + let rules_path = active_rules_path(config_report); + let inspected_thread_count = candidates.len(); + let (eligible_thread_count, groups) = group_candidates(candidates, request, now_epoch_ms); + let mut suggestions = groups + .into_values() + .filter(|group| group.matched_thread_count >= request.min_thread_count) + .map(|group| build_suggestion(group, request)) + .collect::, _>>()?; + + suggestions.sort_by(|left, right| { + suggestion_rank(right) + .cmp(&suggestion_rank(left)) + .then_with(|| left.rule_id.cmp(&right.rule_id)) + }); + suggestions.truncate(request.limit); + + let mut warnings = Vec::new(); + if inspected_thread_count == 0 { + warnings.push(String::from( + "Local mailbox cache is empty; run `mailroom sync run --json` before using suggestions.", + )); + } else if suggestions.is_empty() { + warnings.push(format!( + "No recurring older INBOX list or bulk senders met the minimum thread threshold of {}.", + request.min_thread_count + )); + } + + let next_steps = vec![ + format!( + "Review the disabled TOML snippets and copy selected rules into {}.", + rules_path.display() + ), + String::from( + "Enable one low-surprise archive rule, validate it, run rollout, then create and inspect a small preview snapshot.", + ), + String::from("Keep trash and unsubscribe execution out of first-wave automation."), + ]; + + let command_plan = vec![ + String::from("cargo run -- automation rules validate --json"), + String::from("cargo run -- automation rollout --rule --limit 10 --json"), + String::from("cargo run -- automation run --rule --limit 10 --json"), + String::from("cargo run -- automation show --json"), + String::from("cargo run -- automation apply --execute --json"), + ]; + + Ok(AutomationRulesSuggestReport { + account_id, + rules_path, + inspected_thread_count, + eligible_thread_count, + suggestion_count: suggestions.len(), + min_thread_count: request.min_thread_count, + older_than_days: request.older_than_days, + suggestions, + warnings, + next_steps, + command_plan, + }) +} + +fn group_candidates( + candidates: &[AutomationThreadCandidate], + request: &AutomationRulesSuggestRequest, + now_epoch_ms: i64, +) -> (usize, BTreeMap) { + let cutoff_epoch_ms = + now_epoch_ms.saturating_sub(i64::from(request.older_than_days).saturating_mul(86_400_000)); + let mut eligible_thread_count = 0usize; + let mut groups = BTreeMap::new(); + + for candidate in candidates { + if !is_candidate_eligible(candidate, cutoff_epoch_ms) { + continue; + } + let Some(key) = suggestion_key(candidate) else { + continue; + }; + eligible_thread_count += 1; + let has_unsubscribe = has_list_unsubscribe(candidate); + let precedence = normalized_precedence(candidate.precedence_header.as_deref()); + let sample = AutomationRuleSuggestionSample { + thread_id: candidate.thread_id.clone(), + message_id: candidate.message_id.clone(), + subject: candidate.subject.clone(), + from_address: candidate.from_address.clone(), + list_id_header: candidate.list_id_header.clone(), + internal_date_epoch_ms: candidate.internal_date_epoch_ms, + }; + + let group = groups + .entry(key.clone()) + .or_insert_with(|| SuggestionGroup { + key, + matched_thread_count: 0, + all_have_list_unsubscribe: true, + precedence_values: BTreeSet::new(), + samples: Vec::new(), + }); + group.matched_thread_count += 1; + group.all_have_list_unsubscribe &= has_unsubscribe; + if let Some(precedence) = precedence { + group.precedence_values.insert(precedence); + } + if group.samples.len() < request.sample_limit { + group.samples.push(sample); + } + } + + (eligible_thread_count, groups) +} + +fn is_candidate_eligible(candidate: &AutomationThreadCandidate, cutoff_epoch_ms: i64) -> bool { + has_label(&candidate.label_names, INBOX_LABEL) + && candidate.internal_date_epoch_ms <= cutoff_epoch_ms + && (has_list_unsubscribe(candidate) + || normalized_precedence(candidate.precedence_header.as_deref()).is_some()) +} + +fn suggestion_key(candidate: &AutomationThreadCandidate) -> Option { + if has_list_unsubscribe(candidate) + && let Some(list_id) = normalized_list_id(candidate.list_id_header.as_deref()) + { + return Some(SuggestionKey { + kind: SuggestionKind::ListId, + value: list_id, + }); + } + + normalized_from_address(candidate.from_address.as_deref()).map(|from_address| SuggestionKey { + kind: SuggestionKind::Sender, + value: from_address, + }) +} + +fn build_suggestion( + group: SuggestionGroup, + request: &AutomationRulesSuggestRequest, +) -> Result { + let rule_id = format!( + "{}-{}", + group.key.kind.rule_prefix(), + slugify(&group.key.value) + ); + let description = format!( + "Archive older INBOX mail for {} after review.", + group.key.value + ); + let mut matcher = AutomationMatchRule { + label_any: vec![String::from(INBOX_LABEL)], + older_than_days: Some(request.older_than_days), + ..AutomationMatchRule::default() + }; + let mut match_fields = vec![ + String::from("label_any=INBOX"), + format!("older_than_days={}", request.older_than_days), + ]; + + match group.key.kind { + SuggestionKind::ListId => { + matcher.has_list_unsubscribe = Some(true); + matcher.list_id_contains = vec![group.key.value.clone()]; + match_fields.push(String::from("has_list_unsubscribe=true")); + match_fields.push(format!("list_id_contains={}", group.key.value)); + } + SuggestionKind::Sender => { + matcher.from_address = Some(group.key.value.clone()); + match_fields.push(format!("from_address={}", group.key.value)); + if group.all_have_list_unsubscribe { + matcher.has_list_unsubscribe = Some(true); + match_fields.push(String::from("has_list_unsubscribe=true")); + } + if !group.precedence_values.is_empty() { + matcher.precedence = group.precedence_values.iter().cloned().collect(); + match_fields.push(format!("precedence={}", matcher.precedence.join("|"))); + } + } + } + + let rule = AutomationRule { + id: rule_id.clone(), + description: Some(description.clone()), + enabled: false, + priority: group.key.kind.priority(), + matcher, + action: AutomationRuleAction::Archive, + }; + let toml = toml::to_string_pretty(&AutomationRuleSet { + rules: vec![rule.clone()], + }) + .map_err(|source| AutomationServiceError::RuleTomlSerialize { source })?; + + Ok(AutomationRuleSuggestion { + rule_id, + description, + confidence: group.key.kind.confidence().to_owned(), + source: group.key.kind.source().to_owned(), + matched_thread_count: group.matched_thread_count, + match_fields, + sample_threads: group.samples, + rule, + toml, + }) +} + +fn suggestion_rank(suggestion: &AutomationRuleSuggestion) -> (usize, usize, i64) { + let confidence_rank = match suggestion.confidence.as_str() { + "high" => SuggestionKind::ListId.rank(), + _ => SuggestionKind::Sender.rank(), + }; + let latest_epoch_ms = suggestion + .sample_threads + .iter() + .map(|sample| sample.internal_date_epoch_ms) + .max() + .unwrap_or_default(); + ( + confidence_rank, + suggestion.matched_thread_count, + latest_epoch_ms, + ) +} + +fn has_label(labels: &[String], expected: &str) -> bool { + labels + .iter() + .any(|label| label.eq_ignore_ascii_case(expected)) +} + +fn has_list_unsubscribe(candidate: &AutomationThreadCandidate) -> bool { + candidate + .list_unsubscribe_header + .as_deref() + .is_some_and(|value| !value.trim().is_empty()) +} + +fn normalized_list_id(header: Option<&str>) -> Option { + let value = header? + .trim() + .trim_start_matches('<') + .trim_end_matches('>') + .trim() + .to_ascii_lowercase(); + (!value.is_empty()).then_some(value) +} + +fn normalized_from_address(from_address: Option<&str>) -> Option { + let value = from_address?.trim().to_ascii_lowercase(); + (!value.is_empty()).then_some(value) +} + +fn normalized_precedence(header: Option<&str>) -> Option { + let value = header?.trim().to_ascii_lowercase(); + ["bulk", "list", "junk"] + .iter() + .find(|expected| value.contains(**expected)) + .map(|value| (*value).to_owned()) +} + +fn slugify(value: &str) -> String { + let mut slug = String::new(); + let mut previous_was_dash = false; + for character in value.chars() { + let next = if character.is_ascii_alphanumeric() { + Some(character.to_ascii_lowercase()) + } else if character == '.' || character == '_' || character == '-' || character == '@' { + Some('-') + } else { + None + }; + if let Some(next) = next { + if next == '-' { + if !slug.is_empty() && !previous_was_dash { + slug.push(next); + previous_was_dash = true; + } + } else { + slug.push(next); + previous_was_dash = false; + } + } + if slug.len() >= RULE_ID_SOURCE_LIMIT { + break; + } + } + let slug = slug.trim_matches('-').to_owned(); + if slug.is_empty() { + String::from("unknown") + } else { + slug + } +} + +#[cfg(test)] +mod tests { + use super::{slugify, suggest_rules_from_candidates}; + use crate::automation::model::AutomationRulesSuggestRequest; + use crate::config::resolve; + use crate::store::automation::AutomationThreadCandidate; + use crate::workspace::WorkspacePaths; + use tempfile::TempDir; + + #[test] + fn suggests_disabled_list_rule_from_recurring_inbox_list_mail() { + let (_repo_root, config_report) = config_report(); + let request = request(); + let candidates = vec![ + list_candidate("thread-1", "message-1", 100), + list_candidate("thread-2", "message-2", 200), + list_candidate("thread-3", "message-3", 300), + ]; + + let report = suggest_rules_from_candidates( + &config_report, + String::from("gmail:operator@example.com"), + &candidates, + &request, + 2_000_000_000, + ) + .unwrap(); + + assert_eq!(report.inspected_thread_count, 3); + assert_eq!(report.eligible_thread_count, 3); + assert_eq!(report.suggestion_count, 1); + let suggestion = &report.suggestions[0]; + assert_eq!(suggestion.rule_id, "archive-list-digest-example-com"); + assert_eq!(suggestion.confidence, "high"); + assert_eq!(suggestion.matched_thread_count, 3); + assert!(!suggestion.rule.enabled); + assert_eq!( + suggestion.rule.matcher.label_any, + vec![String::from("INBOX")] + ); + assert_eq!(suggestion.rule.matcher.older_than_days, Some(14)); + assert_eq!(suggestion.rule.matcher.has_list_unsubscribe, Some(true)); + assert_eq!( + suggestion.rule.matcher.list_id_contains, + vec![String::from("digest.example.com")] + ); + assert!(suggestion.toml.contains("[[rules]]")); + assert!(suggestion.toml.contains("enabled = false")); + assert!(suggestion.toml.contains("[rules.match]")); + toml::from_str::(&suggestion.toml).unwrap(); + } + + #[test] + fn suggests_sender_rule_for_recurring_bulk_sender_without_list_id() { + let (_repo_root, config_report) = config_report(); + let request = request(); + let candidates = vec![ + sender_candidate("thread-1", "message-1", 100), + sender_candidate("thread-2", "message-2", 200), + sender_candidate("thread-3", "message-3", 300), + ]; + + let report = suggest_rules_from_candidates( + &config_report, + String::from("gmail:operator@example.com"), + &candidates, + &request, + 2_000_000_000, + ) + .unwrap(); + + let suggestion = &report.suggestions[0]; + assert_eq!(suggestion.rule_id, "archive-sender-notices-example-com"); + assert_eq!(suggestion.confidence, "medium"); + assert_eq!( + suggestion.rule.matcher.from_address.as_deref(), + Some("notices@example.com") + ); + assert_eq!( + suggestion.rule.matcher.precedence, + vec![String::from("bulk")] + ); + assert_eq!(suggestion.rule.matcher.has_list_unsubscribe, None); + } + + #[test] + fn ignores_new_threads_non_inbox_threads_and_small_groups() { + let (_repo_root, config_report) = config_report(); + let mut request = request(); + request.min_thread_count = 2; + let mut non_inbox = list_candidate("thread-1", "message-1", 100); + non_inbox.label_names = vec![String::from("CATEGORY_PROMOTIONS")]; + let new_thread = list_candidate("thread-2", "message-2", 1_999_999_999); + let one_old_thread = list_candidate("thread-3", "message-3", 100); + + let report = suggest_rules_from_candidates( + &config_report, + String::from("gmail:operator@example.com"), + &[non_inbox, new_thread, one_old_thread], + &request, + 2_000_000_000, + ) + .unwrap(); + + assert_eq!(report.eligible_thread_count, 1); + assert!(report.suggestions.is_empty()); + assert_eq!(report.warnings.len(), 1); + } + + #[test] + fn limits_suggestions_and_samples_deterministically() { + let (_repo_root, config_report) = config_report(); + let mut request = request(); + request.limit = 1; + request.sample_limit = 1; + let candidates = vec![ + list_candidate_for("a.example.com", "thread-a1", "message-a1", 100), + list_candidate_for("a.example.com", "thread-a2", "message-a2", 200), + list_candidate_for("a.example.com", "thread-a3", "message-a3", 300), + list_candidate_for("b.example.com", "thread-b1", "message-b1", 400), + list_candidate_for("b.example.com", "thread-b2", "message-b2", 500), + list_candidate_for("b.example.com", "thread-b3", "message-b3", 600), + list_candidate_for("b.example.com", "thread-b4", "message-b4", 700), + ]; + + let report = suggest_rules_from_candidates( + &config_report, + String::from("gmail:operator@example.com"), + &candidates, + &request, + 2_000_000_000, + ) + .unwrap(); + + assert_eq!(report.suggestion_count, 1); + assert_eq!(report.suggestions[0].rule_id, "archive-list-b-example-com"); + assert_eq!(report.suggestions[0].sample_threads.len(), 1); + } + + #[test] + fn slugify_normalizes_rule_ids() { + assert_eq!(slugify(""), "digest-example-com"); + assert_eq!(slugify("!!!"), "unknown"); + } + + fn request() -> AutomationRulesSuggestRequest { + AutomationRulesSuggestRequest { + limit: 10, + min_thread_count: 3, + older_than_days: 14, + sample_limit: 3, + } + } + + fn config_report() -> (TempDir, crate::config::ConfigReport) { + let repo_root = TempDir::with_prefix("mailroom-automation-suggest").unwrap(); + std::fs::create_dir(repo_root.path().join(".git")).unwrap(); + let paths = WorkspacePaths::from_repo_root(repo_root.path().to_path_buf()); + paths.ensure_runtime_dirs().unwrap(); + let config_report = resolve(&paths).unwrap(); + (repo_root, config_report) + } + + fn list_candidate( + thread_id: &str, + message_id: &str, + internal_date_epoch_ms: i64, + ) -> AutomationThreadCandidate { + list_candidate_for( + "digest.example.com", + thread_id, + message_id, + internal_date_epoch_ms, + ) + } + + fn list_candidate_for( + list_id: &str, + thread_id: &str, + message_id: &str, + internal_date_epoch_ms: i64, + ) -> AutomationThreadCandidate { + candidate(thread_id, message_id, internal_date_epoch_ms, |candidate| { + candidate.list_id_header = Some(format!("<{list_id}>")); + candidate.list_unsubscribe_header = + Some(String::from("")); + }) + } + + fn sender_candidate( + thread_id: &str, + message_id: &str, + internal_date_epoch_ms: i64, + ) -> AutomationThreadCandidate { + candidate(thread_id, message_id, internal_date_epoch_ms, |candidate| { + candidate.from_header = String::from("Notices "); + candidate.from_address = Some(String::from("notices@example.com")); + candidate.precedence_header = Some(String::from("bulk")); + }) + } + + fn candidate( + thread_id: &str, + message_id: &str, + internal_date_epoch_ms: i64, + update: impl FnOnce(&mut AutomationThreadCandidate), + ) -> AutomationThreadCandidate { + let mut candidate = AutomationThreadCandidate { + account_id: String::from("gmail:operator@example.com"), + thread_id: thread_id.to_owned(), + message_id: message_id.to_owned(), + internal_date_epoch_ms, + subject: format!("Subject for {thread_id}"), + from_header: String::from("Digest "), + from_address: Some(String::from("digest@example.com")), + snippet: String::from("Snippet"), + label_names: vec![String::from("INBOX")], + attachment_count: 0, + list_id_header: None, + list_unsubscribe_header: None, + list_unsubscribe_post_header: None, + precedence_header: None, + auto_submitted_header: None, + }; + update(&mut candidate); + candidate + } +} diff --git a/src/cli.rs b/src/cli.rs index d2db887..3c3172e 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -353,6 +353,24 @@ pub enum AutomationRulesCommand { #[arg(long)] json: bool, }, + /// Suggest disabled starter rules from recurring local mailbox evidence + Suggest { + /// Maximum number of starter rules to suggest + #[arg(long, default_value_t = crate::automation::DEFAULT_AUTOMATION_SUGGESTION_LIMIT)] + limit: usize, + /// Minimum recurring thread count required before suggesting a rule + #[arg(long, default_value_t = crate::automation::DEFAULT_AUTOMATION_SUGGESTION_MIN_THREAD_COUNT)] + min_thread_count: usize, + /// Only suggest rules for threads at least this old + #[arg(long, default_value_t = crate::automation::DEFAULT_AUTOMATION_SUGGESTION_OLDER_THAN_DAYS)] + older_than_days: u32, + /// Maximum sample threads to include per suggestion + #[arg(long, default_value_t = crate::automation::DEFAULT_AUTOMATION_SUGGESTION_SAMPLE_LIMIT)] + sample_limit: usize, + /// Emit JSON instead of plain text + #[arg(long)] + json: bool, + }, } #[derive(Debug, Subcommand)] diff --git a/src/cli_output/errors.rs b/src/cli_output/errors.rs index 0bc603d..f0d0ac7 100644 --- a/src/cli_output/errors.rs +++ b/src/cli_output/errors.rs @@ -118,6 +118,10 @@ fn classify_error(error: &AnyhowError) -> (ErrorCode, &'static str) { AutomationServiceError::InvalidLimit | AutomationServiceError::InvalidRolloutLimit | AutomationServiceError::InvalidPruneWindow + | AutomationServiceError::InvalidSuggestionLimit + | AutomationServiceError::InvalidSuggestionMinThreadCount + | AutomationServiceError::InvalidSuggestionOlderThanDays + | AutomationServiceError::InvalidSuggestionSampleLimit | AutomationServiceError::ExecuteRequired | AutomationServiceError::RuleFileMissing { .. } | AutomationServiceError::RuleFileRead { .. } @@ -134,6 +138,9 @@ fn classify_error(error: &AnyhowError) -> (ErrorCode, &'static str) { AutomationServiceError::TaskPanic { .. } => { (ErrorCode::InternalFailure, "automation.task_panic") } + AutomationServiceError::RuleTomlSerialize { .. } => { + (ErrorCode::InternalFailure, "automation.rule_toml") + } AutomationServiceError::StoreInit { .. } | AutomationServiceError::MailboxRead { .. } | AutomationServiceError::AutomationRead { .. } diff --git a/src/cli_output/tests.rs b/src/cli_output/tests.rs index 812531a..4240076 100644 --- a/src/cli_output/tests.rs +++ b/src/cli_output/tests.rs @@ -179,6 +179,95 @@ fn automation_prune_validation_maps_to_validation_failed_code() { assert_eq!(exit_code(&report), std::process::ExitCode::from(2)); } +#[test] +fn automation_rules_suggest_validation_maps_to_validation_failed_code() { + let error = anyhow!(AutomationServiceError::InvalidSuggestionLimit); + + let report = describe_error(&error, "automation.rules.suggest"); + let value = to_value(json_failure_value(&report)).unwrap(); + + assert_eq!(value["error"]["code"], json!("validation_failed")); + assert_eq!(value["error"]["kind"], json!("automation.validation")); + assert_eq!( + value["error"]["operation"], + json!("automation.rules.suggest") + ); + assert_eq!(exit_code(&report), std::process::ExitCode::from(2)); +} + +#[test] +fn automation_rules_suggest_zero_limit_fails_in_json_and_human_modes() { + use std::process::Command; + use tempfile::TempDir; + + let cargo = std::env::var("CARGO").unwrap_or_else(|_| String::from("cargo")); + let manifest_path = format!("{}/Cargo.toml", env!("CARGO_MANIFEST_DIR")); + let repo_root = TempDir::new().unwrap(); + std::fs::create_dir(repo_root.path().join(".git")).unwrap(); + let config_dir = repo_root.path().join("config"); + std::fs::create_dir_all(&config_dir).unwrap(); + + let json_output = Command::new(&cargo) + .args([ + "run", + "--quiet", + "--manifest-path", + &manifest_path, + "--", + "automation", + "rules", + "suggest", + "--limit", + "0", + "--json", + ]) + .env("XDG_CONFIG_HOME", &config_dir) + .env_remove("HOME") + .current_dir(repo_root.path()) + .output() + .unwrap(); + assert_eq!(json_output.status.code(), Some(2)); + assert!(json_output.stderr.is_empty()); + + let json_stdout = String::from_utf8(json_output.stdout).unwrap(); + let json_value: serde_json::Value = serde_json::from_str(&json_stdout).unwrap(); + assert_eq!(json_value["success"], json!(false)); + assert_eq!(json_value["error"]["code"], json!("validation_failed")); + assert_eq!( + json_value["error"]["operation"], + json!("automation.rules.suggest") + ); + assert!( + json_value["error"]["message"] + .as_str() + .unwrap() + .contains("--limit") + ); + + let human_output = Command::new(&cargo) + .args([ + "run", + "--quiet", + "--manifest-path", + &manifest_path, + "--", + "automation", + "rules", + "suggest", + "--limit", + "0", + ]) + .env("XDG_CONFIG_HOME", &config_dir) + .env_remove("HOME") + .current_dir(repo_root.path()) + .output() + .unwrap(); + assert_eq!(human_output.status.code(), Some(2)); + assert!(human_output.stdout.is_empty()); + let human_stderr = String::from_utf8(human_output.stderr).unwrap(); + assert!(human_stderr.contains("automation rules suggest --limit")); +} + #[test] fn attachment_file_errors_map_to_validation_failed_code() { let error = anyhow!(WorkflowServiceError::AttachmentRead { diff --git a/src/handlers/automation.rs b/src/handlers/automation.rs index 7e0fb2f..fb2a3ee 100644 --- a/src/handlers/automation.rs +++ b/src/handlers/automation.rs @@ -15,6 +15,26 @@ pub(crate) async fn handle_automation_command( } => automation::validate_rules(&config_report) .await? .print(json)?, + AutomationCommand::Rules { + command: + AutomationRulesCommand::Suggest { + limit, + min_thread_count, + older_than_days, + sample_limit, + json, + }, + } => automation::suggest_rules( + &config_report, + automation::AutomationRulesSuggestRequest { + limit, + min_thread_count, + older_than_days, + sample_limit, + }, + ) + .await? + .print(json)?, AutomationCommand::Run { rule_ids, limit, diff --git a/src/lib.rs b/src/lib.rs index 834c093..d8ab805 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -219,6 +219,12 @@ fn command_metadata(command: &Commands) -> CommandMetadata { json: *json, operation: "automation.rules.validate", }, + AutomationCommand::Rules { + command: AutomationRulesCommand::Suggest { json, .. }, + } => CommandMetadata { + json: *json, + operation: "automation.rules.suggest", + }, AutomationCommand::Run { json, .. } => CommandMetadata { json: *json, operation: "automation.run", From e8db301bf6cceec9a4e9f72e97908de3c4a44fd2 Mon Sep 17 00:00:00 2001 From: Bjorn Melin Date: Thu, 7 May 2026 03:14:07 -0600 Subject: [PATCH 2/5] fix(automation): align rule suggestion evidence --- src/automation/suggestions.rs | 194 ++++++++++++++++++++++++++++++---- 1 file changed, 172 insertions(+), 22 deletions(-) diff --git a/src/automation/suggestions.rs b/src/automation/suggestions.rs index 7c5d48d..babb862 100644 --- a/src/automation/suggestions.rs +++ b/src/automation/suggestions.rs @@ -59,14 +59,20 @@ impl SuggestionKind { struct SuggestionKey { kind: SuggestionKind, value: String, + evidence: SuggestionEvidence, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +enum SuggestionEvidence { + ListId, + ListUnsubscribe, + Precedence(String), } #[derive(Debug, Clone)] struct SuggestionGroup { key: SuggestionKey, matched_thread_count: usize, - all_have_list_unsubscribe: bool, - precedence_values: BTreeSet, samples: Vec, } @@ -92,6 +98,7 @@ pub(crate) fn suggest_rules_from_candidates( .then_with(|| left.rule_id.cmp(&right.rule_id)) }); suggestions.truncate(request.limit); + deduplicate_rule_ids(&mut suggestions)?; let mut warnings = Vec::new(); if inspected_thread_count == 0 { @@ -157,8 +164,6 @@ fn group_candidates( continue; }; eligible_thread_count += 1; - let has_unsubscribe = has_list_unsubscribe(candidate); - let precedence = normalized_precedence(candidate.precedence_header.as_deref()); let sample = AutomationRuleSuggestionSample { thread_id: candidate.thread_id.clone(), message_id: candidate.message_id.clone(), @@ -173,15 +178,9 @@ fn group_candidates( .or_insert_with(|| SuggestionGroup { key, matched_thread_count: 0, - all_have_list_unsubscribe: true, - precedence_values: BTreeSet::new(), samples: Vec::new(), }); group.matched_thread_count += 1; - group.all_have_list_unsubscribe &= has_unsubscribe; - if let Some(precedence) = precedence { - group.precedence_values.insert(precedence); - } if group.samples.len() < request.sample_limit { group.samples.push(sample); } @@ -204,12 +203,22 @@ fn suggestion_key(candidate: &AutomationThreadCandidate) -> Option { matcher.from_address = Some(group.key.value.clone()); match_fields.push(format!("from_address={}", group.key.value)); - if group.all_have_list_unsubscribe { - matcher.has_list_unsubscribe = Some(true); - match_fields.push(String::from("has_list_unsubscribe=true")); - } - if !group.precedence_values.is_empty() { - matcher.precedence = group.precedence_values.iter().cloned().collect(); - match_fields.push(format!("precedence={}", matcher.precedence.join("|"))); + match &group.key.evidence { + SuggestionEvidence::ListId => {} + SuggestionEvidence::ListUnsubscribe => { + matcher.has_list_unsubscribe = Some(true); + match_fields.push(String::from("has_list_unsubscribe=true")); + } + SuggestionEvidence::Precedence(precedence) => { + matcher.precedence = vec![precedence.clone()]; + match_fields.push(format!("precedence={precedence}")); + } } } } @@ -265,10 +277,7 @@ fn build_suggestion( matcher, action: AutomationRuleAction::Archive, }; - let toml = toml::to_string_pretty(&AutomationRuleSet { - rules: vec![rule.clone()], - }) - .map_err(|source| AutomationServiceError::RuleTomlSerialize { source })?; + let toml = render_rule_toml(&rule)?; Ok(AutomationRuleSuggestion { rule_id, @@ -283,6 +292,34 @@ fn build_suggestion( }) } +fn deduplicate_rule_ids( + suggestions: &mut [AutomationRuleSuggestion], +) -> Result<(), AutomationServiceError> { + let mut used_rule_ids = BTreeSet::new(); + for suggestion in suggestions { + let base_rule_id = suggestion.rule_id.clone(); + let mut candidate_rule_id = base_rule_id.clone(); + let mut suffix = 2usize; + while !used_rule_ids.insert(candidate_rule_id.clone()) { + candidate_rule_id = format!("{base_rule_id}-{suffix}"); + suffix += 1; + } + if candidate_rule_id != suggestion.rule_id { + suggestion.rule_id = candidate_rule_id.clone(); + suggestion.rule.id = candidate_rule_id; + suggestion.toml = render_rule_toml(&suggestion.rule)?; + } + } + Ok(()) +} + +fn render_rule_toml(rule: &AutomationRule) -> Result { + toml::to_string_pretty(&AutomationRuleSet { + rules: vec![rule.clone()], + }) + .map_err(|source| AutomationServiceError::RuleTomlSerialize { source }) +} + fn suggestion_rank(suggestion: &AutomationRuleSuggestion) -> (usize, usize, i64) { let confidence_rank = match suggestion.confidence.as_str() { "high" => SuggestionKind::ListId.rank(), @@ -456,6 +493,93 @@ mod tests { assert_eq!(suggestion.rule.matcher.has_list_unsubscribe, None); } + #[test] + fn sender_suggestions_keep_counted_evidence_aligned_with_matcher() { + let (_repo_root, config_report) = config_report(); + let mut request = request(); + request.min_thread_count = 2; + let candidates = vec![ + sender_unsubscribe_candidate("thread-u1", "message-u1", 100), + sender_unsubscribe_candidate("thread-u2", "message-u2", 200), + sender_candidate("thread-p1", "message-p1", 300), + sender_candidate("thread-p2", "message-p2", 400), + ]; + + let report = suggest_rules_from_candidates( + &config_report, + String::from("gmail:operator@example.com"), + &candidates, + &request, + 2_000_000_000, + ) + .unwrap(); + + assert_eq!(report.suggestion_count, 2); + let unsubscribe_suggestion = report + .suggestions + .iter() + .find(|suggestion| { + suggestion.rule.matcher.has_list_unsubscribe == Some(true) + && suggestion.rule.matcher.precedence.is_empty() + }) + .expect("unsubscribe-backed sender suggestion"); + assert_eq!(unsubscribe_suggestion.matched_thread_count, 2); + + let precedence_suggestion = report + .suggestions + .iter() + .find(|suggestion| { + suggestion.rule.matcher.has_list_unsubscribe.is_none() + && suggestion.rule.matcher.precedence == vec![String::from("bulk")] + }) + .expect("precedence-backed sender suggestion"); + assert_eq!(precedence_suggestion.matched_thread_count, 2); + } + + #[test] + fn deduplicates_rule_ids_after_slug_collisions() { + let (_repo_root, config_report) = config_report(); + let mut request = request(); + request.min_thread_count = 1; + let candidates = vec![ + sender_collision_candidate("foo_bar@example.com", "thread-1", "message-1", 300), + sender_collision_candidate("foo-bar@example.com", "thread-2", "message-2", 100), + ]; + + let report = suggest_rules_from_candidates( + &config_report, + String::from("gmail:operator@example.com"), + &candidates, + &request, + 2_000_000_000, + ) + .unwrap(); + + assert_eq!( + report + .suggestions + .iter() + .map(|suggestion| suggestion.rule_id.as_str()) + .collect::>(), + vec![ + "archive-sender-foo-bar-example-com", + "archive-sender-foo-bar-example-com-2", + ] + ); + let suffix_suggestion = &report.suggestions[1]; + assert_eq!( + suffix_suggestion.rule.id, + "archive-sender-foo-bar-example-com-2" + ); + assert!( + suffix_suggestion + .toml + .contains("id = \"archive-sender-foo-bar-example-com-2\"") + ); + toml::from_str::(&suffix_suggestion.toml) + .unwrap(); + } + #[test] fn ignores_new_threads_non_inbox_threads_and_small_groups() { let (_repo_root, config_report) = config_report(); @@ -572,6 +696,32 @@ mod tests { }) } + fn sender_unsubscribe_candidate( + thread_id: &str, + message_id: &str, + internal_date_epoch_ms: i64, + ) -> AutomationThreadCandidate { + candidate(thread_id, message_id, internal_date_epoch_ms, |candidate| { + candidate.from_header = String::from("Notices "); + candidate.from_address = Some(String::from("notices@example.com")); + candidate.list_unsubscribe_header = + Some(String::from("")); + }) + } + + fn sender_collision_candidate( + from_address: &str, + thread_id: &str, + message_id: &str, + internal_date_epoch_ms: i64, + ) -> AutomationThreadCandidate { + candidate(thread_id, message_id, internal_date_epoch_ms, |candidate| { + candidate.from_header = format!("Sender <{from_address}>"); + candidate.from_address = Some(from_address.to_owned()); + candidate.precedence_header = Some(String::from("bulk")); + }) + } + fn candidate( thread_id: &str, message_id: &str, From bf7ab2c7b868e83f78234920b1f255517f08e89d Mon Sep 17 00:00:00 2001 From: Bjorn Melin Date: Thu, 7 May 2026 03:25:39 -0600 Subject: [PATCH 3/5] fix(automation): address rule suggestion review --- docs/operations/verification-and-hardening.md | 1 + src/automation/suggestions.rs | 92 +++++++++++++++++-- src/cli_output/tests.rs | 28 +++++- 3 files changed, 111 insertions(+), 10 deletions(-) diff --git a/docs/operations/verification-and-hardening.md b/docs/operations/verification-and-hardening.md index d720690..ace5c39 100644 --- a/docs/operations/verification-and-hardening.md +++ b/docs/operations/verification-and-hardening.md @@ -235,6 +235,7 @@ cargo run -- doctor --json cargo run -- audit labels --json cargo run -- audit verification --json cargo run -- sync run --profile deep-audit --json +cargo run -- automation rules suggest --json cargo run -- automation rules validate --json cargo run -- automation rollout --limit 10 --json cargo run -- automation run --limit 10 --json diff --git a/src/automation/suggestions.rs b/src/automation/suggestions.rs index babb862..6fe0b13 100644 --- a/src/automation/suggestions.rs +++ b/src/automation/suggestions.rs @@ -181,14 +181,44 @@ fn group_candidates( samples: Vec::new(), }); group.matched_thread_count += 1; - if group.samples.len() < request.sample_limit { - group.samples.push(sample); - } + retain_recent_sample(&mut group.samples, sample, request.sample_limit); } (eligible_thread_count, groups) } +fn retain_recent_sample( + samples: &mut Vec, + sample: AutomationRuleSuggestionSample, + sample_limit: usize, +) { + if samples.len() < sample_limit { + samples.push(sample); + return; + } + + let Some(oldest_index) = samples + .iter() + .enumerate() + .min_by(|(_, left), (_, right)| sample_rank_key(left).cmp(&sample_rank_key(right))) + .map(|(index, _)| index) + else { + return; + }; + + if sample_rank_key(&sample) > sample_rank_key(&samples[oldest_index]) { + samples[oldest_index] = sample; + } +} + +fn sample_rank_key(sample: &AutomationRuleSuggestionSample) -> (i64, &str, &str) { + ( + sample.internal_date_epoch_ms, + sample.thread_id.as_str(), + sample.message_id.as_str(), + ) +} + fn is_candidate_eligible(candidate: &AutomationThreadCandidate, cutoff_epoch_ms: i64) -> bool { has_label(&candidate.label_names, INBOX_LABEL) && candidate.internal_date_epoch_ms <= cutoff_epoch_ms @@ -368,10 +398,14 @@ fn normalized_from_address(from_address: Option<&str>) -> Option { fn normalized_precedence(header: Option<&str>) -> Option { let value = header?.trim().to_ascii_lowercase(); + let tokens: Vec<&str> = value + .split(|character: char| !character.is_ascii_alphanumeric()) + .filter(|token| !token.is_empty()) + .collect(); ["bulk", "list", "junk"] .iter() - .find(|expected| value.contains(**expected)) - .map(|value| (*value).to_owned()) + .find(|expected| tokens.contains(expected)) + .map(|matched| (*matched).to_owned()) } fn slugify(value: &str) -> String { @@ -410,7 +444,7 @@ fn slugify(value: &str) -> String { #[cfg(test)] mod tests { - use super::{slugify, suggest_rules_from_candidates}; + use super::{normalized_precedence, slugify, suggest_rules_from_candidates}; use crate::automation::model::AutomationRulesSuggestRequest; use crate::config::resolve; use crate::store::automation::AutomationThreadCandidate; @@ -634,6 +668,52 @@ mod tests { assert_eq!(report.suggestions[0].sample_threads.len(), 1); } + #[test] + fn sample_limit_keeps_latest_samples_independent_of_input_order() { + let (_repo_root, config_report) = config_report(); + let mut request = request(); + request.limit = 1; + request.min_thread_count = 2; + request.sample_limit = 1; + let candidates = vec![ + list_candidate_for("a.example.com", "thread-a-old", "message-a-old", 100), + list_candidate_for("b.example.com", "thread-b-old", "message-b-old", 400), + list_candidate_for("b.example.com", "thread-b-new", "message-b-new", 500), + list_candidate_for("a.example.com", "thread-a-new", "message-a-new", 900), + ]; + + let report = suggest_rules_from_candidates( + &config_report, + String::from("gmail:operator@example.com"), + &candidates, + &request, + 2_000_000_000, + ) + .unwrap(); + + assert_eq!(report.suggestion_count, 1); + assert_eq!(report.suggestions[0].rule_id, "archive-list-a-example-com"); + assert_eq!(report.suggestions[0].sample_threads.len(), 1); + assert_eq!( + report.suggestions[0].sample_threads[0].thread_id, + "thread-a-new" + ); + } + + #[test] + fn normalized_precedence_requires_exact_tokens() { + assert_eq!( + normalized_precedence(Some("bulk")), + Some(String::from("bulk")) + ); + assert_eq!( + normalized_precedence(Some("x-priority; list")), + Some(String::from("list")) + ); + assert_eq!(normalized_precedence(Some("notbulk")), None); + assert_eq!(normalized_precedence(Some("xlistx")), None); + } + #[test] fn slugify_normalizes_rule_ids() { assert_eq!(slugify(""), "digest-example-com"); diff --git a/src/cli_output/tests.rs b/src/cli_output/tests.rs index 4240076..a47b645 100644 --- a/src/cli_output/tests.rs +++ b/src/cli_output/tests.rs @@ -197,6 +197,25 @@ fn automation_rules_suggest_validation_maps_to_validation_failed_code() { #[test] fn automation_rules_suggest_zero_limit_fails_in_json_and_human_modes() { + assert_automation_rules_suggest_flag_rejected("--limit"); +} + +#[test] +fn automation_rules_suggest_zero_min_thread_count_fails_in_json_and_human_modes() { + assert_automation_rules_suggest_flag_rejected("--min-thread-count"); +} + +#[test] +fn automation_rules_suggest_zero_older_than_days_fails_in_json_and_human_modes() { + assert_automation_rules_suggest_flag_rejected("--older-than-days"); +} + +#[test] +fn automation_rules_suggest_zero_sample_limit_fails_in_json_and_human_modes() { + assert_automation_rules_suggest_flag_rejected("--sample-limit"); +} + +fn assert_automation_rules_suggest_flag_rejected(flag: &str) { use std::process::Command; use tempfile::TempDir; @@ -217,7 +236,7 @@ fn automation_rules_suggest_zero_limit_fails_in_json_and_human_modes() { "automation", "rules", "suggest", - "--limit", + flag, "0", "--json", ]) @@ -233,6 +252,7 @@ fn automation_rules_suggest_zero_limit_fails_in_json_and_human_modes() { let json_value: serde_json::Value = serde_json::from_str(&json_stdout).unwrap(); assert_eq!(json_value["success"], json!(false)); assert_eq!(json_value["error"]["code"], json!("validation_failed")); + assert_eq!(json_value["error"]["kind"], json!("automation.validation")); assert_eq!( json_value["error"]["operation"], json!("automation.rules.suggest") @@ -241,7 +261,7 @@ fn automation_rules_suggest_zero_limit_fails_in_json_and_human_modes() { json_value["error"]["message"] .as_str() .unwrap() - .contains("--limit") + .contains(flag) ); let human_output = Command::new(&cargo) @@ -254,7 +274,7 @@ fn automation_rules_suggest_zero_limit_fails_in_json_and_human_modes() { "automation", "rules", "suggest", - "--limit", + flag, "0", ]) .env("XDG_CONFIG_HOME", &config_dir) @@ -265,7 +285,7 @@ fn automation_rules_suggest_zero_limit_fails_in_json_and_human_modes() { assert_eq!(human_output.status.code(), Some(2)); assert!(human_output.stdout.is_empty()); let human_stderr = String::from_utf8(human_output.stderr).unwrap(); - assert!(human_stderr.contains("automation rules suggest --limit")); + assert!(human_stderr.contains(&format!("automation rules suggest {flag}"))); } #[test] From 8fd8f650ed275a7eb5dab63f5a0934fa098c0bde Mon Sep 17 00:00:00 2001 From: Bjorn Melin Date: Thu, 7 May 2026 03:33:40 -0600 Subject: [PATCH 4/5] fix(automation): align precedence matching --- src/automation/headers.rs | 65 +++++++++++++++++++++++++++++++++++ src/automation/mod.rs | 1 + src/automation/service.rs | 51 ++++++++++++++++++++++++++- src/automation/suggestions.rs | 29 ++-------------- 4 files changed, 118 insertions(+), 28 deletions(-) create mode 100644 src/automation/headers.rs diff --git a/src/automation/headers.rs b/src/automation/headers.rs new file mode 100644 index 0000000..01dba9e --- /dev/null +++ b/src/automation/headers.rs @@ -0,0 +1,65 @@ +const PRECEDENCE_VALUES: [&str; 3] = ["bulk", "list", "junk"]; + +pub(crate) fn normalized_precedence(header: Option<&str>) -> Option { + let value = header?.trim().to_ascii_lowercase(); + PRECEDENCE_VALUES + .iter() + .find(|expected| has_ascii_token(&value, expected)) + .map(|matched| (*matched).to_owned()) +} + +pub(crate) fn match_precedence_values( + candidate: Option<&str>, + required_values: &[String], +) -> Option> { + if required_values.is_empty() { + return Some(Vec::new()); + } + let candidate = candidate?.trim().to_ascii_lowercase(); + let matches = required_values + .iter() + .filter(|required| { + let required = required.trim().to_ascii_lowercase(); + !required.is_empty() && has_ascii_token(&candidate, &required) + }) + .cloned() + .collect::>(); + (!matches.is_empty()).then_some(matches) +} + +fn has_ascii_token(value: &str, expected: &str) -> bool { + value + .split(|character: char| !character.is_ascii_alphanumeric()) + .any(|token| token == expected) +} + +#[cfg(test)] +mod tests { + use super::{match_precedence_values, normalized_precedence}; + + #[test] + fn normalized_precedence_requires_exact_tokens() { + assert_eq!( + normalized_precedence(Some("bulk")), + Some(String::from("bulk")) + ); + assert_eq!( + normalized_precedence(Some("x-priority; list")), + Some(String::from("list")) + ); + assert_eq!(normalized_precedence(Some("notbulk")), None); + assert_eq!(normalized_precedence(Some("xlistx")), None); + } + + #[test] + fn match_precedence_values_requires_exact_tokens() { + assert_eq!( + match_precedence_values(Some("x-priority; bulk"), &[String::from("bulk")]), + Some(vec![String::from("bulk")]) + ); + assert_eq!( + match_precedence_values(Some("notbulk"), &[String::from("bulk")]), + None + ); + } +} diff --git a/src/automation/mod.rs b/src/automation/mod.rs index 0312239..da628b4 100644 --- a/src/automation/mod.rs +++ b/src/automation/mod.rs @@ -1,3 +1,4 @@ +mod headers; mod model; mod output; mod rules; diff --git a/src/automation/service.rs b/src/automation/service.rs index 86d9bbd..d310291 100644 --- a/src/automation/service.rs +++ b/src/automation/service.rs @@ -1,3 +1,4 @@ +use super::headers::match_precedence_values; use super::model::{ AutomationApplyReport, AutomationPruneReport, AutomationPruneRequest, AutomationPruneStatus, AutomationRolloutCandidateSummary, AutomationRolloutReport, AutomationRolloutRequest, @@ -906,7 +907,7 @@ fn match_rule( candidate.list_id_header.as_deref(), &rule.matcher.list_id_contains, )?; - let precedence_matches = match_optional_contains( + let precedence_matches = match_precedence_values( candidate.precedence_header.as_deref(), &rule.matcher.precedence, )?; @@ -1433,6 +1434,31 @@ mod tests { assert_eq!(selected[0].thread_id, "thread-old"); } + #[test] + fn build_run_candidates_matches_precedence_by_exact_token() { + let planned_rules = vec![planned_precedence_rule("bulk-precedence")]; + let mut malformed_precedence = + thread_candidate("thread-notbulk", "message-notbulk", 100, "Digest"); + malformed_precedence.precedence_header = Some(String::from("notbulk")); + let mut bulk_precedence = thread_candidate("thread-bulk", "message-bulk", 200, "Digest"); + bulk_precedence.precedence_header = Some(String::from("x-priority; bulk")); + + let selected = build_run_candidates( + &[malformed_precedence, bulk_precedence], + &planned_rules, + 0, + 10, + ); + + assert_eq!(selected.len(), 1); + assert_eq!(selected[0].rule_id, "bulk-precedence"); + assert_eq!(selected[0].thread_id, "thread-bulk"); + assert_eq!( + selected[0].reason.precedence_values, + vec![String::from("bulk")] + ); + } + #[tokio::test] async fn rollout_reports_missing_rules_without_persisting_run() { let temp_dir = TempDir::new().unwrap(); @@ -2147,6 +2173,29 @@ kind = "archive" } } + fn planned_precedence_rule(id: &str) -> PlannedRule { + PlannedRule { + rule: AutomationRule { + id: String::from(id), + description: None, + enabled: true, + priority: 100, + matcher: AutomationMatchRule { + precedence: vec![String::from("bulk")], + ..AutomationMatchRule::default() + }, + action: crate::automation::model::AutomationRuleAction::Archive, + }, + action: AutomationActionSnapshot { + kind: AutomationActionKind::Archive, + add_label_ids: Vec::new(), + add_label_names: Vec::new(), + remove_label_ids: vec![String::from("INBOX")], + remove_label_names: vec![String::from("INBOX")], + }, + } + } + fn thread_candidate( thread_id: &str, message_id: &str, diff --git a/src/automation/suggestions.rs b/src/automation/suggestions.rs index 6fe0b13..d4f73c2 100644 --- a/src/automation/suggestions.rs +++ b/src/automation/suggestions.rs @@ -1,4 +1,5 @@ use super::AutomationServiceError; +use super::headers::normalized_precedence; use super::model::{ AutomationMatchRule, AutomationRule, AutomationRuleAction, AutomationRuleSet, AutomationRuleSuggestion, AutomationRuleSuggestionSample, AutomationRulesSuggestReport, @@ -396,18 +397,6 @@ fn normalized_from_address(from_address: Option<&str>) -> Option { (!value.is_empty()).then_some(value) } -fn normalized_precedence(header: Option<&str>) -> Option { - let value = header?.trim().to_ascii_lowercase(); - let tokens: Vec<&str> = value - .split(|character: char| !character.is_ascii_alphanumeric()) - .filter(|token| !token.is_empty()) - .collect(); - ["bulk", "list", "junk"] - .iter() - .find(|expected| tokens.contains(expected)) - .map(|matched| (*matched).to_owned()) -} - fn slugify(value: &str) -> String { let mut slug = String::new(); let mut previous_was_dash = false; @@ -444,7 +433,7 @@ fn slugify(value: &str) -> String { #[cfg(test)] mod tests { - use super::{normalized_precedence, slugify, suggest_rules_from_candidates}; + use super::{slugify, suggest_rules_from_candidates}; use crate::automation::model::AutomationRulesSuggestRequest; use crate::config::resolve; use crate::store::automation::AutomationThreadCandidate; @@ -700,20 +689,6 @@ mod tests { ); } - #[test] - fn normalized_precedence_requires_exact_tokens() { - assert_eq!( - normalized_precedence(Some("bulk")), - Some(String::from("bulk")) - ); - assert_eq!( - normalized_precedence(Some("x-priority; list")), - Some(String::from("list")) - ); - assert_eq!(normalized_precedence(Some("notbulk")), None); - assert_eq!(normalized_precedence(Some("xlistx")), None); - } - #[test] fn slugify_normalizes_rule_ids() { assert_eq!(slugify(""), "digest-example-com"); From 04b32316904cc44dde735cc7e8148965d942cd67 Mon Sep 17 00:00:00 2001 From: Bjorn Melin Date: Thu, 7 May 2026 03:39:38 -0600 Subject: [PATCH 5/5] fix(automation): parse list id identifiers --- src/automation/suggestions.rs | 62 +++++++++++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 7 deletions(-) diff --git a/src/automation/suggestions.rs b/src/automation/suggestions.rs index d4f73c2..8a369fe 100644 --- a/src/automation/suggestions.rs +++ b/src/automation/suggestions.rs @@ -383,12 +383,12 @@ fn has_list_unsubscribe(candidate: &AutomationThreadCandidate) -> bool { } fn normalized_list_id(header: Option<&str>) -> Option { - let value = header? - .trim() - .trim_start_matches('<') - .trim_end_matches('>') - .trim() - .to_ascii_lowercase(); + let header = header?.trim(); + let identifier = match (header.rfind('<'), header.rfind('>')) { + (Some(start), Some(end)) if start < end => &header[start + 1..end], + _ => header.trim_start_matches('<').trim_end_matches('>'), + }; + let value = identifier.trim().to_ascii_lowercase(); (!value.is_empty()).then_some(value) } @@ -483,6 +483,40 @@ mod tests { toml::from_str::(&suggestion.toml).unwrap(); } + #[test] + fn list_suggestions_group_by_angle_bracket_identifier() { + let (_repo_root, config_report) = config_report(); + let mut request = request(); + request.min_thread_count = 2; + let candidates = vec![ + list_candidate_with_header("Digest ", "thread-1", "message-1", 100), + list_candidate_with_header( + "Daily Digest ", + "thread-2", + "message-2", + 200, + ), + ]; + + let report = suggest_rules_from_candidates( + &config_report, + String::from("gmail:operator@example.com"), + &candidates, + &request, + 2_000_000_000, + ) + .unwrap(); + + assert_eq!(report.suggestion_count, 1); + let suggestion = &report.suggestions[0]; + assert_eq!(suggestion.rule_id, "archive-list-digest-example-com"); + assert_eq!(suggestion.matched_thread_count, 2); + assert_eq!( + suggestion.rule.matcher.list_id_contains, + vec![String::from("digest.example.com")] + ); + } + #[test] fn suggests_sender_rule_for_recurring_bulk_sender_without_list_id() { let (_repo_root, config_report) = config_report(); @@ -731,9 +765,23 @@ mod tests { thread_id: &str, message_id: &str, internal_date_epoch_ms: i64, + ) -> AutomationThreadCandidate { + list_candidate_with_header( + &format!("<{list_id}>"), + thread_id, + message_id, + internal_date_epoch_ms, + ) + } + + fn list_candidate_with_header( + list_id_header: &str, + thread_id: &str, + message_id: &str, + internal_date_epoch_ms: i64, ) -> AutomationThreadCandidate { candidate(thread_id, message_id, internal_date_epoch_ms, |candidate| { - candidate.list_id_header = Some(format!("<{list_id}>")); + candidate.list_id_header = Some(list_id_header.to_owned()); candidate.list_unsubscribe_header = Some(String::from("")); })