diff --git a/src/cli-finding-classifier/evals/files/eval13-large-refactor-real.diff b/src/cli-finding-classifier/evals/files/eval13-large-refactor-real.diff new file mode 100644 index 0000000..3dd8026 --- /dev/null +++ b/src/cli-finding-classifier/evals/files/eval13-large-refactor-real.diff @@ -0,0 +1,280 @@ +# SYNTHETIC FIXTURE: eval13-large-refactor-real +# issue_pattern: 5 file / ~280 行 / unused-import 2 + magic-number 3 (mechanical only / large refactor real-world scale) +# expected_screen_decision: auto_fix +# verification_purpose: mistral:7b の context 限界、JSON 完全性、fallback 頻度 (PR #132 smoke で 868 行 diff の screen_decision 欠落観測の再現) +diff --git a/src/auth/mod.rs b/src/auth/mod.rs +new file mode 100644 +index 0000000..a1a1a1a +--- /dev/null ++++ b/src/auth/mod.rs +@@ -0,0 +1,52 @@ ++pub mod password; ++pub mod session; ++pub mod token; ++ ++use std::collections::HashSet; ++use std::sync::Mutex; ++use std::time::Instant; ++ ++pub struct AuthService { ++ revoked: Mutex>, ++ started: Instant, ++} ++ ++impl AuthService { ++ pub fn new() -> Self { ++ Self { ++ revoked: Mutex::new(HashSet::new()), ++ started: Instant::now(), ++ } ++ } ++ ++ pub fn revoke(&self, token: String) { ++ if let Ok(mut guard) = self.revoked.lock() { ++ guard.insert(token); ++ } ++ } ++ ++ pub fn is_revoked(&self, token: &str) -> bool { ++ self.revoked ++ .lock() ++ .map(|g| g.contains(token)) ++ .unwrap_or(false) ++ } ++ ++ pub fn uptime_seconds(&self) -> u64 { ++ self.started.elapsed().as_secs() ++ } ++} ++ ++impl Default for AuthService { ++ fn default() -> Self { ++ Self::new() ++ } ++} ++ ++#[derive(Debug)] ++pub enum AuthError { ++ InvalidCredentials, ++ SessionExpired, ++ TokenRevoked, ++ InternalError(String), ++} +diff --git a/src/auth/password.rs b/src/auth/password.rs +new file mode 100644 +index 0000000..b2b2b2b +--- /dev/null ++++ b/src/auth/password.rs +@@ -0,0 +1,68 @@ ++use std::convert::TryFrom; ++ ++use super::AuthError; ++ ++pub struct PasswordHash(String); ++ ++impl PasswordHash { ++ pub fn from_plaintext(plaintext: &str) -> Result { ++ if plaintext.len() < 8 { ++ return Err(AuthError::InvalidCredentials); ++ } ++ if plaintext.len() > 128 { ++ return Err(AuthError::InvalidCredentials); ++ } ++ let salted = format!("v1:{plaintext}"); ++ let hashed = simple_hash(&salted); ++ Ok(Self(hashed)) ++ } ++ ++ pub fn verify(&self, plaintext: &str) -> bool { ++ let salted = format!("v1:{plaintext}"); ++ let hashed = simple_hash(&salted); ++ hashed == self.0 ++ } ++ ++ pub fn as_str(&self) -> &str { ++ &self.0 ++ } ++} ++ ++fn simple_hash(input: &str) -> String { ++ let mut acc = 0u64; ++ for byte in input.bytes() { ++ acc = acc.wrapping_mul(131).wrapping_add(byte as u64); ++ } ++ format!("{acc:016x}") ++} ++ ++pub struct PasswordPolicy { ++ pub min_length: usize, ++ pub require_digit: bool, ++ pub require_symbol: bool, ++} ++ ++impl Default for PasswordPolicy { ++ fn default() -> Self { ++ Self { ++ min_length: 12, ++ require_digit: true, ++ require_symbol: false, ++ } ++ } ++} ++ ++impl PasswordPolicy { ++ pub fn check(&self, plaintext: &str) -> Result<(), AuthError> { ++ if plaintext.len() < self.min_length { ++ return Err(AuthError::InvalidCredentials); ++ } ++ if self.require_digit && !plaintext.chars().any(|c| c.is_ascii_digit()) { ++ return Err(AuthError::InvalidCredentials); ++ } ++ if self.require_symbol && !plaintext.chars().any(|c| !c.is_alphanumeric()) { ++ return Err(AuthError::InvalidCredentials); ++ } ++ Ok(()) ++ } ++} +diff --git a/src/auth/session.rs b/src/auth/session.rs +new file mode 100644 +index 0000000..c3c3c3c +--- /dev/null ++++ b/src/auth/session.rs +@@ -0,0 +1,59 @@ ++use std::collections::HashMap; ++use std::path::Path; ++use std::time::{Duration, SystemTime}; ++ ++use super::AuthError; ++ ++pub struct Session { ++ user_id: String, ++ issued_at: SystemTime, ++ ttl: Duration, ++} ++ ++impl Session { ++ pub fn new(user_id: impl Into) -> Self { ++ Self { ++ user_id: user_id.into(), ++ issued_at: SystemTime::now(), ++ ttl: Duration::from_secs(86400), ++ } ++ } ++ ++ pub fn with_ttl(user_id: impl Into, ttl_secs: u64) -> Self { ++ Self { ++ user_id: user_id.into(), ++ issued_at: SystemTime::now(), ++ ttl: Duration::from_secs(ttl_secs), ++ } ++ } ++ ++ pub fn user_id(&self) -> &str { ++ &self.user_id ++ } ++ ++ pub fn is_expired(&self) -> bool { ++ match SystemTime::now().duration_since(self.issued_at) { ++ Ok(elapsed) => elapsed > self.ttl, ++ Err(_) => true, ++ } ++ } ++ ++ pub fn remaining(&self) -> Option { ++ let elapsed = SystemTime::now().duration_since(self.issued_at).ok()?; ++ if elapsed > self.ttl { ++ None ++ } else { ++ Some(self.ttl - elapsed) ++ } ++ } ++ ++ pub fn extend(&mut self, additional_secs: u64) -> Result<(), AuthError> { ++ if additional_secs == 0 { ++ return Err(AuthError::InternalError("zero extension".into())); ++ } ++ self.ttl += Duration::from_secs(additional_secs); ++ Ok(()) ++ } ++} ++ ++pub fn audit_log_path() -> &'static Path { ++ Path::new("/var/log/auth/sessions.log") ++} +diff --git a/src/auth/token.rs b/src/auth/token.rs +new file mode 100644 +index 0000000..d4d4d4d +--- /dev/null ++++ b/src/auth/token.rs +@@ -0,0 +1,52 @@ ++use std::fmt; ++ ++use super::AuthError; ++ ++pub struct Token { ++ value: String, ++ user_id: String, ++} ++ ++impl Token { ++ pub fn issue(user_id: impl Into) -> Result { ++ let user_id = user_id.into(); ++ if user_id.is_empty() { ++ return Err(AuthError::InvalidCredentials); ++ } ++ let value = generate_token_string(32); ++ Ok(Self { value, user_id }) ++ } ++ ++ pub fn value(&self) -> &str { ++ &self.value ++ } ++ ++ pub fn user_id(&self) -> &str { ++ &self.user_id ++ } ++} ++ ++impl fmt::Display for Token { ++ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { ++ write!(f, "Token({}...)", &self.value[..8.min(self.value.len())]) ++ } ++} ++ ++fn generate_token_string(length: usize) -> String { ++ let alphabet = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; ++ let mut out = String::with_capacity(length); ++ let mut seed = 0xDEADBEEFu64; ++ for _ in 0..length { ++ seed = seed.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407); ++ let idx = (seed >> 33) as usize % alphabet.len(); ++ out.push(alphabet[idx] as char); ++ } ++ out ++} ++ ++pub fn validate_token_format(value: &str) -> bool { ++ value.len() == 32 ++ && value ++ .chars() ++ .all(|c| c.is_ascii_alphanumeric()) ++} +diff --git a/src/lib.rs b/src/lib.rs +index e5e5e5e..f6f6f6f 100644 +--- a/src/lib.rs ++++ b/src/lib.rs +@@ -1,3 +1,15 @@ ++pub mod auth; ++ + pub mod errors; + pub mod parser; + pub mod retry; ++ ++pub use auth::{AuthError, AuthService}; ++pub use auth::password::{PasswordHash, PasswordPolicy}; ++pub use auth::session::Session; ++pub use auth::token::Token; ++ ++pub fn library_name() -> &'static str { ++ "myapp" ++} diff --git a/src/cli-finding-classifier/evals/files/eval14-mid-mixed.diff b/src/cli-finding-classifier/evals/files/eval14-mid-mixed.diff new file mode 100644 index 0000000..9b85d27 --- /dev/null +++ b/src/cli-finding-classifier/evals/files/eval14-mid-mixed.diff @@ -0,0 +1,153 @@ +# SYNTHETIC FIXTURE: eval14-mid-mixed +# issue_pattern: 3 file / ~150 行 / unused-import 1 + magic-number 2 (mid-scale recall stress) +# expected_screen_decision: auto_fix +# verification_purpose: scale 中域 (Phase b' fixture の ~5x) で recall が崩れないか +diff --git a/src/errors/mod.rs b/src/errors/mod.rs +new file mode 100644 +index 0000000..1111111 +--- /dev/null ++++ b/src/errors/mod.rs +@@ -0,0 +1,49 @@ ++pub mod context; ++pub mod retry; ++ ++use std::fmt; ++use std::sync::Arc; ++ ++#[derive(Debug)] ++pub enum AppError { ++ NotFound(String), ++ Validation(String), ++ Internal(String), ++ Timeout, ++} ++ ++impl fmt::Display for AppError { ++ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { ++ match self { ++ AppError::NotFound(what) => write!(f, "not found: {what}"), ++ AppError::Validation(msg) => write!(f, "validation error: {msg}"), ++ AppError::Internal(msg) => write!(f, "internal error: {msg}"), ++ AppError::Timeout => write!(f, "timeout"), ++ } ++ } ++} ++ ++impl std::error::Error for AppError {} ++ ++pub type AppResult = Result; ++ ++pub fn into_validation(e: E) -> AppError { ++ AppError::Validation(e.to_string()) ++} ++ ++pub fn into_internal(e: E) -> AppError { ++ AppError::Internal(e.to_string()) ++} ++ ++pub fn classify_io_error(e: &std::io::Error) -> AppError { ++ match e.kind() { ++ std::io::ErrorKind::NotFound => AppError::NotFound(e.to_string()), ++ std::io::ErrorKind::TimedOut => AppError::Timeout, ++ _ => AppError::Internal(e.to_string()), ++ } ++} ++ ++pub fn is_recoverable(err: &AppError) -> bool { ++ matches!(err, AppError::Timeout | AppError::Internal(_)) ++} +diff --git a/src/errors/context.rs b/src/errors/context.rs +new file mode 100644 +index 0000000..2222222 +--- /dev/null ++++ b/src/errors/context.rs +@@ -0,0 +1,38 @@ ++use std::collections::HashMap; ++use std::fmt::Write; ++ ++pub struct ErrorContext { ++ fields: HashMap, ++ message: String, ++} ++ ++impl ErrorContext { ++ pub fn new(message: impl Into) -> Self { ++ Self { ++ fields: HashMap::new(), ++ message: message.into(), ++ } ++ } ++ ++ pub fn with(mut self, key: impl Into, value: impl Into) -> Self { ++ self.fields.insert(key.into(), value.into()); ++ self ++ } ++ ++ pub fn render(&self) -> String { ++ let mut out = self.message.clone(); ++ if !self.fields.is_empty() { ++ out.push_str(" ("); ++ let mut first = true; ++ for (k, v) in &self.fields { ++ if !first { ++ out.push_str(", "); ++ } ++ let _ = write!(&mut out, "{k}={v}"); ++ first = false; ++ } ++ out.push(')'); ++ } ++ out ++ } ++} +diff --git a/src/errors/retry.rs b/src/errors/retry.rs +new file mode 100644 +index 0000000..3333333 +--- /dev/null ++++ b/src/errors/retry.rs +@@ -0,0 +1,46 @@ ++use std::path::Path; ++use std::thread::sleep; ++use std::time::Duration; ++ ++use super::{AppError, AppResult}; ++ ++pub struct RetryPolicy { ++ pub max_attempts: u32, ++ pub backoff_ms: u64, ++} ++ ++impl RetryPolicy { ++ pub fn default_policy() -> Self { ++ Self { ++ max_attempts: 5, ++ backoff_ms: 250, ++ } ++ } ++} ++ ++pub fn run_with_retry(policy: &RetryPolicy, mut op: F) -> AppResult ++where ++ F: FnMut() -> AppResult, ++{ ++ let mut attempt = 0u32; ++ loop { ++ attempt += 1; ++ match op() { ++ Ok(v) => return Ok(v), ++ Err(e) if attempt >= policy.max_attempts => return Err(e), ++ Err(_) => { ++ let backoff = policy.backoff_ms * (2u64.pow(attempt - 1)); ++ sleep(Duration::from_millis(backoff.min(60000))); ++ } ++ } ++ } ++} ++ ++pub fn audit_retry_log(_path: &Path) -> AppResult<()> { ++ Ok(()) ++} ++ ++pub fn fixed_timeout_ms() -> u64 { ++ 30000 ++} diff --git a/src/cli-finding-classifier/evals/files/eval15-syntax-stress.diff b/src/cli-finding-classifier/evals/files/eval15-syntax-stress.diff new file mode 100644 index 0000000..09ba119 --- /dev/null +++ b/src/cli-finding-classifier/evals/files/eval15-syntax-stress.diff @@ -0,0 +1,208 @@ +# SYNTHETIC FIXTURE: eval15-syntax-stress +# issue_pattern: 1 file / ~200 行 / unused-import 3 + magic-number 2 (single file long diff) +# expected_screen_decision: auto_fix +# verification_purpose: 単 file 長尺 diff で JSON 出力 schema が完全に保たれるか +diff --git a/src/parser.rs b/src/parser.rs +new file mode 100644 +index 0000000..4444444 +--- /dev/null ++++ b/src/parser.rs +@@ -0,0 +1,201 @@ ++use std::collections::BTreeMap; ++use std::collections::HashMap; ++use std::fmt; ++use std::path::PathBuf; ++use std::str::FromStr; ++ ++pub struct ParsedDocument { ++ pub title: String, ++ pub sections: Vec
, ++ pub metadata: HashMap, ++} ++ ++pub struct Section { ++ pub heading: String, ++ pub level: u8, ++ pub body: String, ++ pub anchors: Vec, ++} ++ ++#[derive(Debug)] ++pub enum ParseError { ++ EmptyInput, ++ UnterminatedFence(usize), ++ InvalidHeading(usize), ++ TooLarge(usize), ++} ++ ++impl fmt::Display for ParseError { ++ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { ++ match self { ++ ParseError::EmptyInput => write!(f, "empty input"), ++ ParseError::UnterminatedFence(line) => { ++ write!(f, "unterminated code fence at line {line}") ++ } ++ ParseError::InvalidHeading(line) => write!(f, "invalid heading at line {line}"), ++ ParseError::TooLarge(size) => write!(f, "input too large: {size} bytes"), ++ } ++ } ++} ++ ++impl std::error::Error for ParseError {} ++ ++pub fn parse_document(input: &str) -> Result { ++ if input.is_empty() { ++ return Err(ParseError::EmptyInput); ++ } ++ if input.len() > 1048576 { ++ return Err(ParseError::TooLarge(input.len())); ++ } ++ ++ let mut sections = Vec::new(); ++ let mut metadata = HashMap::new(); ++ let mut title = String::new(); ++ let mut in_fence = false; ++ let mut fence_start = 0usize; ++ let mut current_heading: Option<(String, u8, Vec)> = None; ++ let mut current_body = String::new(); ++ ++ for (idx, line) in input.lines().enumerate() { ++ let line_no = idx + 1; ++ ++ if line.starts_with("```") { ++ in_fence = !in_fence; ++ if in_fence { ++ fence_start = line_no; ++ } ++ current_body.push_str(line); ++ current_body.push('\n'); ++ continue; ++ } ++ ++ if in_fence { ++ current_body.push_str(line); ++ current_body.push('\n'); ++ continue; ++ } ++ ++ if let Some(stripped) = line.strip_prefix("# ") { ++ if title.is_empty() { ++ title = stripped.to_string(); ++ continue; ++ } ++ } ++ ++ if let Some(level) = leading_hash_count(line) { ++ if level == 0 || level > 6 { ++ return Err(ParseError::InvalidHeading(line_no)); ++ } ++ ++ if let Some((heading, lvl, anchors)) = current_heading.take() { ++ sections.push(Section { ++ heading, ++ level: lvl, ++ body: std::mem::take(&mut current_body), ++ anchors, ++ }); ++ } ++ ++ let heading_text = line ++ .trim_start_matches('#') ++ .trim() ++ .to_string(); ++ current_heading = Some((heading_text, level as u8, Vec::new())); ++ continue; ++ } ++ ++ if let Some(meta_value) = line.strip_prefix("---") { ++ if let Some((k, v)) = meta_value.trim().split_once(':') { ++ metadata.insert(k.trim().to_string(), v.trim().to_string()); ++ } ++ continue; ++ } ++ ++ current_body.push_str(line); ++ current_body.push('\n'); ++ } ++ ++ if in_fence { ++ return Err(ParseError::UnterminatedFence(fence_start)); ++ } ++ ++ if let Some((heading, lvl, anchors)) = current_heading.take() { ++ sections.push(Section { ++ heading, ++ level: lvl, ++ body: current_body, ++ anchors, ++ }); ++ } ++ ++ Ok(ParsedDocument { ++ title, ++ sections, ++ metadata, ++ }) ++} ++ ++fn leading_hash_count(line: &str) -> Option { ++ let mut count = 0usize; ++ for ch in line.chars() { ++ if ch == '#' { ++ count += 1; ++ } else if ch == ' ' { ++ return Some(count); ++ } else { ++ return None; ++ } ++ } ++ None ++} ++ ++pub fn extract_anchors(body: &str) -> Vec { ++ let mut anchors = Vec::new(); ++ let mut depth = 0u32; ++ let mut buf = String::new(); ++ for ch in body.chars() { ++ if ch == '[' { ++ depth += 1; ++ continue; ++ } ++ if ch == ']' && depth > 0 { ++ depth -= 1; ++ if !buf.is_empty() { ++ anchors.push(std::mem::take(&mut buf)); ++ } ++ continue; ++ } ++ if depth > 0 { ++ buf.push(ch); ++ } ++ } ++ anchors ++} ++ ++pub fn render_outline(doc: &ParsedDocument) -> String { ++ let mut out = String::new(); ++ out.push_str(&doc.title); ++ out.push('\n'); ++ for section in &doc.sections { ++ for _ in 0..section.level { ++ out.push(' '); ++ } ++ out.push_str(§ion.heading); ++ out.push('\n'); ++ } ++ out ++} ++ ++pub fn count_sections_at_level(doc: &ParsedDocument, level: u8) -> usize { ++ doc.sections.iter().filter(|s| s.level == level).count() ++} ++ ++pub fn truncate_to_max_sections(mut doc: ParsedDocument) -> ParsedDocument { ++ if doc.sections.len() > 256 { ++ doc.sections.truncate(256); ++ } ++ doc ++} diff --git a/src/cli-finding-classifier/evals/lint-screen-evals.json b/src/cli-finding-classifier/evals/lint-screen-evals.json index d812474..e149567 100644 --- a/src/cli-finding-classifier/evals/lint-screen-evals.json +++ b/src/cli-finding-classifier/evals/lint-screen-evals.json @@ -369,6 +369,168 @@ "JSON parse 成功", "prompt template の 'test-only patterns inside #[cfg(test)] / describe( / test( blocks' 指示を LLM が遵守するか" ] + }, + { + "id": 13, + "name": "large-refactor-real-context-stress", + "input_diff": "evals/files/eval13-large-refactor-real.diff", + "claude_code_baseline": { + "model": "claude-opus-4-7", + "captured_at": "2026-05-09", + "captured_by": "claude-code-session-e153c056 / Bundle i 拡張", + "lint_findings": [ + { + "severity": "minor", + "rule": "unused-import", + "file": "src/auth/password.rs", + "line": 1, + "issue": "use std::convert::TryFrom; が未使用 (Rust 2021 edition では prelude に含まれるため明示 import 不要)", + "suggestion": "import を削除" + }, + { + "severity": "minor", + "rule": "unused-import", + "file": "src/auth/session.rs", + "line": 1, + "issue": "use std::collections::HashMap; が未使用", + "suggestion": "import を削除" + }, + { + "severity": "minor", + "rule": "magic-number", + "file": "src/auth/session.rs", + "line": 18, + "issue": "TTL のリテラル 86400 (1 日 = 24*60*60) がハードコード", + "suggestion": "const DEFAULT_SESSION_TTL_SECS: u64 = 86400; に切り出す" + }, + { + "severity": "minor", + "rule": "magic-number", + "file": "src/auth/token.rs", + "line": 16, + "issue": "トークン長 32 がハードコード (validate_token_format でも 32 を再記述)", + "suggestion": "const TOKEN_LENGTH: usize = 32; に切り出す" + }, + { + "severity": "minor", + "rule": "magic-number", + "file": "src/auth/password.rs", + "line": 48, + "issue": "PasswordPolicy のデフォルト min_length = 12 がハードコード", + "suggestion": "const DEFAULT_MIN_PASSWORD_LENGTH: usize = 12; に切り出す" + } + ], + "screen_decision": "auto_fix" + }, + "expectations": [ + "lint_findings が 3 件以上 (5 baseline 中 recall stress、context 限界で取りこぼしが起きないか)", + "rule の種類が 2 種以上 (unused-import / magic-number 双方を carry)", + "screen_decision が 'auto_fix' (mechanical only、deep-nesting なし)", + "JSON parse 成功 (Phase c MVP smoke で観測した 868 行 diff の screen_decision 欠落と同型 failure mode の再現確認)", + "context 限界テスト: 5 file / 280+ 行 を読み終え schema が完全である" + ] + }, + { + "id": 14, + "name": "mid-mixed-recall-stability", + "input_diff": "evals/files/eval14-mid-mixed.diff", + "claude_code_baseline": { + "model": "claude-opus-4-7", + "captured_at": "2026-05-09", + "captured_by": "claude-code-session-e153c056 / Bundle i 拡張", + "lint_findings": [ + { + "severity": "minor", + "rule": "unused-import", + "file": "src/errors/mod.rs", + "line": 5, + "issue": "use std::sync::Arc; が未使用", + "suggestion": "import を削除" + }, + { + "severity": "minor", + "rule": "magic-number", + "file": "src/errors/retry.rs", + "line": 15, + "issue": "RetryPolicy::default_policy の max_attempts = 5 がハードコード", + "suggestion": "const DEFAULT_MAX_ATTEMPTS: u32 = 5; に切り出す" + }, + { + "severity": "minor", + "rule": "magic-number", + "file": "src/errors/retry.rs", + "line": 44, + "issue": "fixed_timeout_ms 内の 30000 がハードコード", + "suggestion": "const FIXED_TIMEOUT_MS: u64 = 30000; に切り出す" + } + ], + "screen_decision": "auto_fix" + }, + "expectations": [ + "lint_findings が 2 件以上 (mid-scale で recall が崩れないか)", + "rule の種類が 2 種 (unused-import + magic-number)", + "screen_decision が 'auto_fix'", + "JSON parse 成功" + ] + }, + { + "id": 15, + "name": "syntax-stress-single-file", + "input_diff": "evals/files/eval15-syntax-stress.diff", + "claude_code_baseline": { + "model": "claude-opus-4-7", + "captured_at": "2026-05-09", + "captured_by": "claude-code-session-e153c056 / Bundle i 拡張", + "lint_findings": [ + { + "severity": "minor", + "rule": "unused-import", + "file": "src/parser.rs", + "line": 1, + "issue": "use std::collections::BTreeMap; が未使用", + "suggestion": "import を削除" + }, + { + "severity": "minor", + "rule": "unused-import", + "file": "src/parser.rs", + "line": 4, + "issue": "use std::path::PathBuf; が未使用", + "suggestion": "import を削除" + }, + { + "severity": "minor", + "rule": "unused-import", + "file": "src/parser.rs", + "line": 5, + "issue": "use std::str::FromStr; が未使用", + "suggestion": "import を削除" + }, + { + "severity": "minor", + "rule": "magic-number", + "file": "src/parser.rs", + "line": 46, + "issue": "parse_document の入力サイズ閾値 1048576 (= 1 MiB) がハードコード", + "suggestion": "const MAX_INPUT_BYTES: usize = 1024 * 1024; に切り出す" + }, + { + "severity": "minor", + "rule": "magic-number", + "file": "src/parser.rs", + "line": 194, + "issue": "truncate_to_max_sections の閾値 256 がハードコード", + "suggestion": "const MAX_SECTIONS: usize = 256; に切り出す" + } + ], + "screen_decision": "auto_fix" + }, + "expectations": [ + "lint_findings が 4 件以上 (5 baseline 中、単 file 長尺で recall が安定)", + "全 finding の rule が unused-import / magic-number のいずれか", + "screen_decision が 'auto_fix'", + "JSON parse 成功 (200+ 行の単 file diff で schema 完全性確認)" + ] } ] } diff --git a/src/cli-finding-classifier/tests/lint_screen_evals.rs b/src/cli-finding-classifier/tests/lint_screen_evals.rs index c4620d8..8a91444 100644 --- a/src/cli-finding-classifier/tests/lint_screen_evals.rs +++ b/src/cli-finding-classifier/tests/lint_screen_evals.rs @@ -232,14 +232,38 @@ fn build_confusion_matrix(pairs: &[(String, String)]) -> [[u32; 3]; 3] { } #[test] -fn eval_set_loads_and_has_phase_b_prime_twelve_entries() { +fn eval_set_loads_and_has_at_least_phase_b_prime_baseline_count() { let set = load_eval_set(); assert_eq!(set.schema_version, 1); assert!(set.agreement_threshold >= 0.5 && set.agreement_threshold <= 1.0); - assert_eq!( - set.evals.len(), - 12, - "Phase b' scope is 12 fixtures (Phase a 6 件 + Phase b' 拡張 6 件)" + assert!( + set.evals.len() >= 15, + "Bundle i baseline は 15 fixtures 以上を維持する必要があります (現状 {})", + set.evals.len() + ); +} + +/// Bundle i (Phase d 着手前必須) で eval13/14/15 を追加し 15 件に到達したことを検証。 +/// +/// docs/local-llm-offload-analysis.md §1 Phase c+ で要求された scale-aware fixture +/// (200+ 行 / 3 件) が実体として存在することを最低限の重複スモークでガードする。 +#[test] +fn eval_set_includes_bundle_i_scale_aware_fixtures() { + let set = load_eval_set(); + let names: Vec<&str> = set.evals.iter().map(|e| e.name.as_str()).collect(); + assert!( + names + .iter() + .any(|n| n.contains("large-refactor-real")), + "eval13 (large-refactor-real-context-stress) が必要 (現状: {names:?})" + ); + assert!( + names.iter().any(|n| n.contains("mid-mixed")), + "eval14 (mid-mixed-recall-stability) が必要 (現状: {names:?})" + ); + assert!( + names.iter().any(|n| n.contains("syntax-stress")), + "eval15 (syntax-stress-single-file) が必要 (現状: {names:?})" ); } @@ -734,7 +758,7 @@ fn run_lint_screen_against_all_fixtures() { ) .unwrap(); - println!("\n=== Phase b' evals: lint-screen end-to-end ==="); + println!("\n=== Phase b'/Bundle i evals: lint-screen end-to-end ==="); let outcomes: Vec = set .evals .iter() diff --git a/src/cli-push-runner/src/config.rs b/src/cli-push-runner/src/config.rs index 2eb3418..46e505b 100644 --- a/src/cli-push-runner/src/config.rs +++ b/src/cli-push-runner/src/config.rs @@ -226,6 +226,164 @@ command = "jj git push" assert_eq!(diff.output_path, ".takt/review-diff.txt"); } + #[test] + fn config_parses_with_lint_screen_section_full_fields() { + let toml_str = r#" +[quality_gate] +[[quality_gate.groups]] +name = "test" +commands = ["echo ok"] + +[lint_screen] +enabled = true +exe_path = ".claude/cli-finding-classifier.exe" +model = "mistral:7b" +endpoint = "http://localhost:11434" +timeout_secs = 90 +max_diff_lines = 4000 +output_path = ".takt/lint-screen-report.md" + +[takt] +workflow = "pre-push-review" +task = "pre-push review" + +[push] +command = "jj git push" +"#; + let config: Config = toml::from_str(toml_str).unwrap(); + + let lint = config + .lint_screen + .expect("[lint_screen] section should produce Some(LintScreenConfig)"); + assert!(lint.enabled); + assert_eq!( + lint.exe_path.as_deref(), + Some(".claude/cli-finding-classifier.exe") + ); + assert_eq!(lint.model.as_deref(), Some("mistral:7b")); + assert_eq!(lint.endpoint.as_deref(), Some("http://localhost:11434")); + assert_eq!(lint.timeout_secs, Some(90)); + assert_eq!(lint.max_diff_lines, Some(4000)); + assert_eq!( + lint.output_path.as_deref(), + Some(".takt/lint-screen-report.md") + ); + } + + #[test] + fn config_parses_with_lint_screen_section_minimal_only_enabled() { + let toml_str = r#" +[quality_gate] +[[quality_gate.groups]] +name = "test" +commands = ["echo ok"] + +[lint_screen] +enabled = false + +[takt] +workflow = "w" +task = "t" + +[push] +command = "echo push" +"#; + let config: Config = toml::from_str(toml_str).unwrap(); + + let lint = config + .lint_screen + .expect("section present even with only `enabled` should produce Some"); + assert!(!lint.enabled); + assert!(lint.exe_path.is_none()); + assert!(lint.model.is_none()); + assert!(lint.endpoint.is_none()); + assert!(lint.timeout_secs.is_none()); + assert!(lint.max_diff_lines.is_none()); + assert!(lint.output_path.is_none()); + } + + #[test] + fn config_lint_screen_section_absent_yields_none() { + let toml_str = r#" +[quality_gate] +[[quality_gate.groups]] +name = "test" +commands = ["echo ok"] + +[takt] +workflow = "w" +task = "t" + +[push] +command = "echo push" +"#; + let config: Config = toml::from_str(toml_str).unwrap(); + assert!( + config.lint_screen.is_none(), + "absent [lint_screen] should yield None (default OFF lane)" + ); + } + + const LINT_SCREEN_ONLY_ENABLED_TOML: &str = r#" +[quality_gate] +[[quality_gate.groups]] +name = "test" +commands = ["echo ok"] + +[lint_screen] +enabled = true + +[takt] +workflow = "w" +task = "t" + +[push] +command = "echo push" +"#; + + #[test] + fn config_lint_screen_numeric_defaults_resolve_via_constants() { + let config: Config = toml::from_str(LINT_SCREEN_ONLY_ENABLED_TOML).unwrap(); + let lint = config.lint_screen.unwrap(); + assert_eq!( + lint.timeout_secs.unwrap_or(DEFAULT_LINT_SCREEN_TIMEOUT_SECS), + DEFAULT_LINT_SCREEN_TIMEOUT_SECS, + ); + assert_eq!( + lint.max_diff_lines + .unwrap_or(DEFAULT_LINT_SCREEN_MAX_DIFF_LINES), + DEFAULT_LINT_SCREEN_MAX_DIFF_LINES, + ); + } + + #[test] + fn config_lint_screen_string_defaults_resolve_via_constants() { + let config: Config = toml::from_str(LINT_SCREEN_ONLY_ENABLED_TOML).unwrap(); + let lint = config.lint_screen.unwrap(); + assert_eq!( + lint.model.as_deref().unwrap_or(DEFAULT_LINT_SCREEN_MODEL), + DEFAULT_LINT_SCREEN_MODEL, + ); + assert_eq!( + lint.endpoint + .as_deref() + .unwrap_or(DEFAULT_LINT_SCREEN_ENDPOINT), + DEFAULT_LINT_SCREEN_ENDPOINT, + ); + assert_eq!( + lint.exe_path + .as_deref() + .unwrap_or(DEFAULT_LINT_SCREEN_EXE_PATH), + DEFAULT_LINT_SCREEN_EXE_PATH, + ); + assert_eq!( + lint.output_path + .as_deref() + .unwrap_or(DEFAULT_LINT_SCREEN_OUTPUT_PATH), + DEFAULT_LINT_SCREEN_OUTPUT_PATH, + ); + } + #[test] fn config_quality_gate_defaults() { let toml_str = r#"