Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions codeflash/languages/java/replacement.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,39 @@ def _replace_constructors(
return result


def _normalized_equal(a: str, b: str) -> bool:
"""Compare two method sources ignoring whitespace differences.

Only indentation, trailing whitespace and blank lines are ignored; whitespace
inside a line stays significant.
"""

def normalize(s: str) -> str:
# Strip every line and drop the lines that are empty after stripping.
return "\n".join(line.strip() for line in s.strip().splitlines() if line.strip())
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚡️Codeflash found 34% (0.34x) speedup for _normalized_equal in codeflash/languages/java/replacement.py

⏱️ Runtime: 1.13 milliseconds → 847 microseconds (best of 250 runs)

📝 Explanation and details

The optimization replaces the generator expression "\n".join(line.strip() for line in s.strip().splitlines() if line.strip()) with an explicit loop that strips each line once and appends the non-empty results to a list. The original stripped every line twice (once in the filter and once to produce the value) and also stripped the whole string up front; the rewrite performs a single strip per line followed by a truthiness check, and leading or trailing blank lines are still discarded by that per-line check. This gives a roughly 33% speedup (about 25% less runtime) across inputs ranging from short strings to 1000+ line blocks. The function is called by replace_function to check whether an LLM-generated optimization actually modifies the target method, so the saving applies every time a candidate with class-level modifications is screened for being a no-op.

Correctness verification report:

Test Status
⚙️ Existing Unit Tests 🔘 None Found
🌀 Generated Regression Tests 68 Passed
⏪ Replay Tests 🔘 None Found
🔎 Concolic Coverage Tests 🔘 None Found
📊 Tests Coverage 100.0%
🌀 Click to see Generated Regression Tests
from codeflash.languages.java.replacement import _normalized_equal


def test_simple_identical_strings_are_equal():
    """Two byte-for-byte identical snippets compare equal."""
    snippet = "def foo():\n    return 1"
    duplicate = "def foo():\n    return 1"
    outcome = _normalized_equal(snippet, duplicate)  # 3.03μs -> 2.31μs (31.1% faster)
    assert outcome is True


def test_whitespace_differences_ignored_between_lines():
    """Indentation, trailing spaces and blank lines do not affect equality."""
    padded = "\n\n  def foo():   \n      return 1  \n\n"
    compact = "def foo():\n\treturn 1"  # tab indentation instead of spaces
    # Both sides reduce to the two lines "def foo():" and "return 1".
    outcome = _normalized_equal(padded, compact)  # 3.25μs -> 2.92μs (11.3% faster)
    assert outcome is True


def test_different_content_not_equal():
    """A changed token inside a line is detected."""
    returns_one = "def foo():\n    return 1"
    returns_two = "def foo():\n    return 2"
    outcome = _normalized_equal(returns_one, returns_two)  # 2.75μs -> 2.00μs (37.5% faster)
    assert outcome is False


def test_internal_multiple_spaces_are_preserved():
    """Only the ends of a line are stripped; spacing inside a line still counts."""
    wide = "value =  a   +   b"
    narrow = "value = a + b"
    outcome = _normalized_equal(wide, narrow)  # 2.26μs -> 1.63μs (38.7% faster)
    assert outcome is False


def test_empty_and_whitespace_only_strings_equal():
    """Inputs made up solely of whitespace all normalize to the empty string."""
    both_empty = _normalized_equal("", "")  # 1.91μs -> 1.19μs (60.5% faster)
    assert both_empty is True
    blank_lines_vs_empty = _normalized_equal("   \n  \n", "")  # 1.20μs -> 1.35μs (11.1% slower)
    assert blank_lines_vs_empty is True
    newlines_vs_spaces = _normalized_equal("\n\n", "   ")  # 1.06μs -> 1.16μs (8.69% slower)
    assert newlines_vs_spaces is True


def test_order_of_nonempty_lines_matters():
    """Swapping two lines yields a different normalized text."""
    in_order = "first\nsecond\nthird"
    swapped = "second\nfirst\nthird"
    outcome = _normalized_equal(in_order, swapped)  # 3.23μs -> 2.42μs (33.5% faster)
    assert outcome is False


def test_handling_of_windows_crlf_and_old_mac_newlines():
    """CRLF and bare-CR line endings are treated the same as LF."""
    unix = "  line1\nline2\n\nline3"

    # Windows-style "\r\n" terminators.
    windows = "line1\r\n   line2\r\n\r\nline3  "
    assert _normalized_equal(windows, unix) is True  # 3.52μs -> 2.96μs (19.0% faster)

    # Classic Mac "\r" terminators, which str.splitlines() also recognizes as line boundaries.
    old_mac = "line1\r   line2\r\rline3  "
    assert _normalized_equal(old_mac, unix) is True


def test_tabs_inside_lines_are_preserved_but_trim_edges():
    """Tabs at the ends of a line are trimmed while tabs between words are kept."""
    with_edge_tabs = "\t\titem\twith\tinternal\ttabs\t\n"
    bare = "item\twith\tinternal\ttabs"
    # Once the outer tabs and the trailing newline are gone the two texts coincide.
    outcome = _normalized_equal(with_edge_tabs, bare)  # 2.29μs -> 1.96μs (16.8% faster)
    assert outcome is True


def test_none_inputs_raise_attribute_error():
    """``None`` inputs raise ``AttributeError``; ordinary strings compare as usual."""
    # None has no string methods, so normalizing it fails whichever argument carries it.
    for bad_a, bad_b in ((None, "class A {}"), ("class A {}", None), (None, None)):
        try:
            _normalized_equal(bad_a, bad_b)
        except AttributeError:
            pass
        else:
            raise AssertionError("expected AttributeError for a None input")

    # Pattern 1: code compared with an empty string is not equal.
    result = _normalized_equal("public class Foo {}", "")  # 2.21μs -> 1.40μs (57.8% faster)
    assert result is False

    # Pattern 2: two identical code strings compare equal.
    code_a = "class A {\n    int x = 5;\n}"
    code_b = "class A {\n    int x = 5;\n}"
    assert _normalized_equal(code_a, code_b) is True  # 2.64μs -> 1.96μs (34.7% faster)

    # Pattern 3: a comment line counts as content, so removing it breaks equality.
    code_with_comments = "// comment\nint method() {\n    return 42;\n}"
    code_no_comments = "int method() {\n    return 42;\n}"
    assert _normalized_equal(code_with_comments, code_no_comments) is False  # 2.09μs -> 1.56μs (34.0% faster)


def test_non_breaking_space_vs_regular_space_not_ignored():
    """A non-breaking space inside a line is not treated as an ordinary space."""
    # Stripping only touches the ends of a line, so the U+00A0 between the
    # two letters survives normalization and differs from a plain space.
    with_nbsp = "a\u00a0b"
    with_space = "a b"
    outcome = _normalized_equal(with_nbsp, with_space)  # 2.47μs -> 1.79μs (37.9% faster)
    assert outcome is False


def test_large_number_of_lines_with_varying_whitespace_are_equal():
    """1000 logical lines stay equal despite differing padding, blank lines and line endings."""
    total = 1000

    # First rendering: three spaces on both sides of every entry, plus a
    # whitespace-only line after every 50th entry, joined with LF.
    padded = []
    for idx in range(total):
        padded.append(f"   line_{idx}   ")
        if not idx % 50:
            padded.append("   ")
    a = "\n".join(padded)

    # Second rendering: tab or two-space indent, optional trailing spaces,
    # an empty line after every 60th entry, joined with CRLF throughout.
    mixed = []
    for idx in range(total):
        lead = "  " if idx % 3 else "\t"
        tail = "   " if idx % 7 else ""
        mixed.append(lead + f"line_{idx}" + tail)
        if not idx % 60:
            mixed.append("")
    b = "\r\n".join(mixed)

    # Both renderings reduce to the same sequence of stripped, non-empty lines.
    outcome = _normalized_equal(a, b)  # 362μs -> 268μs (34.8% faster)
    assert outcome is True


def test_repeated_comparisons_stability_over_many_iterations():
    """Representative Java method snippets: equal pairs match, differing pairs do not."""
    matching_pairs = (
        ("public void test() {}", "public void test() {}"),
        ("void method() {\n    int x = 1;\n}", "void method() {\n    int x = 1;\n}"),
        ('  String getName() {\n    return "test";\n  }', 'String getName() {\n    return "test";\n}'),
        (
            "List<String> getList() {\n    return new ArrayList<>();\n}",
            "List<String> getList() {\n    return new ArrayList<>();\n}",
        ),
        ("@Override\npublic boolean equals(Object o) {}", "@Override\npublic boolean equals(Object o) {}"),
    )
    for left, right in matching_pairs:
        outcome = _normalized_equal(left, right)  # 11.1μs -> 8.05μs (37.3% faster)
        assert outcome is True

    # Pairs that differ in a method name, a returned literal, or a modifier.
    mismatched_pairs = (
        ("void foo() { return 1; }", "void bar() { return 1; }"),
        ("int getValue() { return 0; }", "int getValue() { return 1; }"),
        ("static void test() {}", "void test() {}"),
    )
    for left, right in mismatched_pairs:
        outcome = _normalized_equal(left, right)  # 3.65μs -> 2.34μs (56.0% faster)
        assert outcome is False
from codeflash.languages.java.replacement import _normalized_equal


def test_identical_strings_are_equal():
    """A single word matches an identical copy of itself."""
    outcome = _normalized_equal("hello", "hello")  # 2.52μs -> 1.62μs (55.5% faster)
    assert outcome is True


def test_identical_multiline_strings_are_equal():
    """Identical two-line snippets compare equal."""
    first = "def foo():\n    pass"
    second = "def foo():\n    pass"
    outcome = _normalized_equal(first, second)  # 3.23μs -> 2.46μs (31.3% faster)
    assert outcome is True


def test_different_strings_are_not_equal():
    """Unrelated words do not compare equal."""
    outcome = _normalized_equal("hello", "world")  # 2.27μs -> 1.59μs (42.7% faster)
    assert outcome is False


def test_strings_differing_only_in_leading_whitespace():
    """Shifting every line to the right by four spaces keeps the sources equal."""
    flush_left = "def foo():\n    pass"
    shifted = "    def foo():\n        pass"
    outcome = _normalized_equal(flush_left, shifted)  # 3.17μs -> 2.37μs (33.3% faster)
    assert outcome is True


def test_strings_differing_only_in_trailing_whitespace():
    """Trailing spaces and a final newline do not affect equality."""
    clean = "def foo():\n    pass"
    trailing = "def foo():\n    pass   \n"
    outcome = _normalized_equal(clean, trailing)  # 3.00μs -> 2.12μs (41.5% faster)
    assert outcome is True


def test_strings_differing_only_in_internal_indentation():
    """Dropping the indentation of a body line keeps the sources equal."""
    indented_body = "def foo():\n    pass"
    flat_body = "def foo():\npass"
    outcome = _normalized_equal(indented_body, flat_body)  # 2.86μs -> 2.02μs (41.6% faster)
    assert outcome is True


def test_strings_with_extra_blank_lines():
    """Blank lines inserted between statements are ignored."""
    dense = "def foo():\n    pass\n    return"
    airy = "def foo():\n\n    pass\n\n    return"
    outcome = _normalized_equal(dense, airy)  # 3.76μs -> 2.95μs (27.1% faster)
    assert outcome is True


def test_strings_with_tabs_and_spaces():
    """A tab indent and a four-space indent normalize to the same text."""
    space_indented = "def foo():\n    pass"
    tab_indented = "def foo():\n\tpass"
    outcome = _normalized_equal(space_indented, tab_indented)  # 2.81μs -> 2.05μs (37.1% faster)
    assert outcome is True


def test_single_line_with_leading_trailing_spaces():
    """Padding around a one-line input is ignored."""
    outcome = _normalized_equal("  hello  ", "hello")  # 2.37μs -> 1.79μs (32.3% faster)
    assert outcome is True


def test_empty_strings():
    """The empty string equals itself."""
    outcome = _normalized_equal("", "")  # 2.00μs -> 1.20μs (66.6% faster)
    assert outcome is True


def test_strings_with_only_whitespace():
    """Lines holding nothing but spaces normalize to the empty string."""
    outcome = _normalized_equal("   \n   \n   ", "")  # 1.96μs -> 1.98μs (1.01% slower)
    assert outcome is True


def test_one_empty_one_whitespace_only():
    """A run of spaces is equivalent to the empty string."""
    outcome = _normalized_equal("", "     ")  # 1.87μs -> 1.41μs (32.6% faster)
    assert outcome is True


def test_strings_with_only_newlines():
    """A run of newlines is equivalent to the empty string."""
    outcome = _normalized_equal("\n\n\n", "")  # 1.84μs -> 1.71μs (7.65% faster)
    assert outcome is True


def test_strings_with_mixed_newline_types():
    """Sources compare equal regardless of how their line breaks are represented."""
    code1 = "line1\nline2\nline3"

    # Doubled LF line breaks (blank lines in between).
    code2 = "line1\n\nline2\n\nline3"
    assert _normalized_equal(code1, code2) is True  # 3.78μs -> 2.89μs (30.5% faster)

    # CRLF, bare CR, and a mixture of all three terminators; str.splitlines()
    # treats each of them as a line boundary.
    assert _normalized_equal(code1, "line1\r\nline2\r\nline3") is True
    assert _normalized_equal(code1, "line1\rline2\rline3") is True
    assert _normalized_equal(code1, "line1\r\nline2\rline3\n") is True


def test_complex_multiline_with_varied_indentation():
    """A nested if/else block equals its fully de-indented form."""
    # Spelled out line by line so the surrounding newlines and indents are explicit.
    indented = (
        "\n"
        "    def method():\n"
        "        if True:\n"
        "            return 42\n"
        "        else:\n"
        "            return 0\n"
        "    "
    )
    flat = "def method():\nif True:\nreturn 42\nelse:\nreturn 0"
    outcome = _normalized_equal(indented, flat)  # 4.92μs -> 4.27μs (15.2% faster)
    assert outcome is True


def test_strings_with_special_characters():
    """Punctuation-heavy content compares equal to an identical copy."""
    first = "print('hello!@#$%^&*()')"
    second = "print('hello!@#$%^&*()')"
    outcome = _normalized_equal(first, second)  # 2.25μs -> 1.64μs (37.2% faster)
    assert outcome is True


def test_strings_with_special_characters_different():
    """A difference in punctuation characters is detected."""
    first = "print('hello!@#')"
    second = "print('hello!$%')"
    outcome = _normalized_equal(first, second)  # 2.21μs -> 1.48μs (49.3% faster)
    assert outcome is False


def test_strings_with_unicode_characters():
    """Accented characters compare equal to an identical copy."""
    first = "# ñ é ü\nprint('café')"
    second = "# ñ é ü\nprint('café')"
    outcome = _normalized_equal(first, second)  # 4.06μs -> 3.35μs (21.3% faster)
    assert outcome is True


def test_strings_with_different_unicode():
    """An accented letter is not equal to its unaccented counterpart."""
    accented = "print('café')"
    plain = "print('cafe')"
    outcome = _normalized_equal(accented, plain)  # 2.56μs -> 1.68μs (52.4% faster)
    assert outcome is False


def test_single_character_strings():
    """A one-character string equals itself."""
    outcome = _normalized_equal("a", "a")  # 2.09μs -> 1.40μs (49.3% faster)
    assert outcome is True


def test_single_character_strings_different():
    """Two distinct one-character strings are not equal."""
    outcome = _normalized_equal("a", "b")  # 2.10μs -> 1.43μs (46.9% faster)
    assert outcome is False


def test_very_long_identical_lines():
    """A 1000-character line equals itself."""
    thousand_x = "x" * 1000
    outcome = _normalized_equal(thousand_x, thousand_x)  # 3.58μs -> 2.97μs (20.5% faster)
    assert outcome is True


def test_very_long_different_lines():
    """Two 1000-character lines built from different letters are not equal."""
    thousand_x = "x" * 1000
    thousand_y = "y" * 1000
    outcome = _normalized_equal(thousand_x, thousand_y)  # 3.71μs -> 2.93μs (26.6% faster)
    assert outcome is False


def test_strings_with_consecutive_spaces_in_content():
    """Double spaces inside a line match the same double spaces in the other input."""
    first = "x  =  5"
    second = "x  =  5"
    outcome = _normalized_equal(first, second)  # 2.09μs -> 1.43μs (46.2% faster)
    assert outcome is True


def test_strings_with_different_consecutive_spaces():
    """Double versus single spaces inside a line make the inputs unequal."""
    double_spaced = "x  =  5"
    single_spaced = "x = 5"
    outcome = _normalized_equal(double_spaced, single_spaced)  # 2.07μs -> 1.48μs (39.8% faster)
    assert outcome is False


def test_multiline_with_blank_lines_at_start():
    """Leading blank lines are discarded."""
    prefixed = "\n\n\nhello"
    bare = "hello"
    outcome = _normalized_equal(prefixed, bare)  # 2.21μs -> 2.09μs (5.73% faster)
    assert outcome is True


def test_multiline_with_blank_lines_at_end():
    """Trailing blank lines are discarded."""
    suffixed = "hello\n\n\n"
    bare = "hello"
    outcome = _normalized_equal(suffixed, bare)  # 2.17μs -> 1.83μs (18.6% faster)
    assert outcome is True


def test_multiline_with_blank_lines_everywhere():
    """Blank lines before, between and after the content are all discarded."""
    scattered = "\n\n  hello  \n\n  world  \n\n"
    compact = "hello\nworld"
    outcome = _normalized_equal(scattered, compact)  # 3.71μs -> 3.02μs (22.9% faster)
    assert outcome is True


def test_string_with_only_spaces():
    """Five spaces normalize to the empty string."""
    outcome = _normalized_equal("     ", "")  # 1.95μs -> 1.43μs (36.4% faster)
    assert outcome is True


def test_java_method_example_1():
    """An indented Java method equals the same method with all indentation removed."""
    # Spelled out line by line so the surrounding newlines and indents are explicit.
    indented = (
        "\n"
        "    public void doSomething() {\n"
        '        System.out.println("Hello");\n'
        "    }\n"
        "    "
    )
    flat = 'public void doSomething() {\nSystem.out.println("Hello");\n}'
    outcome = _normalized_equal(indented, flat)  # 4.06μs -> 3.56μs (13.8% faster)
    assert outcome is True


def test_java_method_example_2():
    """A deeply indented Java method equals its de-indented form."""
    # Spelled out line by line so the surrounding newlines and indents are explicit.
    indented = (
        "\n"
        "        public int calculate(int x, int y) {\n"
        "            return x + y;\n"
        "        }\n"
        "    "
    )
    flat = "public int calculate(int x, int y) {\nreturn x + y;\n}"
    outcome = _normalized_equal(indented, flat)  # 3.88μs -> 3.19μs (21.7% faster)
    assert outcome is True


def test_strings_case_sensitive():
    """Letter case is significant."""
    outcome = _normalized_equal("Hello", "hello")  # 2.20μs -> 1.48μs (48.7% faster)
    assert outcome is False


def test_strings_with_leading_spaces_only_first():
    """Stripping the indent does not make two different words equal."""
    indented_word = "    hello"
    other_word = "world"
    outcome = _normalized_equal(indented_word, other_word)  # 2.17μs -> 1.61μs (34.8% faster)
    assert outcome is False


def test_large_multiline_string_identical():
    """100 lines equal the same 100 lines padded with four spaces on each side."""
    rows = [f"line {n}" for n in range(100)]
    plain = "\n".join(rows)
    padded = "\n".join(f"    {row}    " for row in rows)
    outcome = _normalized_equal(plain, padded)  # 30.6μs -> 22.6μs (35.5% faster)
    assert outcome is True


def test_very_large_multiline_string_identical():
    """500 one-line function stubs equal the same stubs with a four-space indent."""
    rows = [f"def func_{n}(): pass" for n in range(500)]
    plain = "\n".join(rows)
    # A leading empty entry makes the separator put the indent before every stub.
    indented = "\n    ".join(["", *rows])
    outcome = _normalized_equal(plain, indented)  # 136μs -> 101μs (34.6% faster)
    assert outcome is True


def test_large_string_with_many_blank_lines():
    """A blank line between every pair of the 100 lines makes no difference."""
    rows = [f"line {n}" for n in range(100)]
    single_spaced = "\n".join(rows)
    double_spaced = "\n\n".join(rows)
    outcome = _normalized_equal(single_spaced, double_spaced)  # 28.9μs -> 23.5μs (23.0% faster)
    assert outcome is True


def test_deeply_indented_code():
    """A staircase of growing indents equals the flat list of the same lines."""
    staircase = "level1\n  level2\n    level3\n      level4\n        level5"
    flat = "level1\nlevel2\nlevel3\nlevel4\nlevel5"
    outcome = _normalized_equal(staircase, flat)  # 4.23μs -> 3.31μs (27.9% faster)
    assert outcome is True


def test_large_identical_code_blocks():
    """50 copies of a block match whether or not blank lines separate the copies."""
    unit = "\n".join(
        [
            "def method():",
            "    if condition:",
            "        do_something()",
            "    else:",
            "        do_other_thing()",
            "    return result",
        ]
    )
    copies = [unit] * 50
    spaced = "\n\n".join(copies)
    tight = "\n".join(copies)
    outcome = _normalized_equal(spaced, tight)  # 94.9μs -> 69.5μs (36.6% faster)
    assert outcome is True


def test_performance_with_1000_character_lines():
    """One very long assignment line equals itself padded with two spaces per side."""
    long_line = "x = " + ", ".join(map(str, range(250)))
    padded = f"  {long_line}  "
    outcome = _normalized_equal(long_line, padded)  # 4.16μs -> 3.67μs (13.1% faster)
    assert outcome is True


def test_large_string_with_mixed_content():
    """Blank entries and per-line indentation are both ignored on a 200-statement input."""
    parts = []
    for i in range(200):
        parts.append("statement_" + str(i))
        if i % 10 == 0:
            parts.append("")  # blank line after every tenth statement
    code1 = "\n".join(parts)
    code2 = "\n    ".join(parts)  # same entries, each following line indented by four spaces
    assert _normalized_equal(code1, code2) is True  # 57.1μs -> 44.5μs (28.5% faster)


def test_equal_after_normalization_large_diff():
    """300 lines survive heavy padding and two blank lines before every fifth entry."""
    rows = [f"code_{n}" for n in range(300)]
    plain = "\n".join(rows)

    noisy_rows = []
    for position, row in enumerate(rows):
        if not position % 5:
            noisy_rows.extend(["", ""])
        noisy_rows.append(f"        {row}        ")
    noisy = "\n".join(noisy_rows)

    outcome = _normalized_equal(plain, noisy)  # 92.0μs -> 69.5μs (32.4% faster)
    assert outcome is True


def test_different_large_strings():
    """100 lines with one prefix never equal 100 lines with another prefix."""
    series_a = "\n".join(f"line_a_{n}" for n in range(100))
    series_b = "\n".join(f"line_b_{n}" for n in range(100))
    outcome = _normalized_equal(series_a, series_b)  # 24.5μs -> 19.7μs (24.7% faster)
    assert outcome is False


def test_large_similar_strings_one_line_different():
    """A single altered line among 100 is enough to break equality."""
    baseline = [f"line_{n}" for n in range(100)]
    altered = list(baseline)
    altered[50] = "different_line"
    outcome = _normalized_equal("\n".join(baseline), "\n".join(altered))  # 24.6μs -> 19.1μs (28.9% faster)
    assert outcome is False


def test_alternating_indentation_pattern():
    """Lines alternating between two- and four-space indents equal the flat lines."""
    zigzag_rows = [("  " if n % 2 == 0 else "    ") + f"line_{n}" for n in range(100)]
    zigzag = "\n".join(zigzag_rows)
    flat = "\n".join(f"line_{n}" for n in range(100))
    outcome = _normalized_equal(zigzag, flat)  # 28.9μs -> 21.3μs (36.1% faster)
    assert outcome is True


def test_worst_case_whitespace_normalization():
    """Each of 200 lines carries a different indent width, yet the result equals the flat text."""
    ramp_rows = [" " * n + f"code_{n}" for n in range(200)]
    ramp = "\n".join(ramp_rows)
    flat = "\n".join(f"code_{n}" for n in range(200))
    outcome = _normalized_equal(ramp, flat)  # 103μs -> 72.3μs (43.1% faster)
    assert outcome is True

To test or edit this optimization locally, run: git merge codeflash/optimize-pr1950-2026-04-01T16.29.13

Suggested change
return "\n".join(line.strip() for line in s.strip().splitlines() if line.strip())
parts: list[str] = []
for line in s.splitlines():
stripped = line.strip()
if stripped:
parts.append(stripped)
return "\n".join(parts)

Static Badge


return normalize(a) == normalize(b)


def _extract_original_method_source(
source: str, func_name: str, function: FunctionToOptimize, analyzer: JavaAnalyzer
) -> str | None:
"""Extract the original method source from the file for comparison."""
methods = analyzer.find_methods(source)
matching = [
m
for m in methods
if m.name == func_name and (function.class_name is None or m.class_name == function.class_name)
]
if not matching:
return None
target = matching[0]
if len(matching) > 1 and function.starting_line and function.ending_line:
for m in matching:
if abs(m.start_line - function.starting_line) <= 5:
target = m
break
start = (target.javadoc_start_line or target.start_line) - 1
end = target.end_line
lines = source.splitlines(keepends=True)
return "".join(lines[start:end])


def replace_function(
source: str, function: FunctionToOptimize, new_source: str, analyzer: JavaAnalyzer | None = None
) -> str:
Expand Down Expand Up @@ -405,6 +438,26 @@ def replace_function(
logger.warning("No valid replacement found for method '%s'. Returning original source.", func_name)
return source

# Guard: reject optimizations that don't actually change the target method but modify surrounding class members.
# This catches the "wrong-file" pattern where the LLM adds cache fields, modifies constructors, or adds helpers
# without changing the method it was asked to optimize (15+ known PRs exhibit this pattern).
has_class_modifications = bool(
parsed.new_fields or parsed.helpers_before_target or parsed.helpers_after_target or parsed.modified_constructors
)
if has_class_modifications:
original_target = _extract_original_method_source(source, func_name, function, analyzer)
if original_target is not None and _normalized_equal(parsed.target_method_source, original_target):
logger.warning(
"Rejecting optimization for '%s': target method is unchanged but LLM modified surrounding class members "
"(fields=%d, helpers_before=%d, helpers_after=%d, constructors=%d). This is a no-op optimization.",
func_name,
len(parsed.new_fields),
len(parsed.helpers_before_target),
len(parsed.helpers_after_target),
len(parsed.modified_constructors),
)
return source

# Find the method in the original source
methods = analyzer.find_methods(source)
target_method = None
Expand Down
208 changes: 208 additions & 0 deletions tests/test_languages/test_java/test_replacement.py
Original file line number Diff line number Diff line change
Expand Up @@ -1784,6 +1784,214 @@ def test_class_wrapper_with_wrong_target_method_leaves_source_unchanged(self, tm
assert java_file.read_text(encoding="utf-8") == original_code


class TestUnchangedTargetWithClassModifications:
"""Tests that optimizations modifying class members without changing the target method are rejected.

This guards against the "wrong-file" pattern where the LLM adds cache fields,
modifies constructors, or adds helpers without changing the method it was asked to optimize.
Each test writes a Java file into ``tmp_path``, feeds a markdown-wrapped candidate to
``replace_function_definitions_for_language`` and checks the returned flag.
"""

def test_unchanged_target_with_unused_field_rejected(self, tmp_path, java_support):
"""LLM adds a field but leaves the target method unchanged — should reject."""
java_file = tmp_path / "SessionContext.java"
original_code = """\
public class SessionContext {
private String routeVIP;

public String getRouteVIP() {
return routeVIP;
}
}
"""
java_file.write_text(original_code, encoding="utf-8")

# LLM adds an unreferenced constant field (DEFAULT_VIP) but leaves getRouteVIP untouched.
# Braces are doubled below because the markdown is built with an f-string.
optimized_markdown = f"""```java:{java_file.relative_to(tmp_path)}
public class SessionContext {{
private String routeVIP;
private static final String DEFAULT_VIP = "";

public String getRouteVIP() {{
return routeVIP;
}}
}}
```"""

optimized_code = CodeStringsMarkdown.parse_markdown_code(optimized_markdown, expected_language="java")
result = replace_function_definitions_for_language(
function_names=["getRouteVIP"],
optimized_code=optimized_code,
module_abspath=java_file,
project_root_path=tmp_path,
lang_support=java_support,
)

# A rejected candidate reports False and must leave the file on disk untouched.
assert result is False
assert java_file.read_text(encoding="utf-8") == original_code

def test_unchanged_target_with_modified_constructor_rejected(self, tmp_path, java_support):
"""LLM modifies constructor but leaves target method unchanged — should reject.

Reproduces Zuul #52: Header.getValue — title says getValue but diff changes constructor.
"""
java_file = tmp_path / "Header.java"
original_code = """\
public class Header {
private final String name;
private final String value;

public Header(String name, String value) {
this.name = name;
this.value = value;
}

public String getValue() {
return value;
}
}
"""
java_file.write_text(original_code, encoding="utf-8")

# LLM interns the value inside the constructor but leaves getValue untouched.
optimized_markdown = f"""```java:{java_file.relative_to(tmp_path)}
public class Header {{
private final String name;
private final String value;

public Header(String name, String value) {{
this.name = name;
this.value = value != null ? value.intern() : null;
}}

public String getValue() {{
return value;
}}
}}
```"""

optimized_code = CodeStringsMarkdown.parse_markdown_code(optimized_markdown, expected_language="java")
result = replace_function_definitions_for_language(
function_names=["getValue"],
optimized_code=optimized_code,
module_abspath=java_file,
project_root_path=tmp_path,
lang_support=java_support,
)

# A rejected candidate reports False and must leave the file on disk untouched.
assert result is False
assert java_file.read_text(encoding="utf-8") == original_code

def test_unchanged_target_with_helper_rejected(self, tmp_path, java_support):
"""LLM adds a helper method but leaves target method unchanged — should reject."""
java_file = tmp_path / "FilterRegistry.java"
original_code = """\
public class FilterRegistry {
private final java.util.Map<String, Object> filters = new java.util.HashMap<>();

public int size() {
return filters.size();
}
}
"""
java_file.write_text(original_code, encoding="utf-8")

# LLM adds a helper (cachedSize) placed before the target but doesn't change size().
optimized_markdown = f"""```java:{java_file.relative_to(tmp_path)}
public class FilterRegistry {{
private final java.util.Map<String, Object> filters = new java.util.HashMap<>();

private int cachedSize() {{
return filters.size();
}}

public int size() {{
return filters.size();
}}
}}
```"""

optimized_code = CodeStringsMarkdown.parse_markdown_code(optimized_markdown, expected_language="java")
result = replace_function_definitions_for_language(
function_names=["size"],
optimized_code=optimized_code,
module_abspath=java_file,
project_root_path=tmp_path,
lang_support=java_support,
)

# A rejected candidate reports False and must leave the file on disk untouched.
assert result is False
assert java_file.read_text(encoding="utf-8") == original_code

def test_changed_target_with_field_accepted(self, tmp_path, java_support):
"""LLM changes target method AND adds a field — should accept (valid optimization)."""
java_file = tmp_path / "Fibonacci.java"
original_code = """\
public class Fibonacci {
public static long fib(int n) {
if (n <= 1) return n;
return fib(n - 1) + fib(n - 2);
}
}
"""
java_file.write_text(original_code, encoding="utf-8")

# The candidate adds a CACHE field and rewrites fib to consult and fill it.
optimized_markdown = f"""```java:{java_file.relative_to(tmp_path)}
public class Fibonacci {{
private static final long[] CACHE = new long[100];

public static long fib(int n) {{
if (n <= 1) return n;
if (n < 100 && CACHE[n] != 0) return CACHE[n];
long result = fib(n - 1) + fib(n - 2);
if (n < 100) CACHE[n] = result;
return result;
}}
}}
```"""

optimized_code = CodeStringsMarkdown.parse_markdown_code(optimized_markdown, expected_language="java")
result = replace_function_definitions_for_language(
function_names=["fib"],
optimized_code=optimized_code,
module_abspath=java_file,
project_root_path=tmp_path,
lang_support=java_support,
)

# NOTE(review): only the returned flag is checked; the rewritten file contents are not asserted.
assert result is True

def test_changed_target_only_accepted(self, tmp_path, java_support):
"""LLM changes only the target method (no class modifications) — should accept."""
java_file = tmp_path / "Calculator.java"
original_code = """\
public class Calculator {
public int add(int a, int b) {
return a + b;
}
}
"""
java_file.write_text(original_code, encoding="utf-8")

# The candidate changes only the body of add; no fields, helpers or constructors are touched.
optimized_markdown = f"""```java:{java_file.relative_to(tmp_path)}
public class Calculator {{
public int add(int a, int b) {{
return Math.addExact(a, b);
}}
}}
```"""

optimized_code = CodeStringsMarkdown.parse_markdown_code(optimized_markdown, expected_language="java")
result = replace_function_definitions_for_language(
function_names=["add"],
optimized_code=optimized_code,
module_abspath=java_file,
project_root_path=tmp_path,
lang_support=java_support,
)

# NOTE(review): only the returned flag is checked; the rewritten file contents are not asserted.
assert result is True


class TestAnonymousInnerClassMethods:
"""Tests that methods inside anonymous inner classes are not hoisted as helpers.

Expand Down
Loading