From 98feb96e36b33858597a7763cefa249c711f38eb Mon Sep 17 00:00:00 2001 From: Chris Burroughs Date: Thu, 8 Jan 2026 14:47:24 -0500 Subject: [PATCH 1/4] add a backend for running codespell as a linter To quote from the codespell README: "Fix common misspellings in text files. It's designed primarily for checking misspelled words in source code". The intent is to have few enough false positives that it could be used as a linter. When I run it at a $DAYJOB repo it picks up all sorts of embarrassments like: ``` lastest ==> latest, last worfklow ==> workflow imapct ==> impact Nmber ==> Number ``` LLM Disclosure: I tried to code this as an experiment in leaning hard on Claude to understand Pants backends; the vast majority of the code was generated by Claude, which I then had it iterate on before minor cleanup edits. My first prompt was: ``` > We are going to create a new backend for https://pypi.org/project/codespell in the Pants build system under src/python/pants/backend/tools/backend. Look at these SHAs for examples of adding new backends f6e51c2873d51df2b63853a0b8db13b4e94292f3 cb63bba66817677a1dcb862c150e6fc7ca9f96dd 9465d3d75091d7ca44bbfc492c09e3c6d418a4e8 ``` I went back and forth a bunch on partitioning strategies. It seemed to me that what users expect is to have multiple config files and things Just Work -- albeit with uncertainty with regards to the expected behavior being "use the nearest config file" or "magically merge them". So I went with per config partitioning, but learned in the process that most backends use a single partition. * partition_inputs is long and hard to follow... * But! It is almost identical to yamllint. I think we are lacking a good abstraction for config-based partitioning, and this would be simpler if we had one. * codespell uses different flags based on the format of the config file, which adds some more incidental conditionals. * But on the third hand, I don't think a human would bother with supporting this complicated a partitioning strategy just to check some words. 
So this is real code and I'd like to land and use it, but I'm neutral on which partitioning strategy is best to keep. --- .../bin/generate_builtin_lockfiles.py | 2 + docs/notes/2.31.x.md | 6 + .../experimental/tools/codespell/BUILD | 3 + .../experimental/tools/codespell/__init__.py | 2 + .../experimental/tools/codespell/register.py | 25 ++ .../pants/backend/tools/codespell/BUILD | 15 ++ .../pants/backend/tools/codespell/__init__.py | 2 + .../backend/tools/codespell/codespell.lock | 244 ++++++++++++++++++ .../tools/codespell/codespell.lock.metadata | 20 ++ .../pants/backend/tools/codespell/rules.py | 190 ++++++++++++++ .../tools/codespell/rules_integration_test.py | 227 ++++++++++++++++ .../backend/tools/codespell/subsystem.py | 83 ++++++ 12 files changed, 819 insertions(+) create mode 100644 src/python/pants/backend/experimental/tools/codespell/BUILD create mode 100644 src/python/pants/backend/experimental/tools/codespell/__init__.py create mode 100644 src/python/pants/backend/experimental/tools/codespell/register.py create mode 100644 src/python/pants/backend/tools/codespell/BUILD create mode 100644 src/python/pants/backend/tools/codespell/__init__.py create mode 100644 src/python/pants/backend/tools/codespell/codespell.lock create mode 100644 src/python/pants/backend/tools/codespell/codespell.lock.metadata create mode 100644 src/python/pants/backend/tools/codespell/rules.py create mode 100644 src/python/pants/backend/tools/codespell/rules_integration_test.py create mode 100644 src/python/pants/backend/tools/codespell/subsystem.py diff --git a/build-support/bin/generate_builtin_lockfiles.py b/build-support/bin/generate_builtin_lockfiles.py index 7864703ac32..c94de307d6a 100644 --- a/build-support/bin/generate_builtin_lockfiles.py +++ b/build-support/bin/generate_builtin_lockfiles.py @@ -56,6 +56,7 @@ from pants.backend.scala.subsystems.scalatest import Scalatest from pants.backend.sql.lint.sqlfluff.subsystem import Sqlfluff from 
pants.backend.terraform.dependency_inference import TerraformHcl2Parser +from pants.backend.tools.codespell.subsystem import Codespell from pants.backend.tools.semgrep.subsystem import SemgrepSubsystem from pants.backend.tools.yamllint.subsystem import Yamllint from pants.base.build_environment import get_buildroot @@ -110,6 +111,7 @@ class JvmTool(Tool[JvmToolBase]): ... PythonTool(Bandit, "pants.backend.python.lint.bandit"), PythonTool(Black, "pants.backend.python.lint.black"), PythonTool(ClangFormat, "pants.backend.experimental.cc.lint.clangformat"), + PythonTool(Codespell, "pants.backend.experimental.tools.codespell"), PythonTool(CoverageSubsystem, "pants.backend.python"), PythonTool(DebugPy, "pants.backend.python"), PythonTool(Docformatter, "pants.backend.python.lint.docformatter"), diff --git a/docs/notes/2.31.x.md b/docs/notes/2.31.x.md index 6bfcec42050..850173ecff1 100644 --- a/docs/notes/2.31.x.md +++ b/docs/notes/2.31.x.md @@ -33,6 +33,12 @@ This work stands on the shoulders of support from the [Science Projects](https:/ ### Backends +#### New: Codespell + +Pants now supports running [codespell](https://github.com/codespell-project/codespell) as a linter. `codespell` isn't a regular spell checker with a full dictionary; rather, it is intended to be used on source code with a much smaller list of common misspellings. + +Enable the `pants.backend.experimental.tools.codespell` backend to try it out. + #### Helm #### JVM diff --git a/src/python/pants/backend/experimental/tools/codespell/BUILD b/src/python/pants/backend/experimental/tools/codespell/BUILD new file mode 100644 index 00000000000..3166cdd67c9 --- /dev/null +++ b/src/python/pants/backend/experimental/tools/codespell/BUILD @@ -0,0 +1,3 @@ +# Copyright 2026 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). 
+python_sources() diff --git a/src/python/pants/backend/experimental/tools/codespell/__init__.py b/src/python/pants/backend/experimental/tools/codespell/__init__.py new file mode 100644 index 00000000000..9c3e8bdf10b --- /dev/null +++ b/src/python/pants/backend/experimental/tools/codespell/__init__.py @@ -0,0 +1,2 @@ +# Copyright 2026 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). diff --git a/src/python/pants/backend/experimental/tools/codespell/register.py b/src/python/pants/backend/experimental/tools/codespell/register.py new file mode 100644 index 00000000000..9a91b4e4ca7 --- /dev/null +++ b/src/python/pants/backend/experimental/tools/codespell/register.py @@ -0,0 +1,25 @@ +# Copyright 2026 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). + +"""A tool to find common misspellings in text files. + +See https://github.com/codespell-project/codespell for details. +""" + +from __future__ import annotations + +from typing import Iterable + +from pants.backend.python.goals import lockfile as python_lockfile +from pants.backend.tools.codespell import rules as codespell_rules +from pants.backend.tools.codespell import subsystem as subsystem +from pants.engine.rules import Rule +from pants.engine.unions import UnionRule + + +def rules() -> Iterable[Rule | UnionRule]: + return ( + *codespell_rules.rules(), + *subsystem.rules(), + *python_lockfile.rules(), + ) diff --git a/src/python/pants/backend/tools/codespell/BUILD b/src/python/pants/backend/tools/codespell/BUILD new file mode 100644 index 00000000000..e16e359a2c8 --- /dev/null +++ b/src/python/pants/backend/tools/codespell/BUILD @@ -0,0 +1,15 @@ +# Copyright 2026 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). 
+ +resource(name="lockfile", source="codespell.lock") + +python_sources(dependencies=[":lockfile"]) + +python_tests( + name="tests", + overrides={ + "rules_integration_test.py": { + "timeout": 180, + } + }, +) diff --git a/src/python/pants/backend/tools/codespell/__init__.py b/src/python/pants/backend/tools/codespell/__init__.py new file mode 100644 index 00000000000..9c3e8bdf10b --- /dev/null +++ b/src/python/pants/backend/tools/codespell/__init__.py @@ -0,0 +1,2 @@ +# Copyright 2026 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). diff --git a/src/python/pants/backend/tools/codespell/codespell.lock b/src/python/pants/backend/tools/codespell/codespell.lock new file mode 100644 index 00000000000..bd447c1d6fc --- /dev/null +++ b/src/python/pants/backend/tools/codespell/codespell.lock @@ -0,0 +1,244 @@ +{ + "allow_builds": true, + "allow_prereleases": false, + "allow_wheels": true, + "build_isolation": true, + "constraints": [], + "elide_unused_requires_dist": false, + "excluded": [], + "locked_resolves": [ + { + "locked_requirements": [ + { + "artifacts": [ + { + "algorithm": "sha256", + "hash": "3dadafa67df7e4a3dbf51e0d7315061b80d265f9552ebd699b3dd6834b47e425", + "url": "https://files.pythonhosted.org/packages/20/01/b394922252051e97aab231d416c86da3d8a6d781eeadcdca1082867de64e/codespell-2.4.1-py3-none-any.whl" + }, + { + "algorithm": "sha256", + "hash": "299fcdcb09d23e81e35a671bbe746d5ad7e8385972e65dbb833a2eaac33c01e5", + "url": "https://files.pythonhosted.org/packages/15/e0/709453393c0ea77d007d907dd436b3ee262e28b30995ea1aa36c6ffbccaf/codespell-2.4.1.tar.gz" + } + ], + "project_name": "codespell", + "requires_dists": [ + "Pygments; extra == \"dev\"", + "build; extra == \"dev\"", + "chardet; extra == \"dev\"", + "chardet; extra == \"hard-encoding-detection\"", + "chardet>=5.1.0; extra == \"types\"", + "mypy; extra == \"types\"", + "pre-commit; extra == \"dev\"", + "pytest-cov; extra == \"dev\"", + 
"pytest-cov; extra == \"types\"", + "pytest-dependency; extra == \"dev\"", + "pytest-dependency; extra == \"types\"", + "pytest; extra == \"dev\"", + "pytest; extra == \"types\"", + "ruff; extra == \"dev\"", + "tomli; extra == \"dev\"", + "tomli; python_version < \"3.11\" and extra == \"toml\"", + "twine; extra == \"dev\"" + ], + "requires_python": ">=3.8", + "version": "2.4.1" + }, + { + "artifacts": [ + { + "algorithm": "sha256", + "hash": "e95b1af3c5b07d9e643909b5abbec77cd9f1217e6d0bca72b0234736b9fb1f1b", + "url": "https://files.pythonhosted.org/packages/77/b8/0135fadc89e73be292b473cb820b4f5a08197779206b33191e801feeae40/tomli-2.3.0-py3-none-any.whl" + }, + { + "algorithm": "sha256", + "hash": "b74a0e59ec5d15127acdabd75ea17726ac4c5178ae51b85bfe39c4f8a278e879", + "url": "https://files.pythonhosted.org/packages/15/1b/8c26874ed1f6e4f1fcfeb868db8a794cbe9f227299402db58cfcc858766c/tomli-2.3.0-cp314-cp314-musllinux_1_2_aarch64.whl" + }, + { + "algorithm": "sha256", + "hash": "cebc6fe843e0733ee827a282aca4999b596241195f43b4cc371d64fc6639da9e", + "url": "https://files.pythonhosted.org/packages/19/94/aeafa14a52e16163008060506fcb6aa1949d13548d13752171a755c65611/tomli-2.3.0-cp314-cp314-macosx_10_13_x86_64.whl" + }, + { + "algorithm": "sha256", + "hash": "a56212bdcce682e56b0aaf79e869ba5d15a6163f88d5451cbde388d48b13f530", + "url": "https://files.pythonhosted.org/packages/26/5a/4b546a0405b9cc0659b399f12b6adb750757baf04250b148d3c5059fc4eb/tomli-2.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl" + }, + { + "algorithm": "sha256", + "hash": "c22a8bf253bacc0cf11f35ad9808b6cb75ada2631c2d97c971122583b129afbc", + "url": "https://files.pythonhosted.org/packages/26/b6/d1eccb62f665e44359226811064596dd6a366ea1f985839c566cd61525ae/tomli-2.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl" + }, + { + "algorithm": "sha256", + "hash": "a4ea38c40145a357d513bffad0ed869f13c1773716cf71ccaa83b0fa0cc4e42f", + "url": 
"https://files.pythonhosted.org/packages/30/77/fed85e114bde5e81ecf9bc5da0cc69f2914b38f4708c80ae67d0c10180c5/tomli-2.3.0-cp313-cp313-musllinux_1_2_aarch64.whl" + }, + { + "algorithm": "sha256", + "hash": "f85209946d1fe94416debbb88d00eb92ce9cd5266775424ff81bc959e001acaf", + "url": "https://files.pythonhosted.org/packages/39/67/f85d9bd23182f45eca8939cd2bc7050e1f90c41f4a2ecbbd5963a1d1c486/tomli-2.3.0-cp314-cp314t-macosx_11_0_arm64.whl" + }, + { + "algorithm": "sha256", + "hash": "c4665508bcbac83a31ff8ab08f424b665200c0e1e645d2bd9ab3d3e557b6185b", + "url": "https://files.pythonhosted.org/packages/42/17/5e2c956f0144b812e7e107f94f1cc54af734eb17b5191c0bbfb72de5e93e/tomli-2.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl" + }, + { + "algorithm": "sha256", + "hash": "c5f3ffd1e098dfc032d4d3af5c0ac64f6d286d98bc148698356847b80fa4de1b", + "url": "https://files.pythonhosted.org/packages/42/4f/2c12a72ae22cf7b59a7fe75b3465b7aba40ea9145d026ba41cb382075b0e/tomli-2.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl" + }, + { + "algorithm": "sha256", + "hash": "e31d432427dcbf4d86958c184b9bfd1e96b5b71f8eb17e6d02531f434fd335b8", + "url": "https://files.pythonhosted.org/packages/45/12/ad5126d3a278f27e6701abde51d342aa78d06e27ce2bb596a01f7709a5a2/tomli-2.3.0-cp312-cp312-musllinux_1_2_aarch64.whl" + }, + { + "algorithm": "sha256", + "hash": "4f195fe57ecceac95a66a75ac24d9d5fbc98ef0962e09b2eddec5d39375aae52", + "url": "https://files.pythonhosted.org/packages/45/e5/7c5119ff39de8693d6baab6c0b6dcb556d192c165596e9fc231ea1052041/tomli-2.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl" + }, + { + "algorithm": "sha256", + "hash": "d1381caf13ab9f300e30dd8feadb3de072aeb86f1d34a8569453ff32a7dea4bf", + "url": 
"https://files.pythonhosted.org/packages/47/5c/24935fb6a2ee63e86d80e4d3b58b222dafaf438c416752c8b58537c8b89a/tomli-2.3.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl" + }, + { + "algorithm": "sha256", + "hash": "64be704a875d2a59753d80ee8a533c3fe183e3f06807ff7dc2232938ccb01549", + "url": "https://files.pythonhosted.org/packages/52/ed/3f73f72945444548f33eba9a87fc7a6e969915e7b1acc8260b30e1f76a2f/tomli-2.3.0.tar.gz" + }, + { + "algorithm": "sha256", + "hash": "940d56ee0410fa17ee1f12b817b37a4d4e4dc4d27340863cc67236c74f582e77", + "url": "https://files.pythonhosted.org/packages/54/78/5c46fff6432a712af9f792944f4fcd7067d8823157949f4e40c56b8b3c83/tomli-2.3.0-cp314-cp314t-macosx_10_13_x86_64.whl" + }, + { + "algorithm": "sha256", + "hash": "ad805ea85eda330dbad64c7ea7a4556259665bdf9d2672f5dccc740eb9d3ca05", + "url": "https://files.pythonhosted.org/packages/55/92/afed3d497f7c186dc71e6ee6d4fcb0acfa5f7d0a1a2878f8beae379ae0cc/tomli-2.3.0-cp313-cp313-musllinux_1_2_x86_64.whl" + }, + { + "algorithm": "sha256", + "hash": "792262b94d5d0a466afb5bc63c7daa9d75520110971ee269152083270998316f", + "url": "https://files.pythonhosted.org/packages/60/83/59bff4996c2cf9f9387a0f5a3394629c7efa5ef16142076a23a90f1955fa/tomli-2.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl" + }, + { + "algorithm": "sha256", + "hash": "0a154a9ae14bfcf5d8917a59b51ffd5a3ac1fd149b71b47a3a104ca4edcfa845", + "url": "https://files.pythonhosted.org/packages/70/8c/f48ac899f7b3ca7eb13af73bacbc93aec37f9c954df3c08ad96991c8c373/tomli-2.3.0-cp311-cp311-musllinux_1_2_aarch64.whl" + }, + { + "algorithm": "sha256", + "hash": "0eea8cc5c5e9f89c9b90c4896a8deefc74f518db5927d0e0e8d4a80953d774d0", + "url": "https://files.pythonhosted.org/packages/70/91/7cdab9a03e6d3d2bb11beae108da5bdc1c34bdeb06e21163482544ddcc90/tomli-2.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl" + }, + { + "algorithm": "sha256", + "hash": 
"883b1c0d6398a6a9d29b508c331fa56adbcdff647f6ace4dfca0f50e90dfd0ba", + "url": "https://files.pythonhosted.org/packages/86/7f/d8fffe6a7aefdb61bced88fcb5e280cfd71e08939da5894161bd71bea022/tomli-2.3.0-cp311-cp311-macosx_11_0_arm64.whl" + }, + { + "algorithm": "sha256", + "hash": "5192f562738228945d7b13d4930baffda67b69425a7f0da96d360b0a3888136b", + "url": "https://files.pythonhosted.org/packages/89/48/06ee6eabe4fdd9ecd48bf488f4ac783844fd777f547b8d1b61c11939974e/tomli-2.3.0-cp313-cp313-macosx_10_13_x86_64.whl" + }, + { + "algorithm": "sha256", + "hash": "a0e285d2649b78c0d9027570d4da3425bdb49830a6156121360b3f8511ea3441", + "url": "https://files.pythonhosted.org/packages/89/da/75dfd804fc11e6612846758a23f13271b76d577e299592b4371a4ca4cd09/tomli-2.3.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl" + }, + { + "algorithm": "sha256", + "hash": "5e01decd096b1530d97d5d85cb4dff4af2d8347bd35686654a004f8dea20fc67", + "url": "https://files.pythonhosted.org/packages/92/04/a038d65dbe160c3aa5a624e93ad98111090f6804027d474ba9c37c8ae186/tomli-2.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl" + }, + { + "algorithm": "sha256", + "hash": "88bd15eb972f3664f5ed4b57c1634a97153b4bac4479dcb6a495f41921eb7f45", + "url": "https://files.pythonhosted.org/packages/b3/2e/299f62b401438d5fe1624119c723f5d877acc86a4c2492da405626665f12/tomli-2.3.0-cp311-cp311-macosx_10_9_x86_64.whl" + }, + { + "algorithm": "sha256", + "hash": "74bf8464ff93e413514fefd2be591c3b0b23231a77f901db1eb30d6f712fc42c", + "url": "https://files.pythonhosted.org/packages/ba/28/72f8afd73f1d0e7829bfc093f4cb98ce0a40ffc0cc997009ee1ed94ba705/tomli-2.3.0-cp311-cp311-musllinux_1_2_x86_64.whl" + }, + { + "algorithm": "sha256", + "hash": "8a35dd0e643bb2610f156cca8db95d213a90015c11fee76c946aa62b7ae7e02f", + "url": "https://files.pythonhosted.org/packages/be/2f/8b7c60a9d1612a7cbc39ffcca4f21a73bf368a80fc25bccf8253e2563267/tomli-2.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl" + }, + { + "algorithm": "sha256", + "hash": 
"4021923f97266babc6ccab9f5068642a0095faa0a51a246a6a02fccbb3514eaf", + "url": "https://files.pythonhosted.org/packages/d5/f4/0fbd014909748706c01d16824eadb0307115f9562a15cbb012cd9b3512c5/tomli-2.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl" + }, + { + "algorithm": "sha256", + "hash": "4c2ef0244c75aba9355561272009d934953817c49f47d768070c3c94355c2aa3", + "url": "https://files.pythonhosted.org/packages/db/e4/1e58409aa78eefa47ccd19779fc6f36787edbe7d4cd330eeeedb33a4515b/tomli-2.3.0-cp314-cp314-macosx_11_0_arm64.whl" + }, + { + "algorithm": "sha256", + "hash": "be71c93a63d738597996be9528f4abe628d1adf5e6eb11607bc8fe1a510b5dae", + "url": "https://files.pythonhosted.org/packages/f1/01/88793757d54d8937015c75dcdfb673c65471945f6be98e6a0410fba167ed/tomli-2.3.0-cp313-cp313-macosx_11_0_arm64.whl" + }, + { + "algorithm": "sha256", + "hash": "73ee0b47d4dad1c5e996e3cd33b8a76a50167ae5f96a2607cbe8cc773506ab22", + "url": "https://files.pythonhosted.org/packages/f9/3f/d9dd692199e3b3aab2e4e4dd948abd0f790d9ded8cd10cbaae276a898434/tomli-2.3.0-cp312-cp312-macosx_11_0_arm64.whl" + }, + { + "algorithm": "sha256", + "hash": "7b0882799624980785240ab732537fcfc372601015c00f7fc367c55308c186f6", + "url": "https://files.pythonhosted.org/packages/fb/a1/4d6865da6a71c603cfe6ad0e6556c73c76548557a8d658f9e3b142df245f/tomli-2.3.0-cp312-cp312-musllinux_1_2_x86_64.whl" + }, + { + "algorithm": "sha256", + "hash": "b5870b50c9db823c595983571d1296a6ff3e1b88f734a4c8f6fc6188397de005", + "url": "https://files.pythonhosted.org/packages/fd/42/8e3c6a9a4b1a1360c1a2a39f0b972cef2cc9ebd56025168c4137192a9321/tomli-2.3.0-cp314-cp314-musllinux_1_2_x86_64.whl" + }, + { + "algorithm": "sha256", + "hash": "d7d86942e56ded512a594786a5ba0a5e521d02529b3826e7761a05138341a2ac", + "url": "https://files.pythonhosted.org/packages/ff/b7/40f36368fcabc518bb11c8f06379a0fd631985046c038aca08c6d6a43c6e/tomli-2.3.0-cp312-cp312-macosx_10_13_x86_64.whl" + } + ], + "project_name": "tomli", + "requires_dists": 
[], + "requires_python": ">=3.8", + "version": "2.3.0" + } + ], + "marker": null, + "platform_tag": null + } + ], + "only_builds": [], + "only_wheels": [], + "overridden": [], + "path_mappings": {}, + "pex_version": "2.73.1", + "pip_version": "20.3.4-patched", + "prefer_older_binary": false, + "requirements": [ + "codespell<3,>=2.2.6", + "tomli>=1.1.0; python_version < \"3.11\"" + ], + "requires_python": [ + "CPython<4,>=3.8" + ], + "resolver_version": "pip-2020-resolver", + "style": "universal", + "target_systems": [ + "linux", + "mac" + ], + "transitive": true, + "use_pep517": null, + "use_system_time": false +} diff --git a/src/python/pants/backend/tools/codespell/codespell.lock.metadata b/src/python/pants/backend/tools/codespell/codespell.lock.metadata new file mode 100644 index 00000000000..caf0a5f3eec --- /dev/null +++ b/src/python/pants/backend/tools/codespell/codespell.lock.metadata @@ -0,0 +1,20 @@ +{ + "version": 6, + "valid_for_interpreter_constraints": [ + "CPython<4,>=3.8" + ], + "generated_with_requirements": [ + "codespell<3,>=2.2.6", + "tomli>=1.1.0; python_version < \"3.11\"" + ], + "manylinux": "manylinux2014", + "requirement_constraints": [], + "only_binary": [], + "no_binary": [], + "excludes": [], + "overrides": [], + "sources": [], + "lock_style": "universal", + "complete_platforms": [], + "description": "This lockfile was generated by Pants. To regenerate, run: ./pants run build-support/bin/generate_builtin_lockfiles.py" +} \ No newline at end of file diff --git a/src/python/pants/backend/tools/codespell/rules.py b/src/python/pants/backend/tools/codespell/rules.py new file mode 100644 index 00000000000..f68fb8a2954 --- /dev/null +++ b/src/python/pants/backend/tools/codespell/rules.py @@ -0,0 +1,190 @@ +# Copyright 2026 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). 
+ +from __future__ import annotations + +import os +from collections import defaultdict +from dataclasses import dataclass +from typing import Any + +from pants.backend.python.util_rules.pex import PexProcess, create_pex +from pants.backend.tools.codespell.subsystem import Codespell +from pants.core.goals.lint import LintFilesRequest, LintResult +from pants.core.util_rules.config_files import ( + ConfigFiles, + ConfigFilesRequest, + GatherConfigFilesByDirectoriesRequest, + find_config_file, + gather_config_files_by_workspace_dir, +) +from pants.core.util_rules.partitions import Partition, Partitions +from pants.engine.fs import DigestSubset, MergeDigests, PathGlobs +from pants.engine.internals.native_engine import FilespecMatcher, Snapshot +from pants.engine.intrinsics import digest_to_snapshot, execute_process, merge_digests +from pants.engine.rules import collect_rules, concurrently, implicitly, rule +from pants.util.dirutil import group_by_dir +from pants.util.logging import LogLevel +from pants.util.strutil import pluralize + + +class CodespellRequest(LintFilesRequest): + tool_subsystem = Codespell # type: ignore[assignment] + + +@dataclass(frozen=True) +class PartitionInfo: + config_snapshot: Snapshot | None + # If True, this partition has no .codespellrc ancestor and should try + # to discover setup.cfg/pyproject.toml at runtime + discover_root_config: bool = False + + @property + def description(self) -> str: + if self.config_snapshot: + return self.config_snapshot.files[0] + elif self.discover_root_config: + return "" + else: + return "" + + +@rule +async def partition_inputs( + request: CodespellRequest.PartitionRequest, codespell: Codespell +) -> Partitions[Any, PartitionInfo]: + if codespell.skip: + return Partitions() + + matching_filepaths = FilespecMatcher( + includes=codespell.file_glob_include, excludes=codespell.file_glob_exclude + ).matches(request.files) + + # First, find .codespellrc files for partitioning + config_files = await 
gather_config_files_by_workspace_dir( + GatherConfigFilesByDirectoriesRequest( + tool_name=codespell.name, + config_filename=codespell.config_file_name, + filepaths=tuple(sorted(matching_filepaths)), + orphan_filepath_behavior=codespell.orphan_files_behavior, + ) + ) + + default_source_files: set[str] = set() + source_files_by_config_file: dict[str, set[str]] = defaultdict(set) + for source_dir, files_in_source_dir in group_by_dir(matching_filepaths).items(): + files = (os.path.join(source_dir, name) for name in files_in_source_dir) + if source_dir in config_files.source_dir_to_config_file: + config_file = config_files.source_dir_to_config_file[source_dir] + source_files_by_config_file[config_file].update(files) + else: + default_source_files.update(files) + + config_file_snapshots = await concurrently( + digest_to_snapshot( + **implicitly(DigestSubset(config_files.snapshot.digest, PathGlobs([config_file]))) + ) + for config_file in source_files_by_config_file + ) + + return Partitions( + ( + *( + Partition(tuple(sorted(files)), PartitionInfo(config_snapshot=config_snapshot)) + for files, config_snapshot in zip( + source_files_by_config_file.values(), config_file_snapshots + ) + ), + *( + ( + Partition( + tuple(sorted(default_source_files)), + PartitionInfo(config_snapshot=None, discover_root_config=True), + ), + ) + if default_source_files + else () + ), + ) + ) + + +@rule(desc="Lint with codespell", level=LogLevel.DEBUG) +async def run_codespell( + request: CodespellRequest.Batch[str, PartitionInfo], + codespell: Codespell, +) -> LintResult: + partition_info = request.partition_metadata + + codespell_pex_get = create_pex(codespell.to_pex_request()) + + # If this partition has no .codespellrc, try to discover setup.cfg/pyproject.toml at root + root_config: ConfigFiles | None = None + if partition_info.discover_root_config: + codespell_pex, root_config = await concurrently( + codespell_pex_get, + find_config_file( + ConfigFilesRequest( + discovery=True, + 
check_existence=[".codespellrc"], + check_content={ + "setup.cfg": b"[codespell]", + "pyproject.toml": b"[tool.codespell]", + }, + ) + ), + ) + else: + codespell_pex = await codespell_pex_get + + snapshot = await digest_to_snapshot(**implicitly(PathGlobs(request.elements))) + + # Determine which config to use and which flag to pass + # - .codespellrc and setup.cfg use --config (INI format) + # - pyproject.toml uses --toml (TOML format) + config_snapshot = partition_info.config_snapshot + config_args: tuple[str, ...] = () + + if config_snapshot is not None: + # We have a .codespellrc from directory-based discovery + config_args = ("--config", config_snapshot.files[0]) + elif root_config is not None and root_config.snapshot.files: + # We found a config at root + config_file = root_config.snapshot.files[0] + config_snapshot = root_config.snapshot + if config_file.endswith("pyproject.toml"): + config_args = ("--toml", config_file) + else: + # .codespellrc or setup.cfg use --config + config_args = ("--config", config_file) + + input_digest = await merge_digests( + MergeDigests( + ( + snapshot.digest, + codespell_pex.digest, + *((config_snapshot.digest,) if config_snapshot else ()), + ) + ) + ) + + process_result = await execute_process( + **implicitly( + PexProcess( + codespell_pex, + argv=( + *config_args, + *codespell.args, + *snapshot.files, + ), + input_digest=input_digest, + description=f"Run codespell on {pluralize(len(snapshot.files), 'file')}.", + level=LogLevel.DEBUG, + ) + ) + ) + return LintResult.create(request, process_result) + + +def rules(): + return [*collect_rules(), *CodespellRequest.rules()] diff --git a/src/python/pants/backend/tools/codespell/rules_integration_test.py b/src/python/pants/backend/tools/codespell/rules_integration_test.py new file mode 100644 index 00000000000..ea78557aca1 --- /dev/null +++ b/src/python/pants/backend/tools/codespell/rules_integration_test.py @@ -0,0 +1,227 @@ +# Copyright 2026 Pants project contributors (see 
CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). + +from __future__ import annotations + +from typing import Any + +import pytest + +from pants.backend.python.util_rules.pex import rules as pex_rules +from pants.backend.tools.codespell.rules import CodespellRequest, PartitionInfo +from pants.backend.tools.codespell.rules import rules as codespell_rules +from pants.core.goals.lint import LintResult, Partitions +from pants.core.util_rules import config_files, external_tool, source_files +from pants.engine.fs import PathGlobs +from pants.engine.internals.native_engine import Snapshot +from pants.testutil.rule_runner import QueryRule, RuleRunner + + +@pytest.fixture +def rule_runner() -> RuleRunner: + return RuleRunner( + rules=[ + *codespell_rules(), + *config_files.rules(), + *source_files.rules(), + *external_tool.rules(), + *pex_rules(), + QueryRule(Partitions, [CodespellRequest.PartitionRequest]), + QueryRule(LintResult, [CodespellRequest.Batch]), + ], + ) + + +GOOD_FILE = """\ +This file has correct spelling. +No errors here. +""" + +BAD_FILE = """\ +This file has a speling error. +And also a teh typo. 
+""" + +CONFIG_FILE = """\ +[codespell] +ignore-words-list = speling +""" + +PYPROJECT_TOML_CONFIG = """\ +[tool.codespell] +ignore-words-list = "speling" +""" + +SETUP_CFG_CONFIG = """\ +[codespell] +ignore-words-list = speling +""" + + +def run_codespell( + rule_runner: RuleRunner, + *, + extra_args: list[str] | None = None, +) -> LintResult: + rule_runner.set_options( + ["--backend-packages=pants.backend.experimental.tools.codespell", *(extra_args or ())], + env_inherit={"PATH", "PYENV_ROOT", "HOME"}, + ) + snapshot = rule_runner.request(Snapshot, [PathGlobs(["**"])]) + partitions = rule_runner.request( + Partitions[Any, PartitionInfo], [CodespellRequest.PartitionRequest(snapshot.files)] + ) + assert len(partitions) >= 1 + # Run on all partitions and return the first non-zero result, or the last result + results = [] + for partition in partitions: + result = rule_runner.request( + LintResult, + [CodespellRequest.Batch("", partition.elements, partition_metadata=partition.metadata)], + ) + results.append(result) + if result.exit_code != 0: + return result + return results[-1] if results else results[0] + + +def test_passing(rule_runner: RuleRunner) -> None: + rule_runner.write_files({"test.txt": GOOD_FILE}) + result = run_codespell(rule_runner) + assert result.exit_code == 0 + + +def test_failing(rule_runner: RuleRunner) -> None: + rule_runner.write_files({"test.txt": BAD_FILE}) + result = run_codespell(rule_runner) + assert result.exit_code == 65 + assert "speling" in result.stdout or "speling" in result.stderr + assert "teh" in result.stdout or "teh" in result.stderr + + +def test_config_file_discovery(rule_runner: RuleRunner) -> None: + rule_runner.write_files( + { + "test.txt": BAD_FILE, + ".codespellrc": CONFIG_FILE, + } + ) + result = run_codespell(rule_runner) + # Should still fail because "teh" is not ignored, but "speling" is + assert result.exit_code == 65 + # "speling" should not appear in output since it's ignored + output = result.stdout + 
result.stderr + assert "teh" in output + + +def test_skip(rule_runner: RuleRunner) -> None: + rule_runner.write_files({"test.txt": BAD_FILE}) + # When skipped, partitions should be empty + rule_runner.set_options( + ["--backend-packages=pants.backend.experimental.tools.codespell", "--codespell-skip"], + env_inherit={"PATH", "PYENV_ROOT", "HOME"}, + ) + snapshot = rule_runner.request(Snapshot, [PathGlobs(["**"])]) + partitions = rule_runner.request( + Partitions[Any, PartitionInfo], [CodespellRequest.PartitionRequest(snapshot.files)] + ) + assert len(partitions) == 0 + + +def test_extra_args(rule_runner: RuleRunner) -> None: + rule_runner.write_files({"test.txt": BAD_FILE}) + result = run_codespell( + rule_runner, extra_args=["--codespell-args='--ignore-words-list=speling,teh'"] + ) + assert result.exit_code == 0 + + +def test_file_exclusion(rule_runner: RuleRunner) -> None: + rule_runner.write_files( + { + "good.txt": GOOD_FILE, + "bad.txt": BAD_FILE, + } + ) + # First verify that without exclusion, bad.txt is caught + result = run_codespell(rule_runner) + assert result.exit_code == 65 + output = result.stdout + result.stderr + assert "bad.txt" in output + + # Now verify that with exclusion, bad.txt is not checked + result = run_codespell(rule_runner, extra_args=["--codespell-exclude=['**/bad.txt']"]) + assert result.exit_code == 0 + + +def test_pyproject_toml_config(rule_runner: RuleRunner) -> None: + """Test that pyproject.toml config is discovered for files without .codespellrc.""" + rule_runner.write_files( + { + "test.txt": BAD_FILE, + "pyproject.toml": PYPROJECT_TOML_CONFIG, + } + ) + result = run_codespell(rule_runner) + # Should still fail because "teh" is not ignored, but "speling" is + assert result.exit_code == 65 + output = result.stdout + result.stderr + assert "test.txt" in output and "teh" in output + # "speling" in test.txt should not appear since it's ignored by pyproject.toml config + # (note: pyproject.toml itself may report "speling" since it's 
being scanned too) + assert "test.txt:1: speling" not in output + + +def test_setup_cfg_config(rule_runner: RuleRunner) -> None: + """Test that setup.cfg config is discovered for files without .codespellrc.""" + rule_runner.write_files( + { + "test.txt": BAD_FILE, + "setup.cfg": SETUP_CFG_CONFIG, + } + ) + result = run_codespell(rule_runner) + # Should still fail because "teh" is not ignored, but "speling" is + assert result.exit_code == 65 + output = result.stdout + result.stderr + assert "test.txt" in output and "teh" in output + # "speling" in test.txt should not appear since it's ignored by setup.cfg config + # (note: setup.cfg itself may report "speling" since it's being scanned too) + assert "test.txt:1: speling" not in output + + +def test_multiple_config_partitions(rule_runner: RuleRunner) -> None: + """Test that files are correctly partitioned by their nearest config file.""" + rule_runner.write_files( + { + "src/good.txt": GOOD_FILE, + "src/.codespellrc": CONFIG_FILE, # Ignores "speling" + "tests/bad.txt": BAD_FILE, + # tests/ has no config, so both "speling" and "teh" should be caught + } + ) + rule_runner.set_options( + ["--backend-packages=pants.backend.experimental.tools.codespell"], + env_inherit={"PATH", "PYENV_ROOT", "HOME"}, + ) + snapshot = rule_runner.request(Snapshot, [PathGlobs(["**"])]) + partitions = rule_runner.request( + Partitions[Any, PartitionInfo], [CodespellRequest.PartitionRequest(snapshot.files)] + ) + + # Should have 2 partitions: one for src/ (with config) and one for tests/ (without) + assert len(partitions) == 2 + + # Find the partition with the config + config_partition = None + default_partition = None + for p in partitions: + if p.metadata.config_snapshot is not None: + config_partition = p + else: + default_partition = p + + assert config_partition is not None + assert default_partition is not None + assert "src/good.txt" in config_partition.elements + assert "tests/bad.txt" in default_partition.elements diff --git 
a/src/python/pants/backend/tools/codespell/subsystem.py b/src/python/pants/backend/tools/codespell/subsystem.py new file mode 100644 index 00000000000..8343fffe564 --- /dev/null +++ b/src/python/pants/backend/tools/codespell/subsystem.py @@ -0,0 +1,83 @@ +# Copyright 2026 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). + +from __future__ import annotations + +from typing import Iterable + +from pants.backend.python.subsystems.python_tool_base import PythonToolBase +from pants.backend.python.target_types import ConsoleScript +from pants.core.goals.resolves import ExportableTool +from pants.core.util_rules.config_files import OrphanFilepathConfigBehavior +from pants.engine.rules import Rule, collect_rules +from pants.engine.unions import UnionRule +from pants.option.option_types import ( + ArgsListOption, + EnumOption, + SkipOption, + StrListOption, + StrOption, +) +from pants.util.strutil import softwrap + + +class Codespell(PythonToolBase): + name = "Codespell" + options_scope = "codespell" + help_short = "A tool to find common misspellings in text files (https://github.com/codespell-project/codespell)" + + default_main = ConsoleScript("codespell") + default_requirements = ["codespell>=2.2.6,<3", "tomli>=1.1.0; python_version < '3.11'"] + + register_interpreter_constraints = True + + register_lockfile = True + default_lockfile_resource = ("pants.backend.tools.codespell", "codespell.lock") + + skip = SkipOption("lint") + + args = ArgsListOption(example="--quiet-level=2 --ignore-words-list=word1,word2") + + config_file_name = StrOption( + "--config-file-name", + default=".codespellrc", + advanced=True, + help=softwrap( + """ + Name of a config file understood by codespell + (https://github.com/codespell-project/codespell#using-a-config-file). + The plugin will search the ancestors of each directory in which files are found + for a config file of this name. 
+ """ + ), + ) + + orphan_files_behavior = EnumOption( + default=OrphanFilepathConfigBehavior.IGNORE, + advanced=True, + help=softwrap( + f""" + Whether to ignore, error or show a warning when files are found that are not + covered by the config file provided in `[{options_scope}].config_file_name` setting. + """ + ), + ) + + file_glob_include = StrListOption( + "--include", + default=["**/*"], + help="Glob patterns for files to check with codespell.", + ) + + file_glob_exclude = StrListOption( + "--exclude", + default=[], + help="Glob patterns for files to exclude from codespell checks.", + ) + + +def rules() -> Iterable[Rule | UnionRule]: + return [ + *collect_rules(), + UnionRule(ExportableTool, Codespell), + ] From 7414c5fd5984a16988638067755dd15ccc3d9005 Mon Sep 17 00:00:00 2001 From: Chris Burroughs Date: Thu, 8 Jan 2026 15:21:27 -0500 Subject: [PATCH 2/4] empty --- src/python/pants/backend/tools/codespell/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/python/pants/backend/tools/codespell/__init__.py b/src/python/pants/backend/tools/codespell/__init__.py index 9c3e8bdf10b..e69de29bb2d 100644 --- a/src/python/pants/backend/tools/codespell/__init__.py +++ b/src/python/pants/backend/tools/codespell/__init__.py @@ -1,2 +0,0 @@ -# Copyright 2026 Pants project contributors (see CONTRIBUTORS.md). -# Licensed under the Apache License, Version 2.0 (see LICENSE). 
From 719faf7a0437ea555afc8eaa24bf4096987deba3 Mon Sep 17 00:00:00 2001 From: Chris Burroughs Date: Thu, 8 Jan 2026 15:40:20 -0500 Subject: [PATCH 3/4] really empty --- .../pants/backend/experimental/tools/codespell/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/python/pants/backend/experimental/tools/codespell/__init__.py b/src/python/pants/backend/experimental/tools/codespell/__init__.py index 9c3e8bdf10b..e69de29bb2d 100644 --- a/src/python/pants/backend/experimental/tools/codespell/__init__.py +++ b/src/python/pants/backend/experimental/tools/codespell/__init__.py @@ -1,2 +0,0 @@ -# Copyright 2026 Pants project contributors (see CONTRIBUTORS.md). -# Licensed under the Apache License, Version 2.0 (see LICENSE). From f7c9c5e5ddaf2407556797265d11b37a3e47c4e0 Mon Sep 17 00:00:00 2001 From: Chris Burroughs Date: Fri, 30 Jan 2026 19:20:20 -0500 Subject: [PATCH 4/4] ver move --- docs/notes/2.31.x.md | 6 ------ docs/notes/2.32.x.md | 6 ++++++ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/notes/2.31.x.md b/docs/notes/2.31.x.md index 850173ecff1..6bfcec42050 100644 --- a/docs/notes/2.31.x.md +++ b/docs/notes/2.31.x.md @@ -33,12 +33,6 @@ This work stands on the shoulders of support from the [Science Projects](https:/ ### Backends -#### New: Codespell - -Pants now supports running [codespell](https://github.com/codespell-project/codespell) as a linter. `codespell` isn't a regular spell checker with a full dictionary, rather it is intended to be used on source code with a much smaller list of common misspelling. - -Enable the `pants.backend.experimental.tools.codespell` backend to try it out. 
- #### Helm #### JVM diff --git a/docs/notes/2.32.x.md b/docs/notes/2.32.x.md index 22fb17e510c..9aab1ed23a3 100644 --- a/docs/notes/2.32.x.md +++ b/docs/notes/2.32.x.md @@ -20,6 +20,12 @@ Thank you to [Klaviyo](https://www.klaviyo.com/) for their Platinum tier support ### Backends +#### New: Codespell + +Pants now supports running [codespell](https://github.com/codespell-project/codespell) as a linter. `codespell` isn't a regular spell checker with a full dictionary; rather, it is intended to be used on source code with a much smaller list of common misspellings. + +Enable the `pants.backend.experimental.tools.codespell` backend to try it out. + #### Helm #### JVM