From 6851d4d542b994406667fc140e52a7c5248a585f Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Fri, 17 Apr 2026 16:50:59 -0500 Subject: [PATCH 1/8] REF: Log charge provenance once per unique molecule, and for whole molecule --- openff/interchange/smirnoff/_nonbonded.py | 91 +++++++++++++---------- 1 file changed, 50 insertions(+), 41 deletions(-) diff --git a/openff/interchange/smirnoff/_nonbonded.py b/openff/interchange/smirnoff/_nonbonded.py index 86d2a8406..dbb47d078 100644 --- a/openff/interchange/smirnoff/_nonbonded.py +++ b/openff/interchange/smirnoff/_nonbonded.py @@ -952,50 +952,11 @@ def store_matches( # Have this new key (on a duplicate molecule) point to the same potential # as the old key (on a unique/reference molecule) - if type(new_key) is LibraryChargeTopologyKey: - logger.info( - "Charge section LibraryCharges applied to topology atom index " - f"{topology_atom_index}", - ) - - elif type(new_key) is SingleAtomChargeTopologyKey: - if new_key.extras["handler"] == "ToolkitAM1BCCHandler": - logger.info( - "Charge section ToolkitAM1BCC, using charge method " - f"{new_key.extras['partial_charge_method']}, " - f"applied to topology atom index {topology_atom_index}", - ) - elif new_key.extras["handler"] == "NAGLChargesHandler": - logger.info( - "Charge section NAGLCharges, using NAGL model " - f"{new_key.extras['partial_charge_method']}, " - f"applied to topology atom index {topology_atom_index}", - ) - - elif new_key.extras["handler"] == "preset": - logger.info( - f"Preset charges applied to atom index {topology_atom_index}", - ) - - else: - raise ValueError(f"Unhandled handler {new_key.extras['handler']}") - - elif type(new_key) is ChargeModelTopologyKey: - logger.info( - "Charge section ChargeIncrementModel, using charge method " - f"{new_key.partial_charge_method}, " - f"applied to topology atom index {new_key.this_atom_index}", - ) - - elif type(new_key) is ChargeIncrementTopologyKey: - # here is where the actual increments could be logged - pass - - else: - raise ValueError(f"Unhandled key type {type(new_key)}") self.key_map[new_key] = matches[key] + self._log_charge_provenance(unique_molecule, new_key) + # this logic is all only to satisfy the allow_nonintegral_charges argument - could be more performant # to do it while assignment is happening if not allow_nonintegral_charges: @@ -1025,6 +986,54 @@ def store_matches( f"{formal_sum}.", ) + @staticmethod + def _log_charge_provenance( + unique_molecule: Molecule, + key: ChargeModelTopologyKey | ChargeModelTopologyKey | SingleAtomChargeTopologyKey | LibraryChargeTopologyKey, + ): + inchi = unique_molecule.to_inchi() + + if type(key) is LibraryChargeTopologyKey: + logger.info( + f"Charge section LibraryCharges applied to molecule with InChI {inchi}", + ) + + elif type(key) is SingleAtomChargeTopologyKey: + if key.extras["handler"] == "ToolkitAM1BCCHandler": + logger.info( + "Charge section ToolkitAM1BCC, using charge method " + f"{key.extras['partial_charge_method']}, " + f"applied to molecule with InChI {inchi}", + ) + elif key.extras["handler"] == "NAGLChargesHandler": + logger.info( + "Charge section NAGLCharges, using NAGL model " + f"{key.extras['partial_charge_method']}, " + f"applied to molecule with InChI {inchi}", + ) + + elif key.extras["handler"] == "preset": + logger.info( + f"Preset charges applied to molecule with InChI {inchi}", + ) + + else: + raise ValueError(f"Unhandled handler {key.extras['handler']}") + + elif type(key) is ChargeModelTopologyKey: + logger.info( + "Charge section ChargeIncrementModel, using charge method " + f"{key.partial_charge_method}, " + f"applied to topology atom index {key.this_atom_index}", + ) + + elif type(key) is ChargeIncrementTopologyKey: + # here is where the actual increments could be logged + pass + + else: + raise ValueError(f"Unhandled key type {type(key)}") + def store_potentials( self, parameter_handler: ElectrostaticsHandlerType | list[ElectrostaticsHandlerType], From 4e117f4d5532c3696176d5b057a355356c78be88 Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Mon, 20 Apr 2026 13:07:42 -0500 Subject: [PATCH 2/8] TST: Update charge logging tests for new behavior --- .../test_charge_assignment_logging.py | 125 +++++++++++------- openff/interchange/smirnoff/_nonbonded.py | 7 +- 2 files changed, 80 insertions(+), 52 deletions(-) diff --git a/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py b/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py index b35ea3821..516a25b48 100644 --- a/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py +++ b/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py @@ -115,7 +115,7 @@ NAGL_KEY = "openff-gnn-am1bcc-1.0.0.pt" -def map_methods_to_atom_indices(caplog: pytest.LogCaptureFixture) -> dict[str, list[int]]: +def map_methods_to_inchi(caplog: pytest.LogCaptureFixture) -> dict[str, list[int]]: """ Map partial charge assignment methods to (sorted) atom indices. """ @@ -131,13 +131,21 @@ def map_methods_to_atom_indices(caplog: pytest.LogCaptureFixture) -> dict[str, l message = record.msg if message.startswith("Charge section LibraryCharges"): - info["library"].append(int(message.split("atom index ")[-1])) + _, molecule = message.split(", ") + + info["library"].append(molecule.split("InChI ")[-1]) elif message.startswith("Charge section ToolkitAM1BCC"): - info[message.split(", ")[1].split(" ")[-1]].append(int(message.split("atom index ")[-1])) + _, method, molecule = message.split(", ") + + assert method.split()[-1] == AM1BCC_KEY, f"Expected method {AM1BCC_KEY} but got {method.split()[-1]}" + + info[method.split()[-1]].append(molecule.split("InChI ")[-1]) elif message.startswith("Charge section NAGLCharges"): - info["NAGLChargesHandler"].append(int(message.split("atom index ")[-1])) + _, _, molecule = message.split(", ") + + info["NAGL"].append(molecule.split("InChI ")[-1]) # without also pulling the virtual site - particle mapping (which is different for each engine) # it's hard to store more information than the orientation atoms that are affected by each @@ -151,12 +159,13 @@ def map_methods_to_atom_indices(caplog: pytest.LogCaptureFixture) -> dict[str, l info["orientation"].append(atom) elif message.startswith("Preset charges"): - info["preset"].append(int(message.split("atom index")[-1])) + info["preset"].append(message.split("InChI ")[-1]) elif message.startswith("Charge section ChargeIncrementModel"): if "using charge method" in message: - info[f"chargeincrements_{message.split(',')[1].split(' ')[-1]}"].append( - int(message.split("atom index ")[-1]), + _, method, molecule = message.split(", ") + info[f"chargeincrements_{method.split()[-1]}"].append( + molecule.split("InChI ")[-1], ) elif "applying charge increment" in message: @@ -285,23 +294,27 @@ def test_case0(caplog, sage_no_nagl, ligand): with caplog.at_level(logging.INFO): sage_no_nagl.create_interchange(ligand.to_topology()) - info = map_methods_to_atom_indices(caplog) + info = map_methods_to_inchi(caplog) # atoms 0 through 8 are ethanol, getting AM1-BCC - assert info[AM1BCC_KEY] == [*range(0, 9)] + assert info[AM1BCC_KEY] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3"] def test_case1(caplog, sage_no_nagl, ligand_and_water_and_ions): with caplog.at_level(logging.INFO): sage_no_nagl.create_interchange(ligand_and_water_and_ions) - info = map_methods_to_atom_indices(caplog) + info = map_methods_to_inchi(caplog) # atoms 0 through 8 are ethanol, getting AM1-BCC - assert info[AM1BCC_KEY] == [*range(0, 9)] + assert info[AM1BCC_KEY] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3"] # atoms 9 through 21 are water + ions, getting library charges - assert info["library"] == [*range(9, 22)] + assert info["library"] == [ + "InChI=1S/ClH/h1H/p-1", + "InChI=1S/H2O/h1H2", + "InChI=1S/Na/q+1", + ] def test_case2(caplog, sage_no_nagl, ligand, solvent): @@ -310,10 +323,10 @@ def test_case2(caplog, sage_no_nagl, ligand, solvent): with caplog.at_level(logging.INFO): sage_no_nagl.create_interchange(topology) - info = map_methods_to_atom_indices(caplog) + info = map_methods_to_inchi(caplog) # everything should get AM1-BCC charges - assert info[AM1BCC_KEY] == [*range(0, topology.n_atoms)] + assert info[AM1BCC_KEY] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3", "InChI=1S/CH5N/c1-2/h2H2,1H3"] def test_case3(caplog, sage_no_nagl, ligand_and_water_and_ions, solvent): @@ -327,14 +340,18 @@ def test_case3(caplog, sage_no_nagl, ligand_and_water_and_ions, solvent): ligand_and_water_and_ions, ) - info = map_methods_to_atom_indices(caplog) + info = map_methods_to_inchi(caplog) # atoms 0 through 8 are ethanol, getting AM1-BCC, # and also solvent molecules (starting at index 22) - assert info[AM1BCC_KEY] == [*range(0, 9), *range(22, 22 + 3 * 7)] + assert info[AM1BCC_KEY] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3", "InChI=1S/CH5N/c1-2/h2H2,1H3"] # atoms 9 through 21 are water + ions, getting library charges - assert info["library"] == [*range(9, 22)] + assert info["library"] == [ + "InChI=1S/ClH/h1H/p-1", + "InChI=1S/H2O/h1H2", + "InChI=1S/Na/q+1", + ] @pytest.mark.slow @@ -356,23 +373,29 @@ def test_cases4_5(caplog, ligand_and_water_and_ions, preset_on_protein): else: ff.create_interchange(complex) - info = map_methods_to_atom_indices(caplog) + info = map_methods_to_inchi(caplog) - assert info[AM1BCC_KEY] == [*range(complex.molecule(0).n_atoms, complex.molecule(0).n_atoms + 9)] + assert info[AM1BCC_KEY] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3"] if preset_on_protein: # protein gets preset charges - assert info["preset"] == [*range(0, complex.molecule(0).n_atoms)] + assert info["preset"] == [ + "InChI=1S/C9H17N3O3/c1-5(8(14)10-4)12-9(15)6(2)11-7(3)13/h5-6H,1-4H3,(H,10,14)(H,11,13)(H,12,15)/t5-,6-/m0/s1", + ] # everything after the protein and ligand should get library charges assert info["library"] == [ - *range(complex.molecule(0).n_atoms + 9, complex.n_atoms), + "InChI=1S/ClH/h1H/p-1", + "InChI=1S/H2O/h1H2", + "InChI=1S/Na/q+1", ] else: # the protein and everything after the ligand should get library charges assert info["library"] == [ - *range(0, complex.molecule(0).n_atoms), - *range(complex.molecule(0).n_atoms + 9, complex.n_atoms), + "InChI=1S/C9H17N3O3/c1-5(8(14)10-4)12-9(15)6(2)11-7(3)13/h5-6H,1-4H3,(H,10,14)(H,11,13)(H,12,15)/t5-,6-/m0/s1", + "InChI=1S/ClH/h1H/p-1", + "InChI=1S/H2O/h1H2", + "InChI=1S/Na/q+1", ] @@ -384,13 +407,13 @@ def test_case6(caplog, ligand, water): with caplog.at_level(logging.INFO): force_field.create_interchange(topology) - info = map_methods_to_atom_indices(caplog) + info = map_methods_to_inchi(caplog) # atoms 0 through 8 are ethanol, getting AM1-BCC charges - assert info[AM1BCC_KEY] == [*range(0, 9)] + assert info[AM1BCC_KEY] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3"] # atoms 9 through 17 are water atoms, getting library charges - assert info["library"] == [*range(9, 18)] + assert info["library"] == ["InChI=1S/H2O/h1H2"] # particles 18 through 20 are water virtual sites, but the current logging strategy # makes it hard to match these up (and the particle indices are different OpenMM/GROMACS/etc) @@ -409,13 +432,17 @@ def test_case7(caplog, sage_no_nagl, ligand_and_water_and_ions): charge_from_molecules=[ligand_and_water_and_ions.molecule(0)], ) - info = map_methods_to_atom_indices(caplog) + info = map_methods_to_inchi(caplog) # atoms 0 through 8 are ethanol, getting preset charges - assert info["preset"] == [*range(0, 9)] + assert info["preset"] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3"] # atoms 9 through 21 are water + ions, getting library charges - assert info["library"] == [*range(9, 22)] + assert info["library"] == [ + "InChI=1S/ClH/h1H/p-1", + "InChI=1S/H2O/h1H2", + "InChI=1S/Na/q+1", + ] def test_case8(caplog, sage_no_nagl, water_and_ions): @@ -427,13 +454,13 @@ def test_case8(caplog, sage_no_nagl, water_and_ions): charge_from_molecules=[water_and_ions.molecule(0)], ) - info = map_methods_to_atom_indices(caplog) + info = map_methods_to_inchi(caplog) # atoms 0 through 8 are water, getting preset charges - assert info["preset"] == [*range(0, 9)] + assert info["preset"] == ["InChI=1S/H2O/h1H2"] # atoms 9 through 12 are ions, getting library charges - assert info["library"] == [*range(9, 13)] + assert info["library"] == ["InChI=1S/ClH/h1H/p-1", "InChI=1S/Na/q+1"] def test_case9(caplog, sage_with_bond_charge): @@ -444,10 +471,10 @@ def test_case9(caplog, sage_with_bond_charge): ).to_topology(), ) - info = map_methods_to_atom_indices(caplog) + info = map_methods_to_inchi(caplog) # atoms 0 through 5 are ligand, getting AM1-BCC charges - assert info[AM1BCC_KEY] == [*range(0, 5)] + assert info[AM1BCC_KEY] == ["InChI=1S/CH3Cl/c1-2/h1H3"] # atoms 0 and 1 are the orientation atoms of the sigma hole virtual site assert info["orientation"] == [0, 1] @@ -467,10 +494,10 @@ def test_case10(caplog, sage_with_nagl_chargeincrements, ligand): ligand.to_topology(), ) - info = map_methods_to_atom_indices(caplog) + info = map_methods_to_inchi(caplog) - # atoms 0 through 5 are ligand, getting NAGL charges - assert info[f"chargeincrements_{NAGL_KEY}"] == [*range(0, 9)] + # atoms 0 through 8 are ligand, getting NAGL charges + assert info[f"chargeincrements_{NAGL_KEY}"] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3"] # TODO: These are logged symmetrically (i.e. hydrogens are listed) # even though the charges appear to be correct, assert should @@ -489,11 +516,11 @@ def test_case11(caplog, sage, ligand): with caplog.at_level(logging.INFO): sage.create_interchange(ligand.to_topology()) - info = map_methods_to_atom_indices(caplog) + info = map_methods_to_inchi(caplog) # Should log NAGL charges for all atoms - assert "NAGLChargesHandler" in info - assert info["NAGLChargesHandler"] == [*range(0, ligand.n_atoms)] + assert "NAGL" in info + assert info["NAGL"] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3"] def test_case12(caplog, sage, water): @@ -505,11 +532,11 @@ def test_case12(caplog, sage, water): with caplog.at_level(logging.INFO): sage.create_interchange(topology) - info = map_methods_to_atom_indices(caplog) + info = map_methods_to_inchi(caplog) # Water should get library charges, not NAGL - assert info["library"] == [*range(0, 6)] # 2 water molecules, 3 atoms each - assert "NAGLChargesHandler" not in info + assert info["library"] == ["InChI=1S/H2O/h1H2"] # 2 water molecules + assert "NAGL" not in info def test_case13(caplog, sage, ligand, water): @@ -521,13 +548,13 @@ def test_case13(caplog, sage, ligand, water): with caplog.at_level(logging.INFO): sage.create_interchange(topology) - info = map_methods_to_atom_indices(caplog) + info = map_methods_to_inchi(caplog) # Ligand should get NAGL charges - assert info["NAGLChargesHandler"] == [*range(0, ligand.n_atoms)] + assert info["NAGL"] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3"] # Water should get library charges - assert info["library"] == [*range(ligand.n_atoms, ligand.n_atoms + water.n_atoms)] + assert info["library"] == ["InChI=1S/H2O/h1H2"] def test_case14(caplog, sage, ligand): @@ -542,8 +569,8 @@ def test_case14(caplog, sage, ligand): charge_from_molecules=[ligand], ) - info = map_methods_to_atom_indices(caplog) + info = map_methods_to_inchi(caplog) # Should use preset charges, not NAGL - assert info["preset"] == [*range(0, ligand.n_atoms)] - assert "NAGLChargesHandler" not in info + assert info["preset"] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3"] + assert "NAGL" not in info diff --git a/openff/interchange/smirnoff/_nonbonded.py b/openff/interchange/smirnoff/_nonbonded.py index dbb47d078..5476d6a6f 100644 --- a/openff/interchange/smirnoff/_nonbonded.py +++ b/openff/interchange/smirnoff/_nonbonded.py @@ -995,7 +995,7 @@ def _log_charge_provenance( if type(key) is LibraryChargeTopologyKey: logger.info( - f"Charge section LibraryCharges applied to molecule with InChI {inchi}", + f"Charge section LibraryCharges, applied to molecule with InChI {inchi}", ) elif type(key) is SingleAtomChargeTopologyKey: @@ -1024,11 +1024,12 @@ def _log_charge_provenance( logger.info( "Charge section ChargeIncrementModel, using charge method " f"{key.partial_charge_method}, " - f"applied to topology atom index {key.this_atom_index}", + f"applied to molecule with InChI {inchi}", ) elif type(key) is ChargeIncrementTopologyKey: - # here is where the actual increments could be logged + # here is where the individual increments could be logged at creation time, but they're + # also logged in _get_charges pass else: From f4f9fed5bb88163d0e8ec792708a1560742faa3e Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Mon, 20 Apr 2026 15:22:27 -0500 Subject: [PATCH 3/8] FIX: Do not go into logging logic unnecessarily, allow InChI failure --- openff/interchange/smirnoff/_nonbonded.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/openff/interchange/smirnoff/_nonbonded.py b/openff/interchange/smirnoff/_nonbonded.py index 5476d6a6f..c285a8377 100644 --- a/openff/interchange/smirnoff/_nonbonded.py +++ b/openff/interchange/smirnoff/_nonbonded.py @@ -955,7 +955,9 @@ def store_matches( self.key_map[new_key] = matches[key] - self._log_charge_provenance(unique_molecule, new_key) + # don't bother going through logging machinery if it's not opted-in to + if logger.isEnabledFor(logging.INFO): + self._log_charge_provenance(unique_molecule, new_key) # this logic is all only to satisfy the allow_nonintegral_charges argument - could be more performant # to do it while assignment is happening @@ -991,7 +993,15 @@ def _log_charge_provenance( unique_molecule: Molecule, key: ChargeModelTopologyKey | ChargeModelTopologyKey | SingleAtomChargeTopologyKey | LibraryChargeTopologyKey, ): - inchi = unique_molecule.to_inchi() + try: + inchi = unique_molecule.to_inchi() + except Exception as error: + logger.warning( + f"Could not generate InChI for molecule with Hill formula {unique_molecule.to_hill_formula()}. " + "This molecule's charge provenance will be logged without a valid InChI. " + f"Error was {error}", + ) + inchi = "UNKNOWN_INCHI" if type(key) is LibraryChargeTopologyKey: logger.info( From 9bf8f7c743ed188b4df727eda1a5eb1abe70e938 Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Mon, 20 Apr 2026 17:50:25 -0500 Subject: [PATCH 4/8] TST: Add test covering InChI failure --- .../smirnoff/test_charge_assignment_logging.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py b/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py index 516a25b48..9e721ae40 100644 --- a/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py +++ b/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py @@ -574,3 +574,13 @@ def test_case14(caplog, sage, ligand): # Should use preset charges, not NAGL assert info["preset"] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3"] assert "NAGL" not in info + + +def test_inchi_fallback(caplog, sage): + """Test that molecules that fail InChI generation are still logged in some way.""" + + # TODO: Might be a toolkit bug that needs to be worked around + with caplog.at_level(logging.INFO): + sage.create_interchange( + Molecule.from_smiles(342 * "C").to_topology(), + ) From b76f3cab91142aa371a9eb87e4294d0d8a19e7a5 Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Tue, 21 Apr 2026 15:27:46 -0500 Subject: [PATCH 5/8] TST: Test InChI fallback in logging --- .../smirnoff/test_charge_assignment_logging.py | 17 +++++++++++++++-- openff/interchange/smirnoff/_nonbonded.py | 5 +++-- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py b/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py index 9e721ae40..7096cd334 100644 --- a/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py +++ b/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py @@ -117,15 +117,18 @@ def map_methods_to_inchi(caplog: pytest.LogCaptureFixture) -> dict[str, list[int]]: """ - Map partial charge assignment methods to (sorted) atom indices. + Map partial charge assignment methods to (sorted) molecule InChI. """ info = defaultdict(list) for record in caplog.records: - # skip logged warnings from upstreams/other packages if record.name.startswith("openff.interchange"): + # This is a specific one-off logging event not relevant to this function + if "Could not generate InChI for molecule with Hill formula" in record.msg: + continue assert record.levelname == "INFO", "Only INFO logs are expected." else: + # skip logged warnings from upstreams/other packages continue message = record.msg @@ -578,9 +581,19 @@ def test_case14(caplog, sage, ligand): def test_inchi_fallback(caplog, sage): """Test that molecules that fail InChI generation are still logged in some way.""" + from openff.toolkit.utils.toolkits import OpenEyeToolkitWrapper # TODO: Might be a toolkit bug that needs to be worked around with caplog.at_level(logging.INFO): sage.create_interchange( Molecule.from_smiles(342 * "C").to_topology(), ) + + info = map_methods_to_inchi(caplog) + + if OpenEyeToolkitWrapper.is_available(): + assert info["NAGL"] == ["UNKNOWN_INCHI"] + else: + # RDKit can generate an InChI for this molecule, but it's very long + assert len(info["NAGL"]) == 1 + assert info["NAGL"][0].startswith("InChI=1S/C342H686/c1-3-5") diff --git a/openff/interchange/smirnoff/_nonbonded.py b/openff/interchange/smirnoff/_nonbonded.py index c285a8377..81df463ab 100644 --- a/openff/interchange/smirnoff/_nonbonded.py +++ b/openff/interchange/smirnoff/_nonbonded.py @@ -991,12 +991,13 @@ def store_matches( @staticmethod def _log_charge_provenance( unique_molecule: Molecule, - key: ChargeModelTopologyKey | ChargeModelTopologyKey | SingleAtomChargeTopologyKey | LibraryChargeTopologyKey, + key: ChargeModelTopologyKey | SingleAtomChargeTopologyKey | LibraryChargeTopologyKey, ): try: inchi = unique_molecule.to_inchi() except Exception as error: - logger.warning( + # In principle this is more like a warning, but the context is already logging.INFO, so why not? + logger.info( f"Could not generate InChI for molecule with Hill formula {unique_molecule.to_hill_formula()}. " "This molecule's charge provenance will be logged without a valid InChI. " f"Error was {error}", From 278fc30b4ebe5f9afaea80756efbf1adb9bf5417 Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Tue, 21 Apr 2026 16:08:19 -0500 Subject: [PATCH 6/8] DOC: Update user docs for new charge assignment logging --- docs/using/advanced.md | 33 ++++++++++----------------------- docs/using/edges.md | 2 ++ 2 files changed, 12 insertions(+), 23 deletions(-) diff --git a/docs/using/advanced.md b/docs/using/advanced.md index 8623d02f7..6aea66a10 100644 --- a/docs/using/advanced.md +++ b/docs/using/advanced.md @@ -2,43 +2,30 @@ ## Charge assignment logging -SMIRNOFF force fields support several different partial charge assignment methods. These are applied, in the following order - -1. Look for preset charges from the `charge_from_molecules` argument -1. Look for chemical environment matches within the `` section -1. Look for chemical environment matches within the `` section -1. Try to run AM1-BCC according to the `` section or some variant - -If a molecule gets charges from one method, attempts to match charges for later methods are skipped. Note that preset charges override the force field and are not checked for consistency; any charges provided to the `charge_from_molecules` argument technically modify the force field. For more on how SMIRNOFF defines this behavior, see [this issue](https://github.com/openforcefield/standards/issues/68) and linked discussions. - -After all (mass-bearing) atoms have partial charges assigned, virtual sites are given charges by transferring charge from the atoms they are associated with ("orientation" atoms) according to the parameters of the force field. (The value of these parameters can be 0.0.) - -Given this complexity, it may be useful to track how each atom actually got charges assigned. Interchange has opt-in logging to track this behavior. This uses the [standard library `logging` module](https://docs.python.org/3/library/logging.html) at the `INFO` level. The easiest way to get started is by adding something like `logging.basicConfig(level=logging.INFO)` to the beginning of a script or program. For example, this script: +SMIRNOFF force fields support several different partial charge assignment methods and [employ a hierarchical scheme](smirnoff-charge-assignment-hierarchy) to determine which is used on each molecule. Given this complexity, it may be useful to track how each molecule's atoms actually had their charges assigned. (Note that, except for some complexity with ``, all atoms in a given molecule are assigned charges via the same method when using SMIRNOFF force fields.) Interchange has opt-in logging to track this behavior. This uses the [standard library `logging` module](https://docs.python.org/3/library/logging.html) at the `INFO` level. The easiest way to get started is by adding something like `logging.basicConfig(level=logging.INFO)` to the beginning of a script or program. For example, this script: ```python import logging -from openff.toolkit import ForceField, Molecule +from openff.toolkit import ForceField, Molecule, Topology logging.basicConfig(level=logging.INFO) ForceField("openff-2.2.0.offxml").create_interchange( - Molecule.from_smiles("CCO").to_topology() + Topology.from_molecules( + [ + Molecule.from_smiles("CCO"), + Molecule.from_smiles("O"), + ], + ) ) ``` will produce output including something like ```shell -INFO:openff.interchange.smirnoff._nonbonded:Charge section ToolkitAM1BCC, using charge method am1bcc, applied to (topology) atom index 0 -INFO:openff.interchange.smirnoff._nonbonded:Charge section ToolkitAM1BCC, using charge method am1bcc, applied to (topology) atom index 1 -INFO:openff.interchange.smirnoff._nonbonded:Charge section ToolkitAM1BCC, using charge method am1bcc, applied to (topology) atom index 2 -INFO:openff.interchange.smirnoff._nonbonded:Charge section ToolkitAM1BCC, using charge method am1bcc, applied to (topology) atom index 3 -INFO:openff.interchange.smirnoff._nonbonded:Charge section ToolkitAM1BCC, using charge method am1bcc, applied to (topology) atom index 4 -INFO:openff.interchange.smirnoff._nonbonded:Charge section ToolkitAM1BCC, using charge method am1bcc, applied to (topology) atom index 5 -INFO:openff.interchange.smirnoff._nonbonded:Charge section ToolkitAM1BCC, using charge method am1bcc, applied to (topology) atom index 6 -INFO:openff.interchange.smirnoff._nonbonded:Charge section ToolkitAM1BCC, using charge method am1bcc, applied to (topology) atom index 7 -INFO:openff.interchange.smirnoff._nonbonded:Charge section ToolkitAM1BCC, using charge method am1bcc, applied to (topology) atom index 8 +INFO:openff.interchange.smirnoff._nonbonded:Charge section ToolkitAM1BCC, using charge method am1bccelf10, applied to molecule with InChI InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3 +INFO:openff.interchange.smirnoff._nonbonded:Charge section LibraryCharges, applied to molecule with InChI InChI=1S/H2O/h1H2 ``` This functionality is only available with SMIRNOFF force fields. diff --git a/docs/using/edges.md b/docs/using/edges.md index 4462a771b..d9413394c 100644 --- a/docs/using/edges.md +++ b/docs/using/edges.md @@ -36,6 +36,8 @@ You may also wish to make the vdW cut-off distance longer. This is typically acc ## Quirks of charge assignment +(smirnoff-charge-assignment-hierarchy)= + ### Charge assignment hierarchy Interchange, following the [SMIRNOFF specification](https://openforcefield.github.io/standards/standards/smirnoff/#partial-charge-and-electrostatics-models), assigns charges to (heavy) atoms with the following priority: From 12f13349815b0f6c5e53e4338868729165a9c20d Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Mon, 27 Apr 2026 14:14:32 -0500 Subject: [PATCH 7/8] FIX: Use InChIKey and Hill formula in charge logging --- docs/using/advanced.md | 14 +- .../test_charge_assignment_logging.py | 202 +++++++++--------- openff/interchange/smirnoff/_nonbonded.py | 36 ++-- 3 files changed, 134 insertions(+), 118 deletions(-) diff --git a/docs/using/advanced.md b/docs/using/advanced.md index 6aea66a10..28b1dec11 100644 --- a/docs/using/advanced.md +++ b/docs/using/advanced.md @@ -2,20 +2,23 @@ ## Charge assignment logging -SMIRNOFF force fields support several different partial charge assignment methods and [employ a hierarchical scheme](smirnoff-charge-assignment-hierarchy) to determine which is used on each molecule. Given this complexity, it may be useful to track how each molecule's atoms actually had their charges assigned. (Note that, except for some complexity with ``, all atoms in a given molecule are assigned charges via the same method when using SMIRNOFF force fields.) Interchange has opt-in logging to track this behavior. This uses the [standard library `logging` module](https://docs.python.org/3/library/logging.html) at the `INFO` level. The easiest way to get started is by adding something like `logging.basicConfig(level=logging.INFO)` to the beginning of a script or program. For example, this script: +SMIRNOFF force fields support several different partial charge assignment methods and [employ a hierarchical scheme](smirnoff-charge-assignment-hierarchy) to determine which is used on each molecule. Given this complexity, it may be useful to track how each molecule's atoms actually had their charges assigned. (Note that, except for some complexity with ``, all atoms in a given molecule are assigned charges via the same method when using SMIRNOFF force fields.) Interchange has opt-in logging to track this behavior. This uses the [standard library `logging` module](https://docs.python.org/3/library/logging.html) at the `DEBUG` level. The easiest way to get started is by adding something like `logging.getLogger("openff.interchange").setLevel(logging.DEBUG)` to the beginning of a script or program. For example, this script: ```python import logging from openff.toolkit import ForceField, Molecule, Topology -logging.basicConfig(level=logging.INFO) +logging.basicConfig() +logging.getLogger("openff.interchange").setLevel(logging.DEBUG) -ForceField("openff-2.2.0.offxml").create_interchange( +ForceField("openff-2.3.0.offxml").create_interchange( Topology.from_molecules( [ Molecule.from_smiles("CCO"), Molecule.from_smiles("O"), + Molecule.from_smiles("O"), + Molecule.from_smiles(341 * "C"), ], ) ) @@ -24,8 +27,9 @@ ForceField("openff-2.2.0.offxml").create_interchange( will produce output including something like ```shell -INFO:openff.interchange.smirnoff._nonbonded:Charge section ToolkitAM1BCC, using charge method am1bccelf10, applied to molecule with InChI InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3 -INFO:openff.interchange.smirnoff._nonbonded:Charge section LibraryCharges, applied to molecule with InChI InChI=1S/H2O/h1H2 +DEBUG:openff.interchange.smirnoff._nonbonded:Charge section NAGLCharges, using NAGL model openff-gnn-am1bcc-1.0.0.pt, applied to molecule with Hill formula C2H6O and InChIKey LFQSCWFLJHTTHZ-UHFFFAOYSA-N +DEBUG:openff.interchange.smirnoff._nonbonded:Charge section LibraryCharges, applied to molecule with Hill formula H2O and InChIKey XLYOFNOQVPJJNP-UHFFFAOYSA-N +DEBUG:openff.interchange.smirnoff._nonbonded:Charge section NAGLCharges, using NAGL model openff-gnn-am1bcc-1.0.0.pt, applied to molecule with Hill formula C341H684 and InChIKey CLLUCSPVKWTWLW-UHFFFAOYSA-N ``` This functionality is only available with SMIRNOFF force fields. diff --git a/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py b/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py index 7096cd334..8aaa88567 100644 --- a/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py +++ b/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py @@ -114,19 +114,28 @@ AM1BCC_KEY = "am1bccelf10" if OPENEYE_AVAILABLE else "am1bcc" NAGL_KEY = "openff-gnn-am1bcc-1.0.0.pt" +INCHI_KEYS = { + "ethanol": "LFQSCWFLJHTTHZ-UHFFFAOYSA-N", + "water": "XLYOFNOQVPJJNP-UHFFFAOYSA-N", + "sodium": "FKNQFGJONOIPTF-UHFFFAOYSA-N", + "chloride": "VEXZGXHMUGYJMC-UHFFFAOYSA-M", + "methylamine": "BAVYZALUXZFZLV-UHFFFAOYSA-N", + "protein": "QIVKPXNXCAPCCO-WDSKDSINSA-N", +} -def map_methods_to_inchi(caplog: pytest.LogCaptureFixture) -> dict[str, list[int]]: + +def map_methods_to_inchikey(caplog: pytest.LogCaptureFixture) -> dict[str, list[int]]: """ Map partial charge assignment methods to (sorted) molecule InChI. """ - info = defaultdict(list) + debug = defaultdict(list) for record in caplog.records: if record.name.startswith("openff.interchange"): # This is a specific one-off logging event not relevant to this function - if "Could not generate InChI for molecule with Hill formula" in record.msg: + if "Could not generate InChIKey for molecule with Hill formula" in record.msg: continue - assert record.levelname == "INFO", "Only INFO logs are expected." + assert record.levelname == "DEBUG", "Only DEBUG logs are expected." else: # skip logged warnings from upstreams/other packages continue @@ -136,22 +145,22 @@ def map_methods_to_inchi(caplog: pytest.LogCaptureFixture) -> dict[str, list[int if message.startswith("Charge section LibraryCharges"): _, molecule = message.split(", ") - info["library"].append(molecule.split("InChI ")[-1]) + debug["library"].append(molecule.split("InChIKey ")[-1]) elif message.startswith("Charge section ToolkitAM1BCC"): _, method, molecule = message.split(", ") assert method.split()[-1] == AM1BCC_KEY, f"Expected method {AM1BCC_KEY} but got {method.split()[-1]}" - info[method.split()[-1]].append(molecule.split("InChI ")[-1]) + debug[method.split()[-1]].append(molecule.split("InChIKey ")[-1]) elif message.startswith("Charge section NAGLCharges"): _, _, molecule = message.split(", ") - info["NAGL"].append(molecule.split("InChI ")[-1]) + debug["NAGL"].append(molecule.split("InChIKey ")[-1]) # without also pulling the virtual site - particle mapping (which is different for each engine) - # it's hard to store more information than the orientation atoms that are affected by each + # it's hard to store more debugrmation than the orientation atoms that are affected by each # virtual site's charges elif message.startswith("Charge section VirtualSites"): orientation_atoms: list[int] = [ @@ -159,26 +168,26 @@ def map_methods_to_inchi(caplog: pytest.LogCaptureFixture) -> dict[str, list[int ] for atom in orientation_atoms: - info["orientation"].append(atom) + debug["orientation"].append(atom) elif message.startswith("Preset charges"): - info["preset"].append(message.split("InChI ")[-1]) + debug["preset"].append(message.split("InChIKey ")[-1]) elif message.startswith("Charge section ChargeIncrementModel"): if "using charge method" in message: _, method, molecule = message.split(", ") - info[f"chargeincrements_{method.split()[-1]}"].append( - molecule.split("InChI ")[-1], + debug[f"chargeincrements_{method.split()[-1]}"].append( + molecule.split("InChIKey ")[-1], ) elif "applying charge increment" in message: # TODO: Store the "other" atoms? - info["chargeincrements"].append(int(message.split("atom ")[1].split(" ")[0])) + debug["chargeincrements"].append(int(message.split("atom ")[1].split(" ")[0])) else: raise ValueError(f"Unexpected log message {message}") - return {key: sorted(val) for key, val in info.items()} + return {key: sorted(val) for key, val in debug.items()} def _ensure_pre_nagl_sage(sage: ForceField) -> ForceField: @@ -294,42 +303,42 @@ def ligand_and_water_and_ions(ligand, water_and_ions) -> Topology: def test_case0(caplog, sage_no_nagl, ligand): - with caplog.at_level(logging.INFO): + with caplog.at_level(logging.DEBUG, logger="openff.interchange"): sage_no_nagl.create_interchange(ligand.to_topology()) - info = map_methods_to_inchi(caplog) + debug = map_methods_to_inchikey(caplog) # atoms 0 through 8 are ethanol, getting AM1-BCC - assert info[AM1BCC_KEY] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3"] + assert debug[AM1BCC_KEY] == [INCHI_KEYS["ethanol"]] def test_case1(caplog, sage_no_nagl, ligand_and_water_and_ions): - with caplog.at_level(logging.INFO): + with caplog.at_level(logging.DEBUG, logger="openff.interchange"): sage_no_nagl.create_interchange(ligand_and_water_and_ions) - info = map_methods_to_inchi(caplog) + debug = map_methods_to_inchikey(caplog) # atoms 0 through 8 are ethanol, getting AM1-BCC - assert info[AM1BCC_KEY] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3"] + assert debug[AM1BCC_KEY] == [INCHI_KEYS["ethanol"]] # atoms 9 through 21 are water + ions, getting library charges - assert info["library"] == [ - "InChI=1S/ClH/h1H/p-1", - "InChI=1S/H2O/h1H2", - "InChI=1S/Na/q+1", + assert debug["library"] == [ + INCHI_KEYS["sodium"], + INCHI_KEYS["chloride"], + INCHI_KEYS["water"], ] def test_case2(caplog, sage_no_nagl, ligand, solvent): topology = Topology.from_molecules([ligand, solvent, solvent, solvent]) - with caplog.at_level(logging.INFO): + with caplog.at_level(logging.DEBUG, logger="openff.interchange"): sage_no_nagl.create_interchange(topology) - info = map_methods_to_inchi(caplog) + debug = map_methods_to_inchikey(caplog) # everything should get AM1-BCC charges - assert info[AM1BCC_KEY] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3", "InChI=1S/CH5N/c1-2/h2H2,1H3"] + assert debug[AM1BCC_KEY] == [INCHI_KEYS["methylamine"], INCHI_KEYS["ethanol"]] def test_case3(caplog, sage_no_nagl, ligand_and_water_and_ions, solvent): @@ -338,22 +347,22 @@ def test_case3(caplog, sage_no_nagl, ligand_and_water_and_ions, solvent): ligand_and_water_and_ions.molecule(0).assign_partial_charges(partial_charge_method="gasteiger") - with caplog.at_level(logging.INFO): + with caplog.at_level(logging.DEBUG, logger="openff.interchange"): sage_no_nagl.create_interchange( ligand_and_water_and_ions, ) - info = map_methods_to_inchi(caplog) + debug = map_methods_to_inchikey(caplog) # atoms 0 through 8 are ethanol, getting AM1-BCC, # and also solvent molecules (starting at index 22) - assert info[AM1BCC_KEY] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3", "InChI=1S/CH5N/c1-2/h2H2,1H3"] + assert debug[AM1BCC_KEY] == [INCHI_KEYS["methylamine"], INCHI_KEYS["ethanol"]] # atoms 9 through 21 are water + ions, getting library charges - assert info["library"] == [ - "InChI=1S/ClH/h1H/p-1", - "InChI=1S/H2O/h1H2", - "InChI=1S/Na/q+1", + assert debug["library"] == [ + INCHI_KEYS["sodium"], + INCHI_KEYS["chloride"], + INCHI_KEYS["water"], ] @@ -370,35 +379,35 @@ def test_cases4_5(caplog, ligand_and_water_and_ions, preset_on_protein): if preset_on_protein: complex.molecule(0).assign_partial_charges(partial_charge_method="zeros") - with caplog.at_level(logging.INFO): + with caplog.at_level(logging.DEBUG, logger="openff.interchange"): if preset_on_protein: ff.create_interchange(complex, charge_from_molecules=[complex.molecule(0)]) else: ff.create_interchange(complex) - info = map_methods_to_inchi(caplog) + debug = map_methods_to_inchikey(caplog) - assert info[AM1BCC_KEY] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3"] + assert debug[AM1BCC_KEY] == [INCHI_KEYS["ethanol"]] if preset_on_protein: # protein gets preset charges - assert info["preset"] == [ - "InChI=1S/C9H17N3O3/c1-5(8(14)10-4)12-9(15)6(2)11-7(3)13/h5-6H,1-4H3,(H,10,14)(H,11,13)(H,12,15)/t5-,6-/m0/s1", + assert debug["preset"] == [ + INCHI_KEYS["protein"], ] # everything after the protein and ligand should get library charges - assert info["library"] == [ - "InChI=1S/ClH/h1H/p-1", - "InChI=1S/H2O/h1H2", - "InChI=1S/Na/q+1", + assert debug["library"] == [ + INCHI_KEYS["sodium"], + INCHI_KEYS["chloride"], + INCHI_KEYS["water"], ] else: # the protein and everything after the ligand should get library charges - assert info["library"] == [ - "InChI=1S/C9H17N3O3/c1-5(8(14)10-4)12-9(15)6(2)11-7(3)13/h5-6H,1-4H3,(H,10,14)(H,11,13)(H,12,15)/t5-,6-/m0/s1", - "InChI=1S/ClH/h1H/p-1", - "InChI=1S/H2O/h1H2", - "InChI=1S/Na/q+1", + assert debug["library"] == [ + INCHI_KEYS["sodium"], + INCHI_KEYS["protein"], + INCHI_KEYS["chloride"], + INCHI_KEYS["water"], ] @@ -407,87 +416,87 @@ def test_case6(caplog, ligand, water): topology = Topology.from_molecules([ligand, water, water, water]) - with caplog.at_level(logging.INFO): + with caplog.at_level(logging.DEBUG, logger="openff.interchange"): force_field.create_interchange(topology) - info = map_methods_to_inchi(caplog) + debug = map_methods_to_inchikey(caplog) # atoms 0 through 8 are ethanol, getting AM1-BCC charges - assert info[AM1BCC_KEY] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3"] + assert debug[AM1BCC_KEY] == [INCHI_KEYS["ethanol"]] # atoms 9 through 17 are water atoms, getting library charges - assert info["library"] == ["InChI=1S/H2O/h1H2"] + assert debug["library"] == [INCHI_KEYS["water"]] # particles 18 through 20 are water virtual sites, but the current logging strategy # makes it hard to match these up (and the particle indices are different OpenMM/GROMACS/etc) # can still check that orientation atoms are subject to virtual site # charge increments (even if the increment is +0.0 e) - assert info["orientation"] == [*range(9, 18)] + assert debug["orientation"] == [*range(9, 18)] def test_case7(caplog, sage_no_nagl, ligand_and_water_and_ions): ligand_and_water_and_ions.molecule(0).assign_partial_charges(partial_charge_method="gasteiger") - with caplog.at_level(logging.INFO): + with caplog.at_level(logging.DEBUG, logger="openff.interchange"): sage_no_nagl.create_interchange( ligand_and_water_and_ions, charge_from_molecules=[ligand_and_water_and_ions.molecule(0)], ) - info = map_methods_to_inchi(caplog) + debug = map_methods_to_inchikey(caplog) # atoms 0 through 8 are ethanol, getting preset charges - assert info["preset"] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3"] + assert debug["preset"] == [INCHI_KEYS["ethanol"]] # atoms 9 through 21 are water + ions, getting library charges - assert info["library"] == [ - "InChI=1S/ClH/h1H/p-1", - "InChI=1S/H2O/h1H2", - "InChI=1S/Na/q+1", + assert debug["library"] == [ + INCHI_KEYS["sodium"], + INCHI_KEYS["chloride"], + INCHI_KEYS["water"], ] def test_case8(caplog, sage_no_nagl, water_and_ions): water_and_ions.molecule(0).assign_partial_charges(partial_charge_method="gasteiger") - with caplog.at_level(logging.INFO): + with caplog.at_level(logging.DEBUG, logger="openff.interchange"): sage_no_nagl.create_interchange( water_and_ions, charge_from_molecules=[water_and_ions.molecule(0)], ) - info = map_methods_to_inchi(caplog) + debug = map_methods_to_inchikey(caplog) # atoms 0 through 8 are water, getting preset charges - assert info["preset"] == ["InChI=1S/H2O/h1H2"] + assert debug["preset"] == [INCHI_KEYS["water"]] # atoms 9 through 12 are ions, getting library charges - assert info["library"] == ["InChI=1S/ClH/h1H/p-1", "InChI=1S/Na/q+1"] + assert debug["library"] == [INCHI_KEYS["sodium"], INCHI_KEYS["chloride"]] def test_case9(caplog, sage_with_bond_charge): - with caplog.at_level(logging.INFO): + with caplog.at_level(logging.DEBUG, logger="openff.interchange"): _ensure_pre_nagl_sage(sage_with_bond_charge).create_interchange( Molecule.from_mapped_smiles( "[H:3][C:1]([H:4])([H:5])[Cl:2]", ).to_topology(), ) - info = map_methods_to_inchi(caplog) + debug = map_methods_to_inchikey(caplog) # atoms 0 through 5 are ligand, getting AM1-BCC charges - assert info[AM1BCC_KEY] == ["InChI=1S/CH3Cl/c1-2/h1H3"] + assert debug[AM1BCC_KEY] == ["NEHMKBQYUWJMIP-UHFFFAOYSA-N"] # atoms 0 and 1 are the orientation atoms of the sigma hole virtual site - assert info["orientation"] == [0, 1] + assert debug["orientation"] == [0, 1] def test_case10(caplog, sage_with_nagl_chargeincrements, ligand): pytest.importorskip("openff.nagl") pytest.importorskip("rdkit") - with caplog.at_level(logging.INFO): + with caplog.at_level(logging.DEBUG, logger="openff.interchange"): with toolkit_registry_manager( toolkit_registry=ToolkitRegistry( toolkit_precedence=[NAGLToolkitWrapper, RDKitToolkitWrapper], @@ -497,33 +506,33 @@ def test_case10(caplog, sage_with_nagl_chargeincrements, ligand): ligand.to_topology(), ) - info = map_methods_to_inchi(caplog) + debug = map_methods_to_inchikey(caplog) # atoms 0 through 8 are ligand, getting NAGL charges - assert info[f"chargeincrements_{NAGL_KEY}"] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3"] + assert debug[f"chargeincrements_{NAGL_KEY}"] == ["LFQSCWFLJHTTHZ-UHFFFAOYSA-N"] # TODO: These are logged symmetrically (i.e. hydrogens are listed) # even though the charges appear to be correct, assert should # simply by == [0, 1] since the hydrogens shouldn't be caught - assert 0 in info["chargeincrements"] - assert 1 in info["chargeincrements"] + assert 0 in debug["chargeincrements"] + assert 1 in debug["chargeincrements"] # the standard AM1-BCC should not have ran - assert AM1BCC_KEY not in info + assert AM1BCC_KEY not in debug def test_case11(caplog, sage, ligand): """Test that NAGL charge assignment is properly logged.""" pytest.importorskip("openff.nagl") - with caplog.at_level(logging.INFO): + with caplog.at_level(logging.DEBUG, logger="openff.interchange"): sage.create_interchange(ligand.to_topology()) - info = map_methods_to_inchi(caplog) + debug = map_methods_to_inchikey(caplog) # Should log NAGL charges for all atoms - assert "NAGL" in info - assert info["NAGL"] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3"] + assert "NAGL" in debug + assert debug["NAGL"] == [INCHI_KEYS["ethanol"]] def test_case12(caplog, sage, water): @@ -532,14 +541,14 @@ def test_case12(caplog, sage, water): topology = Topology.from_molecules([water, water]) - with caplog.at_level(logging.INFO): + with caplog.at_level(logging.DEBUG, logger="openff.interchange"): sage.create_interchange(topology) - info = map_methods_to_inchi(caplog) + debug = map_methods_to_inchikey(caplog) # Water should get library charges, not NAGL - assert info["library"] == ["InChI=1S/H2O/h1H2"] # 2 water molecules - assert "NAGL" not in info + assert debug["library"] == [INCHI_KEYS["water"]] # 2 water molecules + assert "NAGL" not in debug def test_case13(caplog, sage, ligand, water): @@ -548,16 +557,16 @@ def test_case13(caplog, sage, ligand, water): topology = Topology.from_molecules([ligand, water]) - with caplog.at_level(logging.INFO): + with caplog.at_level(logging.DEBUG, logger="openff.interchange"): sage.create_interchange(topology) - info = map_methods_to_inchi(caplog) + debug = map_methods_to_inchikey(caplog) # Ligand should get NAGL charges - assert info["NAGL"] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3"] + assert debug["NAGL"] == [INCHI_KEYS["ethanol"]] # Water should get library charges - assert info["library"] == ["InChI=1S/H2O/h1H2"] + assert debug["library"] == [INCHI_KEYS["water"]] def test_case14(caplog, sage, ligand): @@ -566,34 +575,33 @@ def test_case14(caplog, sage, ligand): ligand.assign_partial_charges("gasteiger") - with caplog.at_level(logging.INFO): + with caplog.at_level(logging.DEBUG, logger="openff.interchange"): sage.create_interchange( ligand.to_topology(), charge_from_molecules=[ligand], ) - info = map_methods_to_inchi(caplog) + debug = map_methods_to_inchikey(caplog) # Should use preset charges, not NAGL - assert info["preset"] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3"] - assert "NAGL" not in info + assert debug["preset"] == [INCHI_KEYS["ethanol"]] + assert "NAGL" not in debug def test_inchi_fallback(caplog, sage): - """Test that molecules that fail InChI generation are still logged in some way.""" + """Test that molecules that fail InChIKey generation are still logged in some way.""" from openff.toolkit.utils.toolkits import OpenEyeToolkitWrapper # TODO: Might be a toolkit bug that needs to be worked around - with caplog.at_level(logging.INFO): + with caplog.at_level(logging.DEBUG, logger="openff.interchange"): sage.create_interchange( Molecule.from_smiles(342 * "C").to_topology(), ) - info = map_methods_to_inchi(caplog) + debug = map_methods_to_inchikey(caplog) if OpenEyeToolkitWrapper.is_available(): - assert info["NAGL"] == ["UNKNOWN_INCHI"] + assert debug["NAGL"] == ["UNKNOWN_INCHI"] else: - # RDKit can generate an InChI for this molecule, but it's very long - assert len(info["NAGL"]) == 1 - assert info["NAGL"][0].startswith("InChI=1S/C342H686/c1-3-5") + # but RDKit can generate an InChIKey + assert debug["NAGL"] == ["FGIJOQKVOYCTPJ-UHFFFAOYSA-N"] diff --git a/openff/interchange/smirnoff/_nonbonded.py b/openff/interchange/smirnoff/_nonbonded.py index 60e76f5df..9f5afd1e5 100644 --- a/openff/interchange/smirnoff/_nonbonded.py +++ b/openff/interchange/smirnoff/_nonbonded.py @@ -978,49 +978,53 @@ def _log_charge_provenance( unique_molecule: Molecule, key: ChargeModelTopologyKey | SingleAtomChargeTopologyKey | LibraryChargeTopologyKey, ): + hill_formaula = unique_molecule.to_hill_formula() + try: - inchi = unique_molecule.to_inchi() + inchikey = unique_molecule.to_inchikey() except Exception as error: - # In principle this is more like a warning, but the context is already logging.INFO, so why not? - logger.info( - f"Could not generate InChI for molecule with Hill formula {unique_molecule.to_hill_formula()}. " - "This molecule's charge provenance will be logged without a valid InChI. " + # In principle this is more like a warning, but the context is already logging.DEBUG, so why not? + logger.debug( + f"Could not generate InChIKey for molecule with Hill formula {hill_formaula}. " + "This molecule's charge provenance will be logged without a valid InChIKey. " f"Error was {error}", ) - inchi = "UNKNOWN_INCHI" + inchikey = "UNKNOWN_INCHIKEY" + + id_string = f"molecule with Hill formula {hill_formaula} and InChIKey {inchikey}" if type(key) is LibraryChargeTopologyKey: - logger.info( - f"Charge section LibraryCharges, applied to molecule with InChI {inchi}", + logger.debug( + "Charge section LibraryCharges, applied to " + id_string, ) elif type(key) is SingleAtomChargeTopologyKey: if key.extras["handler"] == "ToolkitAM1BCCHandler": - logger.info( + logger.debug( "Charge section ToolkitAM1BCC, using charge method " f"{key.extras['partial_charge_method']}, " - f"applied to molecule with InChI {inchi}", + f"applied to " + id_string, ) elif key.extras["handler"] == "NAGLChargesHandler": - logger.info( + logger.debug( "Charge section NAGLCharges, using NAGL model " f"{key.extras['partial_charge_method']}, " - f"applied to molecule with InChI {inchi}", + f"applied to " + id_string, ) elif key.extras["handler"] == "preset": - logger.info( - f"Preset charges applied to molecule with InChI {inchi}", + logger.debug( + "Preset charges applied to " + id_string, ) else: raise ValueError(f"Unhandled handler {key.extras['handler']}") elif type(key) is ChargeModelTopologyKey: - logger.info( + logger.debug( "Charge section ChargeIncrementModel, using charge method " f"{key.partial_charge_method}, " - f"applied to molecule with InChI {inchi}", + f"applied to " + id_string, ) elif type(key) is ChargeIncrementTopologyKey: From 33a1911c745ae7d4a90dc61acc4e1826fb88bc0d Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Mon, 27 Apr 2026 14:28:47 -0500 Subject: [PATCH 8/8] TST: Update test --- .../unit_tests/smirnoff/test_charge_assignment_logging.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py b/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py index 8aaa88567..a823ed7e2 100644 --- a/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py +++ b/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py @@ -601,7 +601,7 @@ def test_inchi_fallback(caplog, sage): debug = map_methods_to_inchikey(caplog) if OpenEyeToolkitWrapper.is_available(): - assert debug["NAGL"] == ["UNKNOWN_INCHI"] + assert debug["NAGL"] == ["UNKNOWN_INCHIKEY"] else: # but RDKit can generate an InChIKey assert debug["NAGL"] == ["FGIJOQKVOYCTPJ-UHFFFAOYSA-N"]