diff --git a/docs/using/advanced.md b/docs/using/advanced.md index a409efb45..28b1dec11 100644 --- a/docs/using/advanced.md +++ b/docs/using/advanced.md @@ -2,44 +2,34 @@ ## Charge assignment logging -SMIRNOFF force fields support several different partial charge assignment methods. These are applied, in the following order - -1. Look for preset charges from the `charge_from_molecules` argument -1. Look for chemical environment matches within the `` section -1. Look for chemical environment matches within the `` section -1. Try to run AM1-BCC according to the `` section or some variant - -If a molecule gets charges from one method, attempts to match charges for later methods are skipped. Note that preset charges override the force field and are not checked for consistency; any charges provided to the `charge_from_molecules` argument technically modify the force field. For more on how SMIRNOFF defines this behavior, see [this issue](https://github.com/openforcefield/standards/issues/68) and linked discussions. - -After all (mass-bearing) atoms have partial charges assigned, virtual sites are given charges by transferring charge from the atoms they are associated with ("orientation" atoms) according to the parameters of the force field. (The value of these parameters can be 0.0.) - -Given this complexity, it may be useful to track how each atom actually got charges assigned. Interchange has opt-in logging to track this behavior. This uses the [standard library `logging` module](https://docs.python.org/3/library/logging.html) at the `logging.DEBUG` level. The easiest way to get started is by adding something like `logging.getLogger("openff.interchange").setLevel(logging.DEBUG)` to the beginning of a script or program. For example, this script: +SMIRNOFF force fields support several different partial charge assignment methods and [employ a hierarchical scheme](smirnoff-charge-assignment-hierarchy) to determine which is used on each molecule. Given this complexity, it may be useful to track how each molecule's atoms actually had their charges assigned. (Note that, except for some complexity with ``, all atoms in a given molecule are assigned charges via the same method when using SMIRNOFF force fields.) Interchange has opt-in logging to track this behavior. This uses the [standard library `logging` module](https://docs.python.org/3/library/logging.html) at the `DEBUG` level. The easiest way to get started is by adding something like `logging.getLogger("openff.interchange").setLevel(logging.DEBUG)` to the beginning of a script or program. For example, this script: ```python import logging -from openff.toolkit import ForceField, Molecule +from openff.toolkit import ForceField, Molecule, Topology logging.basicConfig() logging.getLogger("openff.interchange").setLevel(logging.DEBUG) -ForceField("openff-2.2.0.offxml").create_interchange( - Molecule.from_smiles("CCO").to_topology() +ForceField("openff-2.3.0.offxml").create_interchange( + Topology.from_molecules( + [ + Molecule.from_smiles("CCO"), + Molecule.from_smiles("O"), + Molecule.from_smiles("O"), + Molecule.from_smiles(341 * "C"), + ], + ) ) ``` will produce output including something like ```shell -DEBUG:openff.interchange.smirnoff._nonbonded:Charge section ToolkitAM1BCC, using charge method am1bccelf10, applied to topology atom index 0 -DEBUG:openff.interchange.smirnoff._nonbonded:Charge section ToolkitAM1BCC, using charge method am1bccelf10, applied to topology atom index 1 -DEBUG:openff.interchange.smirnoff._nonbonded:Charge section ToolkitAM1BCC, using charge method am1bccelf10, applied to topology atom index 2 -DEBUG:openff.interchange.smirnoff._nonbonded:Charge section ToolkitAM1BCC, using charge method am1bccelf10, applied to topology atom index 3 -DEBUG:openff.interchange.smirnoff._nonbonded:Charge section ToolkitAM1BCC, using charge method am1bccelf10, applied to topology atom index 4 -DEBUG:openff.interchange.smirnoff._nonbonded:Charge section ToolkitAM1BCC, using charge method am1bccelf10, applied to topology atom index 5 -DEBUG:openff.interchange.smirnoff._nonbonded:Charge section ToolkitAM1BCC, using charge method am1bccelf10, applied to topology atom index 6 -DEBUG:openff.interchange.smirnoff._nonbonded:Charge section ToolkitAM1BCC, using charge method am1bccelf10, applied to topology atom index 7 -DEBUG:openff.interchange.smirnoff._nonbonded:Charge section ToolkitAM1BCC, using charge method am1bccelf10, applied to topology atom index 8 +DEBUG:openff.interchange.smirnoff._nonbonded:Charge section NAGLCharges, using NAGL model openff-gnn-am1bcc-1.0.0.pt, applied to molecule with Hill formula C2H6O and InChIKey LFQSCWFLJHTTHZ-UHFFFAOYSA-N +DEBUG:openff.interchange.smirnoff._nonbonded:Charge section LibraryCharges, applied to molecule with Hill formula H2O and InChIKey XLYOFNOQVPJJNP-UHFFFAOYSA-N +DEBUG:openff.interchange.smirnoff._nonbonded:Charge section NAGLCharges, using NAGL model openff-gnn-am1bcc-1.0.0.pt, applied to molecule with Hill formula C341H684 and InChIKey CLLUCSPVKWTWLW-UHFFFAOYSA-N ``` This functionality is only available with SMIRNOFF force fields. diff --git a/docs/using/edges.md b/docs/using/edges.md index 4462a771b..d9413394c 100644 --- a/docs/using/edges.md +++ b/docs/using/edges.md @@ -36,6 +36,8 @@ You may also wish to make the vdW cut-off distance longer. This is typically acc ## Quirks of charge assignment +(smirnoff-charge-assignment-hierarchy)= + ### Charge assignment hierarchy Interchange, following the [SMIRNOFF specification](https://openforcefield.github.io/standards/standards/smirnoff/#partial-charge-and-electrostatics-models), assigns charges to (heavy) atoms with the following priority: diff --git a/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py b/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py index 956761d4b..a823ed7e2 100644 --- a/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py +++ b/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py @@ -114,33 +114,53 @@ AM1BCC_KEY = "am1bccelf10" if OPENEYE_AVAILABLE else "am1bcc" NAGL_KEY = "openff-gnn-am1bcc-1.0.0.pt" +INCHI_KEYS = { + "ethanol": "LFQSCWFLJHTTHZ-UHFFFAOYSA-N", + "water": "XLYOFNOQVPJJNP-UHFFFAOYSA-N", + "sodium": "FKNQFGJONOIPTF-UHFFFAOYSA-N", + "chloride": "VEXZGXHMUGYJMC-UHFFFAOYSA-M", + "methylamine": "BAVYZALUXZFZLV-UHFFFAOYSA-N", + "protein": "QIVKPXNXCAPCCO-WDSKDSINSA-N", +} -def map_methods_to_atom_indices(caplog: pytest.LogCaptureFixture) -> dict[str, list[int]]: + +def map_methods_to_inchikey(caplog: pytest.LogCaptureFixture) -> dict[str, list[int]]: """ - Map partial charge assignment methods to (sorted) atom indices. + Map partial charge assignment methods to (sorted) molecule InChI. """ - info = defaultdict(list) + debug = defaultdict(list) for record in caplog.records: - # skip logged warnings from upstreams/other packages if record.name.startswith("openff.interchange"): + # This is a specific one-off logging event not relevant to this function + if "Could not generate InChIKey for molecule with Hill formula" in record.msg: + continue assert record.levelname == "DEBUG", "Only DEBUG logs are expected." else: + # skip logged warnings from upstreams/other packages continue message = record.msg if message.startswith("Charge section LibraryCharges"): - info["library"].append(int(message.split("atom index ")[-1])) + _, molecule = message.split(", ") + + debug["library"].append(molecule.split("InChIKey ")[-1]) elif message.startswith("Charge section ToolkitAM1BCC"): - info[message.split(", ")[1].split(" ")[-1]].append(int(message.split("atom index ")[-1])) + _, method, molecule = message.split(", ") + + assert method.split()[-1] == AM1BCC_KEY, f"Expected method {AM1BCC_KEY} but got {method.split()[-1]}" + + debug[method.split()[-1]].append(molecule.split("InChIKey ")[-1]) elif message.startswith("Charge section NAGLCharges"): - info["NAGLChargesHandler"].append(int(message.split("atom index ")[-1])) + _, _, molecule = message.split(", ") + + debug["NAGL"].append(molecule.split("InChIKey ")[-1]) # without also pulling the virtual site - particle mapping (which is different for each engine) - # it's hard to store more information than the orientation atoms that are affected by each + # it's hard to store more debugrmation than the orientation atoms that are affected by each # virtual site's charges elif message.startswith("Charge section VirtualSites"): orientation_atoms: list[int] = [ @@ -148,25 +168,26 @@ def map_methods_to_atom_indices(caplog: pytest.LogCaptureFixture) -> dict[str, l ] for atom in orientation_atoms: - info["orientation"].append(atom) + debug["orientation"].append(atom) elif message.startswith("Preset charges"): - info["preset"].append(int(message.split("atom index")[-1])) + debug["preset"].append(message.split("InChIKey ")[-1]) elif message.startswith("Charge section ChargeIncrementModel"): if "using charge method" in message: - info[f"chargeincrements_{message.split(',')[1].split(' ')[-1]}"].append( - int(message.split("atom index ")[-1]), + _, method, molecule = message.split(", ") + debug[f"chargeincrements_{method.split()[-1]}"].append( + molecule.split("InChIKey ")[-1], ) elif "applying charge increment" in message: # TODO: Store the "other" atoms? - info["chargeincrements"].append(int(message.split("atom ")[1].split(" ")[0])) + debug["chargeincrements"].append(int(message.split("atom ")[1].split(" ")[0])) else: raise ValueError(f"Unexpected log message {message}") - return {key: sorted(val) for key, val in info.items()} + return {key: sorted(val) for key, val in debug.items()} def _ensure_pre_nagl_sage(sage: ForceField) -> ForceField: @@ -282,38 +303,42 @@ def ligand_and_water_and_ions(ligand, water_and_ions) -> Topology: def test_case0(caplog, sage_no_nagl, ligand): - with caplog.at_level(logging.DEBUG): + with caplog.at_level(logging.DEBUG, logger="openff.interchange"): sage_no_nagl.create_interchange(ligand.to_topology()) - info = map_methods_to_atom_indices(caplog) + debug = map_methods_to_inchikey(caplog) # atoms 0 through 8 are ethanol, getting AM1-BCC - assert info[AM1BCC_KEY] == [*range(0, 9)] + assert debug[AM1BCC_KEY] == [INCHI_KEYS["ethanol"]] def test_case1(caplog, sage_no_nagl, ligand_and_water_and_ions): - with caplog.at_level(logging.DEBUG): + with caplog.at_level(logging.DEBUG, logger="openff.interchange"): sage_no_nagl.create_interchange(ligand_and_water_and_ions) - info = map_methods_to_atom_indices(caplog) + debug = map_methods_to_inchikey(caplog) # atoms 0 through 8 are ethanol, getting AM1-BCC - assert info[AM1BCC_KEY] == [*range(0, 9)] + assert debug[AM1BCC_KEY] == [INCHI_KEYS["ethanol"]] # atoms 9 through 21 are water + ions, getting library charges - assert info["library"] == [*range(9, 22)] + assert debug["library"] == [ + INCHI_KEYS["sodium"], + INCHI_KEYS["chloride"], + INCHI_KEYS["water"], + ] def test_case2(caplog, sage_no_nagl, ligand, solvent): topology = Topology.from_molecules([ligand, solvent, solvent, solvent]) - with caplog.at_level(logging.DEBUG): + with caplog.at_level(logging.DEBUG, logger="openff.interchange"): sage_no_nagl.create_interchange(topology) - info = map_methods_to_atom_indices(caplog) + debug = map_methods_to_inchikey(caplog) # everything should get AM1-BCC charges - assert info[AM1BCC_KEY] == [*range(0, topology.n_atoms)] + assert debug[AM1BCC_KEY] == [INCHI_KEYS["methylamine"], INCHI_KEYS["ethanol"]] def test_case3(caplog, sage_no_nagl, ligand_and_water_and_ions, solvent): @@ -322,19 +347,23 @@ def test_case3(caplog, sage_no_nagl, ligand_and_water_and_ions, solvent): ligand_and_water_and_ions.molecule(0).assign_partial_charges(partial_charge_method="gasteiger") - with caplog.at_level(logging.DEBUG): + with caplog.at_level(logging.DEBUG, logger="openff.interchange"): sage_no_nagl.create_interchange( ligand_and_water_and_ions, ) - info = map_methods_to_atom_indices(caplog) + debug = map_methods_to_inchikey(caplog) # atoms 0 through 8 are ethanol, getting AM1-BCC, # and also solvent molecules (starting at index 22) - assert info[AM1BCC_KEY] == [*range(0, 9), *range(22, 22 + 3 * 7)] + assert debug[AM1BCC_KEY] == [INCHI_KEYS["methylamine"], INCHI_KEYS["ethanol"]] # atoms 9 through 21 are water + ions, getting library charges - assert info["library"] == [*range(9, 22)] + assert debug["library"] == [ + INCHI_KEYS["sodium"], + INCHI_KEYS["chloride"], + INCHI_KEYS["water"], + ] @pytest.mark.slow @@ -350,29 +379,35 @@ def test_cases4_5(caplog, ligand_and_water_and_ions, preset_on_protein): if preset_on_protein: complex.molecule(0).assign_partial_charges(partial_charge_method="zeros") - with caplog.at_level(logging.DEBUG): + with caplog.at_level(logging.DEBUG, logger="openff.interchange"): if preset_on_protein: ff.create_interchange(complex, charge_from_molecules=[complex.molecule(0)]) else: ff.create_interchange(complex) - info = map_methods_to_atom_indices(caplog) + debug = map_methods_to_inchikey(caplog) - assert info[AM1BCC_KEY] == [*range(complex.molecule(0).n_atoms, complex.molecule(0).n_atoms + 9)] + assert debug[AM1BCC_KEY] == [INCHI_KEYS["ethanol"]] if preset_on_protein: # protein gets preset charges - assert info["preset"] == [*range(0, complex.molecule(0).n_atoms)] + assert debug["preset"] == [ + INCHI_KEYS["protein"], + ] # everything after the protein and ligand should get library charges - assert info["library"] == [ - *range(complex.molecule(0).n_atoms + 9, complex.n_atoms), + assert debug["library"] == [ + INCHI_KEYS["sodium"], + INCHI_KEYS["chloride"], + INCHI_KEYS["water"], ] else: # the protein and everything after the ligand should get library charges - assert info["library"] == [ - *range(0, complex.molecule(0).n_atoms), - *range(complex.molecule(0).n_atoms + 9, complex.n_atoms), + assert debug["library"] == [ + INCHI_KEYS["sodium"], + INCHI_KEYS["protein"], + INCHI_KEYS["chloride"], + INCHI_KEYS["water"], ] @@ -381,83 +416,87 @@ def test_case6(caplog, ligand, water): topology = Topology.from_molecules([ligand, water, water, water]) - with caplog.at_level(logging.DEBUG): + with caplog.at_level(logging.DEBUG, logger="openff.interchange"): force_field.create_interchange(topology) - info = map_methods_to_atom_indices(caplog) + debug = map_methods_to_inchikey(caplog) # atoms 0 through 8 are ethanol, getting AM1-BCC charges - assert info[AM1BCC_KEY] == [*range(0, 9)] + assert debug[AM1BCC_KEY] == [INCHI_KEYS["ethanol"]] # atoms 9 through 17 are water atoms, getting library charges - assert info["library"] == [*range(9, 18)] + assert debug["library"] == [INCHI_KEYS["water"]] # particles 18 through 20 are water virtual sites, but the current logging strategy # makes it hard to match these up (and the particle indices are different OpenMM/GROMACS/etc) # can still check that orientation atoms are subject to virtual site # charge increments (even if the increment is +0.0 e) - assert info["orientation"] == [*range(9, 18)] + assert debug["orientation"] == [*range(9, 18)] def test_case7(caplog, sage_no_nagl, ligand_and_water_and_ions): ligand_and_water_and_ions.molecule(0).assign_partial_charges(partial_charge_method="gasteiger") - with caplog.at_level(logging.DEBUG): + with caplog.at_level(logging.DEBUG, logger="openff.interchange"): sage_no_nagl.create_interchange( ligand_and_water_and_ions, charge_from_molecules=[ligand_and_water_and_ions.molecule(0)], ) - info = map_methods_to_atom_indices(caplog) + debug = map_methods_to_inchikey(caplog) # atoms 0 through 8 are ethanol, getting preset charges - assert info["preset"] == [*range(0, 9)] + assert debug["preset"] == [INCHI_KEYS["ethanol"]] # atoms 9 through 21 are water + ions, getting library charges - assert info["library"] == [*range(9, 22)] + assert debug["library"] == [ + INCHI_KEYS["sodium"], + INCHI_KEYS["chloride"], + INCHI_KEYS["water"], + ] def test_case8(caplog, sage_no_nagl, water_and_ions): water_and_ions.molecule(0).assign_partial_charges(partial_charge_method="gasteiger") - with caplog.at_level(logging.DEBUG): + with caplog.at_level(logging.DEBUG, logger="openff.interchange"): sage_no_nagl.create_interchange( water_and_ions, charge_from_molecules=[water_and_ions.molecule(0)], ) - info = map_methods_to_atom_indices(caplog) + debug = map_methods_to_inchikey(caplog) # atoms 0 through 8 are water, getting preset charges - assert info["preset"] == [*range(0, 9)] + assert debug["preset"] == [INCHI_KEYS["water"]] # atoms 9 through 12 are ions, getting library charges - assert info["library"] == [*range(9, 13)] + assert debug["library"] == [INCHI_KEYS["sodium"], INCHI_KEYS["chloride"]] def test_case9(caplog, sage_with_bond_charge): - with caplog.at_level(logging.DEBUG): + with caplog.at_level(logging.DEBUG, logger="openff.interchange"): _ensure_pre_nagl_sage(sage_with_bond_charge).create_interchange( Molecule.from_mapped_smiles( "[H:3][C:1]([H:4])([H:5])[Cl:2]", ).to_topology(), ) - info = map_methods_to_atom_indices(caplog) + debug = map_methods_to_inchikey(caplog) # atoms 0 through 5 are ligand, getting AM1-BCC charges - assert info[AM1BCC_KEY] == [*range(0, 5)] + assert debug[AM1BCC_KEY] == ["NEHMKBQYUWJMIP-UHFFFAOYSA-N"] # atoms 0 and 1 are the orientation atoms of the sigma hole virtual site - assert info["orientation"] == [0, 1] + assert debug["orientation"] == [0, 1] def test_case10(caplog, sage_with_nagl_chargeincrements, ligand): pytest.importorskip("openff.nagl") pytest.importorskip("rdkit") - with caplog.at_level(logging.DEBUG): + with caplog.at_level(logging.DEBUG, logger="openff.interchange"): with toolkit_registry_manager( toolkit_registry=ToolkitRegistry( toolkit_precedence=[NAGLToolkitWrapper, RDKitToolkitWrapper], @@ -467,33 +506,33 @@ def test_case10(caplog, sage_with_nagl_chargeincrements, ligand): ligand.to_topology(), ) - info = map_methods_to_atom_indices(caplog) + debug = map_methods_to_inchikey(caplog) - # atoms 0 through 5 are ligand, getting NAGL charges - assert info[f"chargeincrements_{NAGL_KEY}"] == [*range(0, 9)] + # atoms 0 through 8 are ligand, getting NAGL charges + assert debug[f"chargeincrements_{NAGL_KEY}"] == ["LFQSCWFLJHTTHZ-UHFFFAOYSA-N"] # TODO: These are logged symmetrically (i.e. hydrogens are listed) # even though the charges appear to be correct, assert should # simply by == [0, 1] since the hydrogens shouldn't be caught - assert 0 in info["chargeincrements"] - assert 1 in info["chargeincrements"] + assert 0 in debug["chargeincrements"] + assert 1 in debug["chargeincrements"] # the standard AM1-BCC should not have ran - assert AM1BCC_KEY not in info + assert AM1BCC_KEY not in debug def test_case11(caplog, sage, ligand): """Test that NAGL charge assignment is properly logged.""" pytest.importorskip("openff.nagl") - with caplog.at_level(logging.DEBUG): + with caplog.at_level(logging.DEBUG, logger="openff.interchange"): sage.create_interchange(ligand.to_topology()) - info = map_methods_to_atom_indices(caplog) + debug = map_methods_to_inchikey(caplog) # Should log NAGL charges for all atoms - assert "NAGLChargesHandler" in info - assert info["NAGLChargesHandler"] == [*range(0, ligand.n_atoms)] + assert "NAGL" in debug + assert debug["NAGL"] == [INCHI_KEYS["ethanol"]] def test_case12(caplog, sage, water): @@ -502,14 +541,14 @@ def test_case12(caplog, sage, water): topology = Topology.from_molecules([water, water]) - with caplog.at_level(logging.DEBUG): + with caplog.at_level(logging.DEBUG, logger="openff.interchange"): sage.create_interchange(topology) - info = map_methods_to_atom_indices(caplog) + debug = map_methods_to_inchikey(caplog) # Water should get library charges, not NAGL - assert info["library"] == [*range(0, 6)] # 2 water molecules, 3 atoms each - assert "NAGLChargesHandler" not in info + assert debug["library"] == [INCHI_KEYS["water"]] # 2 water molecules + assert "NAGL" not in debug def test_case13(caplog, sage, ligand, water): @@ -518,16 +557,16 @@ def test_case13(caplog, sage, ligand, water): topology = Topology.from_molecules([ligand, water]) - with caplog.at_level(logging.DEBUG): + with caplog.at_level(logging.DEBUG, logger="openff.interchange"): sage.create_interchange(topology) - info = map_methods_to_atom_indices(caplog) + debug = map_methods_to_inchikey(caplog) # Ligand should get NAGL charges - assert info["NAGLChargesHandler"] == [*range(0, ligand.n_atoms)] + assert debug["NAGL"] == [INCHI_KEYS["ethanol"]] # Water should get library charges - assert info["library"] == [*range(ligand.n_atoms, ligand.n_atoms + water.n_atoms)] + assert debug["library"] == [INCHI_KEYS["water"]] def test_case14(caplog, sage, ligand): @@ -536,14 +575,33 @@ def test_case14(caplog, sage, ligand): ligand.assign_partial_charges("gasteiger") - with caplog.at_level(logging.DEBUG): + with caplog.at_level(logging.DEBUG, logger="openff.interchange"): sage.create_interchange( ligand.to_topology(), charge_from_molecules=[ligand], ) - info = map_methods_to_atom_indices(caplog) + debug = map_methods_to_inchikey(caplog) # Should use preset charges, not NAGL - assert info["preset"] == [*range(0, ligand.n_atoms)] - assert "NAGLChargesHandler" not in info + assert debug["preset"] == [INCHI_KEYS["ethanol"]] + assert "NAGL" not in debug + + +def test_inchi_fallback(caplog, sage): + """Test that molecules that fail InChIKey generation are still logged in some way.""" + from openff.toolkit.utils.toolkits import OpenEyeToolkitWrapper + + # TODO: Might be a toolkit bug that needs to be worked around + with caplog.at_level(logging.DEBUG, logger="openff.interchange"): + sage.create_interchange( + Molecule.from_smiles(342 * "C").to_topology(), + ) + + debug = map_methods_to_inchikey(caplog) + + if OpenEyeToolkitWrapper.is_available(): + assert debug["NAGL"] == ["UNKNOWN_INCHIKEY"] + else: + # but RDKit can generate an InChIKey + assert debug["NAGL"] == ["FGIJOQKVOYCTPJ-UHFFFAOYSA-N"] diff --git a/openff/interchange/smirnoff/_nonbonded.py b/openff/interchange/smirnoff/_nonbonded.py index f665e22b9..9f5afd1e5 100644 --- a/openff/interchange/smirnoff/_nonbonded.py +++ b/openff/interchange/smirnoff/_nonbonded.py @@ -937,50 +937,13 @@ def store_matches( # Have this new key (on a duplicate molecule) point to the same potential # as the old key (on a unique/reference molecule) - if type(new_key) is LibraryChargeTopologyKey: - logger.debug( - "Charge section LibraryCharges applied to topology atom index " - f"{topology_atom_index}", - ) - - elif type(new_key) is SingleAtomChargeTopologyKey: - if new_key.extras["handler"] == "ToolkitAM1BCCHandler": - logger.debug( - "Charge section ToolkitAM1BCC, using charge method " - f"{new_key.extras['partial_charge_method']}, " - f"applied to topology atom index {topology_atom_index}", - ) - elif new_key.extras["handler"] == "NAGLChargesHandler": - logger.debug( - "Charge section NAGLCharges, using NAGL model " - f"{new_key.extras['partial_charge_method']}, " - f"applied to topology atom index {topology_atom_index}", - ) - - elif new_key.extras["handler"] == "preset": - logger.debug( - f"Preset charges applied to atom index {topology_atom_index}", - ) - - else: - raise ValueError(f"Unhandled handler {new_key.extras['handler']}") - - elif type(new_key) is ChargeModelTopologyKey: - logger.debug( - "Charge section ChargeIncrementModel, using charge method " - f"{new_key.partial_charge_method}, " - f"applied to topology atom index {new_key.this_atom_index}", - ) - - elif type(new_key) is ChargeIncrementTopologyKey: - # here is where the actual increments could be logged - pass - - else: - raise ValueError(f"Unhandled key type {type(new_key)}") self.key_map[new_key] = matches[key] + # don't bother going through logging machinery if it's not opted-in to + if logger.isEnabledFor(logging.DEBUG): + self._log_charge_provenance(unique_molecule, new_key) + # this logic is all only to satisfy the allow_nonintegral_charges argument - could be more performant # to do it while assignment is happening if not allow_nonintegral_charges: @@ -1010,6 +973,68 @@ def store_matches( f"{formal_sum}.", ) + @staticmethod + def _log_charge_provenance( + unique_molecule: Molecule, + key: ChargeModelTopologyKey | SingleAtomChargeTopologyKey | LibraryChargeTopologyKey, + ): + hill_formaula = unique_molecule.to_hill_formula() + + try: + inchikey = unique_molecule.to_inchikey() + except Exception as error: + # In principle this is more like a warning, but the context is already logging.DEBUG, so why not? + logger.debug( + f"Could not generate InChIKey for molecule with Hill formula {hill_formaula}. " + "This molecule's charge provenance will be logged without a valid InChIKey. " + f"Error was {error}", + ) + inchikey = "UNKNOWN_INCHIKEY" + + id_string = f"molecule with Hill formula {hill_formaula} and InChIKey {inchikey}" + + if type(key) is LibraryChargeTopologyKey: + logger.debug( + "Charge section LibraryCharges, applied to " + id_string, + ) + + elif type(key) is SingleAtomChargeTopologyKey: + if key.extras["handler"] == "ToolkitAM1BCCHandler": + logger.debug( + "Charge section ToolkitAM1BCC, using charge method " + f"{key.extras['partial_charge_method']}, " + f"applied to " + id_string, + ) + elif key.extras["handler"] == "NAGLChargesHandler": + logger.debug( + "Charge section NAGLCharges, using NAGL model " + f"{key.extras['partial_charge_method']}, " + f"applied to " + id_string, + ) + + elif key.extras["handler"] == "preset": + logger.debug( + "Preset charges applied to " + id_string, + ) + + else: + raise ValueError(f"Unhandled handler {key.extras['handler']}") + + elif type(key) is ChargeModelTopologyKey: + logger.debug( + "Charge section ChargeIncrementModel, using charge method " + f"{key.partial_charge_method}, " + f"applied to " + id_string, + ) + + elif type(key) is ChargeIncrementTopologyKey: + # here is where the individual increments could be logged at creation time, but they're + # also logged in _get_charges + pass + + else: + raise ValueError(f"Unhandled key type {type(key)}") + def store_potentials( self, parameter_handler: ElectrostaticsHandlerType | list[ElectrostaticsHandlerType],