Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@
NAGL_KEY = "openff-gnn-am1bcc-1.0.0.pt"


def map_methods_to_atom_indices(caplog: pytest.LogCaptureFixture) -> dict[str, list[int]]:
def map_methods_to_inchi(caplog: pytest.LogCaptureFixture) -> dict[str, list[int]]:
"""
Map partial charge assignment methods to (sorted) atom indices.
Comment thread
mattwthompson marked this conversation as resolved.
Outdated
"""
Expand All @@ -131,13 +131,21 @@ def map_methods_to_atom_indices(caplog: pytest.LogCaptureFixture) -> dict[str, l
message = record.msg

if message.startswith("Charge section LibraryCharges"):
info["library"].append(int(message.split("atom index ")[-1]))
_, molecule = message.split(", ")

info["library"].append(molecule.split("InChI ")[-1])
Comment thread
mattwthompson marked this conversation as resolved.
Outdated

elif message.startswith("Charge section ToolkitAM1BCC"):
info[message.split(", ")[1].split(" ")[-1]].append(int(message.split("atom index ")[-1]))
_, method, molecule = message.split(", ")

assert method.split()[-1] == AM1BCC_KEY, f"Expected method {AM1BCC_KEY} but got {method.split()[-1]}"

info[method.split()[-1]].append(molecule.split("InChI ")[-1])

elif message.startswith("Charge section NAGLCharges"):
info["NAGLChargesHandler"].append(int(message.split("atom index ")[-1]))
_, _, molecule = message.split(", ")

info["NAGL"].append(molecule.split("InChI ")[-1])

# without also pulling the virtual site - particle mapping (which is different for each engine)
# it's hard to store more information than the orientation atoms that are affected by each
Expand All @@ -151,12 +159,13 @@ def map_methods_to_atom_indices(caplog: pytest.LogCaptureFixture) -> dict[str, l
info["orientation"].append(atom)

elif message.startswith("Preset charges"):
info["preset"].append(int(message.split("atom index")[-1]))
info["preset"].append(message.split("InChI ")[-1])

elif message.startswith("Charge section ChargeIncrementModel"):
if "using charge method" in message:
info[f"chargeincrements_{message.split(',')[1].split(' ')[-1]}"].append(
int(message.split("atom index ")[-1]),
_, method, molecule = message.split(", ")
info[f"chargeincrements_{method.split()[-1]}"].append(
molecule.split("InChI ")[-1],
)

elif "applying charge increment" in message:
Expand Down Expand Up @@ -285,23 +294,27 @@ def test_case0(caplog, sage_no_nagl, ligand):
with caplog.at_level(logging.INFO):
sage_no_nagl.create_interchange(ligand.to_topology())

info = map_methods_to_atom_indices(caplog)
info = map_methods_to_inchi(caplog)

# atoms 0 through 8 are ethanol, getting AM1-BCC
assert info[AM1BCC_KEY] == [*range(0, 9)]
assert info[AM1BCC_KEY] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3"]


def test_case1(caplog, sage_no_nagl, ligand_and_water_and_ions):
with caplog.at_level(logging.INFO):
sage_no_nagl.create_interchange(ligand_and_water_and_ions)

info = map_methods_to_atom_indices(caplog)
info = map_methods_to_inchi(caplog)

# atoms 0 through 8 are ethanol, getting AM1-BCC
assert info[AM1BCC_KEY] == [*range(0, 9)]
assert info[AM1BCC_KEY] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3"]

# atoms 9 through 21 are water + ions, getting library charges
assert info["library"] == [*range(9, 22)]
assert info["library"] == [
"InChI=1S/ClH/h1H/p-1",
"InChI=1S/H2O/h1H2",
"InChI=1S/Na/q+1",
]


def test_case2(caplog, sage_no_nagl, ligand, solvent):
Expand All @@ -310,10 +323,10 @@ def test_case2(caplog, sage_no_nagl, ligand, solvent):
with caplog.at_level(logging.INFO):
sage_no_nagl.create_interchange(topology)

info = map_methods_to_atom_indices(caplog)
info = map_methods_to_inchi(caplog)

# everything should get AM1-BCC charges
assert info[AM1BCC_KEY] == [*range(0, topology.n_atoms)]
assert info[AM1BCC_KEY] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3", "InChI=1S/CH5N/c1-2/h2H2,1H3"]


def test_case3(caplog, sage_no_nagl, ligand_and_water_and_ions, solvent):
Expand All @@ -327,14 +340,18 @@ def test_case3(caplog, sage_no_nagl, ligand_and_water_and_ions, solvent):
ligand_and_water_and_ions,
)

info = map_methods_to_atom_indices(caplog)
info = map_methods_to_inchi(caplog)

# atoms 0 through 8 are ethanol, getting AM1-BCC,
# and also solvent molecules (starting at index 22)
assert info[AM1BCC_KEY] == [*range(0, 9), *range(22, 22 + 3 * 7)]
assert info[AM1BCC_KEY] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3", "InChI=1S/CH5N/c1-2/h2H2,1H3"]

# atoms 9 through 21 are water + ions, getting library charges
assert info["library"] == [*range(9, 22)]
assert info["library"] == [
"InChI=1S/ClH/h1H/p-1",
"InChI=1S/H2O/h1H2",
"InChI=1S/Na/q+1",
]


@pytest.mark.slow
Expand All @@ -356,23 +373,29 @@ def test_cases4_5(caplog, ligand_and_water_and_ions, preset_on_protein):
else:
ff.create_interchange(complex)

info = map_methods_to_atom_indices(caplog)
info = map_methods_to_inchi(caplog)

assert info[AM1BCC_KEY] == [*range(complex.molecule(0).n_atoms, complex.molecule(0).n_atoms + 9)]
assert info[AM1BCC_KEY] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3"]

if preset_on_protein:
# protein gets preset charges
assert info["preset"] == [*range(0, complex.molecule(0).n_atoms)]
assert info["preset"] == [
"InChI=1S/C9H17N3O3/c1-5(8(14)10-4)12-9(15)6(2)11-7(3)13/h5-6H,1-4H3,(H,10,14)(H,11,13)(H,12,15)/t5-,6-/m0/s1",
]

# everything after the protein and ligand should get library charges
assert info["library"] == [
*range(complex.molecule(0).n_atoms + 9, complex.n_atoms),
"InChI=1S/ClH/h1H/p-1",
"InChI=1S/H2O/h1H2",
"InChI=1S/Na/q+1",
]
else:
# the protein and everything after the ligand should get library charges
assert info["library"] == [
*range(0, complex.molecule(0).n_atoms),
*range(complex.molecule(0).n_atoms + 9, complex.n_atoms),
"InChI=1S/C9H17N3O3/c1-5(8(14)10-4)12-9(15)6(2)11-7(3)13/h5-6H,1-4H3,(H,10,14)(H,11,13)(H,12,15)/t5-,6-/m0/s1",
"InChI=1S/ClH/h1H/p-1",
"InChI=1S/H2O/h1H2",
"InChI=1S/Na/q+1",
]


Expand All @@ -384,13 +407,13 @@ def test_case6(caplog, ligand, water):
with caplog.at_level(logging.INFO):
force_field.create_interchange(topology)

info = map_methods_to_atom_indices(caplog)
info = map_methods_to_inchi(caplog)

# atoms 0 through 8 are ethanol, getting AM1-BCC charges
assert info[AM1BCC_KEY] == [*range(0, 9)]
assert info[AM1BCC_KEY] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3"]

# atoms 9 through 17 are water atoms, getting library charges
assert info["library"] == [*range(9, 18)]
assert info["library"] == ["InChI=1S/H2O/h1H2"]

# particles 18 through 20 are water virtual sites, but the current logging strategy
# makes it hard to match these up (and the particle indices are different OpenMM/GROMACS/etc)
Expand All @@ -409,13 +432,17 @@ def test_case7(caplog, sage_no_nagl, ligand_and_water_and_ions):
charge_from_molecules=[ligand_and_water_and_ions.molecule(0)],
)

info = map_methods_to_atom_indices(caplog)
info = map_methods_to_inchi(caplog)

# atoms 0 through 8 are ethanol, getting preset charges
assert info["preset"] == [*range(0, 9)]
assert info["preset"] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3"]

# atoms 9 through 21 are water + ions, getting library charges
assert info["library"] == [*range(9, 22)]
assert info["library"] == [
"InChI=1S/ClH/h1H/p-1",
"InChI=1S/H2O/h1H2",
"InChI=1S/Na/q+1",
]


def test_case8(caplog, sage_no_nagl, water_and_ions):
Expand All @@ -427,13 +454,13 @@ def test_case8(caplog, sage_no_nagl, water_and_ions):
charge_from_molecules=[water_and_ions.molecule(0)],
)

info = map_methods_to_atom_indices(caplog)
info = map_methods_to_inchi(caplog)

# atoms 0 through 8 are water, getting preset charges
assert info["preset"] == [*range(0, 9)]
assert info["preset"] == ["InChI=1S/H2O/h1H2"]

# atoms 9 through 12 are ions, getting library charges
assert info["library"] == [*range(9, 13)]
assert info["library"] == ["InChI=1S/ClH/h1H/p-1", "InChI=1S/Na/q+1"]


def test_case9(caplog, sage_with_bond_charge):
Expand All @@ -444,10 +471,10 @@ def test_case9(caplog, sage_with_bond_charge):
).to_topology(),
)

info = map_methods_to_atom_indices(caplog)
info = map_methods_to_inchi(caplog)

# atoms 0 through 5 are ligand, getting AM1-BCC charges
assert info[AM1BCC_KEY] == [*range(0, 5)]
assert info[AM1BCC_KEY] == ["InChI=1S/CH3Cl/c1-2/h1H3"]

# atoms 0 and 1 are the orientation atoms of the sigma hole virtual site
assert info["orientation"] == [0, 1]
Expand All @@ -467,10 +494,10 @@ def test_case10(caplog, sage_with_nagl_chargeincrements, ligand):
ligand.to_topology(),
)

info = map_methods_to_atom_indices(caplog)
info = map_methods_to_inchi(caplog)

# atoms 0 through 5 are ligand, getting NAGL charges
assert info[f"chargeincrements_{NAGL_KEY}"] == [*range(0, 9)]
# atoms 0 through 8 are ligand, getting NAGL charges
assert info[f"chargeincrements_{NAGL_KEY}"] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3"]

# TODO: These are logged symmetrically (i.e. hydrogens are listed)
# even though the charges appear to be correct, assert should
Expand All @@ -489,11 +516,11 @@ def test_case11(caplog, sage, ligand):
with caplog.at_level(logging.INFO):
sage.create_interchange(ligand.to_topology())

info = map_methods_to_atom_indices(caplog)
info = map_methods_to_inchi(caplog)

# Should log NAGL charges for all atoms
assert "NAGLChargesHandler" in info
assert info["NAGLChargesHandler"] == [*range(0, ligand.n_atoms)]
assert "NAGL" in info
assert info["NAGL"] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3"]


def test_case12(caplog, sage, water):
Expand All @@ -505,11 +532,11 @@ def test_case12(caplog, sage, water):
with caplog.at_level(logging.INFO):
sage.create_interchange(topology)

info = map_methods_to_atom_indices(caplog)
info = map_methods_to_inchi(caplog)

# Water should get library charges, not NAGL
assert info["library"] == [*range(0, 6)] # 2 water molecules, 3 atoms each
assert "NAGLChargesHandler" not in info
assert info["library"] == ["InChI=1S/H2O/h1H2"] # 2 water molecules
assert "NAGL" not in info


def test_case13(caplog, sage, ligand, water):
Expand All @@ -521,13 +548,13 @@ def test_case13(caplog, sage, ligand, water):
with caplog.at_level(logging.INFO):
sage.create_interchange(topology)

info = map_methods_to_atom_indices(caplog)
info = map_methods_to_inchi(caplog)

# Ligand should get NAGL charges
assert info["NAGLChargesHandler"] == [*range(0, ligand.n_atoms)]
assert info["NAGL"] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3"]

# Water should get library charges
assert info["library"] == [*range(ligand.n_atoms, ligand.n_atoms + water.n_atoms)]
assert info["library"] == ["InChI=1S/H2O/h1H2"]


def test_case14(caplog, sage, ligand):
Expand All @@ -542,8 +569,18 @@ def test_case14(caplog, sage, ligand):
charge_from_molecules=[ligand],
)

info = map_methods_to_atom_indices(caplog)
info = map_methods_to_inchi(caplog)

# Should use preset charges, not NAGL
assert info["preset"] == [*range(0, ligand.n_atoms)]
assert "NAGLChargesHandler" not in info
assert info["preset"] == ["InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3"]
assert "NAGL" not in info


def test_inchi_fallback(caplog, sage):
"""Test that molecules that fail InChI generation are still logged in some way."""

# TODO: Might be a toolkit bug that needs to be worked around
with caplog.at_level(logging.INFO):
sage.create_interchange(
Molecule.from_smiles(342 * "C").to_topology(),
)
Comment thread
mattwthompson marked this conversation as resolved.
Loading