Skip to content
Closed
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
22b2692
resolved merge conflict in yml.py
donerancl Jan 27, 2025
14bd3e5
fixed bugs in fragment reattachment notebook, fragment_utils, cantera…
donerancl Jan 27, 2025
2edd657
Update fragment.py (typo)
donerancl Jun 4, 2024
a493353
set size_threshold default to zero
donerancl Jun 4, 2024
6ba1c30
added function that finds all possible partial reatt rxns and adds to…
donerancl Jun 7, 2024
96aa9b2
typo
donerancl Jun 7, 2024
31d9b12
commented cutting label logic
donerancl Jun 10, 2024
4dc438a
Apply suggestions from code review - formatting, small things
donerancl Jun 10, 2024
3559562
added docstring to merge_fragment_a_to_cutting_label_on_b
donerancl Jun 10, 2024
69142c4
remove unnecessary try-except
donerancl Jun 28, 2024
164192d
moved model into its own file in ipython
donerancl Jan 27, 2025
4b15850
Added missing entries to KineticsDB local context
jonwzheng Jan 24, 2025
26be15c
Added missing entries to KineticsDB local context
ssun30 Jan 27, 2025
921105f
Extra debugging message for loading a KineticsDB
ssun30 Jan 27, 2025
f72d6a1
Improved how database library labels are created.
ssun30 Jan 27, 2025
1b84b0f
incorporating changes from JB review
donerancl Feb 4, 2025
5bf5e03
removing leftover debugging statements
donerancl Feb 4, 2025
9cea759
notebook
donerancl Feb 4, 2025
c4092d7
added docstrings to many fragment_utils functions
donerancl Feb 4, 2025
139ef68
reverted is_carbon
donerancl Feb 7, 2025
a141f11
Define e_electronic_with_corrections, zpe before e0 conditions
alongd Feb 2, 2025
da1703d
If E0 is not None (e.g., given directly by the user), use it
alongd Feb 2, 2025
79cc842
Report e_elect + ZPE in SI only if they're not None
alongd Feb 2, 2025
9724157
Added an example for inputting E0 directly
alongd Feb 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63,469 changes: 0 additions & 63,469 deletions ipython/data/chem_annotated.inp

This file was deleted.

10,168 changes: 0 additions & 10,168 deletions ipython/data/species_dictionary.txt

This file was deleted.

869 changes: 454 additions & 415 deletions ipython/fragment_reattachment_example.ipynb

Large diffs are not rendered by default.

146 changes: 98 additions & 48 deletions rmgpy/molecule/fragment.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
from rmgpy.molecule.molecule import Atom, Bond, Molecule
from rmgpy.molecule.atomtype import get_atomtype, AtomTypeError, ATOMTYPES, AtomType
from rdkit import Chem

from numpy.random import randint
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for the sake of reproducibility, let's change this to:

from numpy.random import default_rng

rng = default_rng(0)
rng.integers(0, 2)

at least, it should look something like that.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! I switched to default_rng, using rng.integers(low=0, high=2, size=1), which seems to be the equivalent of randint.

# this variable is used to name atom IDs so that there are as few conflicts as
# possible, by using the entire space of integer objects
ATOM_ID_COUNTER = -(2**15)
Expand Down Expand Up @@ -838,7 +838,7 @@ def from_rdkit_mol(self, rdkitmol, atom_replace_dict=None):

return self

def cut_molecule(self, output_smiles=False, cut_through=True, size_threshold=None):
def cut_molecule(self, output_smiles=False, cut_through=True, size_threshold=5):
"""
For given input, output a list of cut fragments (either string or Fragment).
if output_smiles = True, the output list of fragments will be smiles.
Expand Down Expand Up @@ -890,15 +890,10 @@ def cut_molecule(self, output_smiles=False, cut_through=True, size_threshold=Non
frag_list.append(res_frag)
return frag_list

def sliceitup_arom(self, molecule, size_threshold=None):
def sliceitup_arom(self, molecule, size_threshold=5):
"""
Several specified aromatic patterns
"""
# set min size for each aliphatic fragment size
if size_threshold:
size_threshold = size_threshold
else:
size_threshold = 5
# if input is smiles string, output smiles
if isinstance(molecule, str):
molecule_smiles = molecule
Expand Down Expand Up @@ -952,27 +947,67 @@ def sliceitup_arom(self, molecule, size_threshold=None):
# mol_set contains new set of fragments
mol_set = Chem.GetMolFrags(new_mol, asMols=True)
# check all fragments' size
if all(
sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6)
>= size_threshold
for mol in mol_set
):
# replace * at cutting position with cutting label
for ind, rdmol in enumerate(mol_set):
frag = Chem.MolToSmiles(rdmol)
if len(mol_set) > 2: # means it cut into 3 fragments

if all(sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6) >= size_threshold for mol in mol_set):
Comment thread
donerancl marked this conversation as resolved.
Outdated
if len(mol_set) == 2:
frag1 = Chem.MolToSmiles(mol_set[0])
frag2 = Chem.MolToSmiles(mol_set[1])

frag1_R = frag1.count("Na")
frag1_L = frag1.count("K")
frag2_R = frag2.count("Na")
frag2_L = frag2.count("K")

# if frag 2 has fewer Rs than frag 1 and frag 1 has the
# same number of or fewer Ls than frag 2 -->
# assign R to frag 2 and L to frag 1
if frag1_R > frag2_R and frag1_L <= frag2_L:
frag1_smi = frag1.replace("*", "L")
frag2_smi = frag2.replace("*", "R")

# if frag 2 has fewer Ls than frag 1 and frag 1 has the
# same number of or fewer Rs than frag 2 -->
# assign R to frag 1 and L to frag 2
elif frag1_L > frag2_L and frag1_R <= frag2_R:
frag1_smi = frag1.replace("*", "R")
frag2_smi = frag2.replace("*", "L")

# if frag 1 has fewer Ls than frag 2 and frag 2 has the
# same number of or fewer Rs than frag 1 -->
# assign R to frag 2 and L to frag 1
elif frag2_L > frag1_L and frag2_R <= frag1_R:
frag1_smi = frag1.replace("*", "L")
frag2_smi = frag2.replace("*", "R")

# if frag 1 has fewer Rs than frag 2 and frag 2 has the
# same number of or fewer Ls than frag 1 -->
# assign R to frag 1 and L to frag 2
elif frag2_R > frag1_R and frag2_L <= frag1_L:
frag1_smi = frag1.replace("*", "R")
frag2_smi = frag2.replace("*", "L")

# else if frag 1 and frag 2 have equal number
# of Rs and Ls or one frag has more Rs and
# more Ls than the other, choose randomly
elif randint(0, 1) == 1:
frag1_smi = frag1.replace("*", "L")
frag2_smi = frag2.replace("*", "R")
else:
frag1_smi = frag1.replace("*", "R")
frag2_smi = frag2.replace("*", "L")

frag_list = [frag1_smi, frag2_smi]

elif len(mol_set) > 2: # means it cut into 3 fragments
frag_list = []
for ind, rdmol in enumerate(mol_set):
frag = Chem.MolToSmiles(rdmol)
if frag.count("*") > 1:
# replace both with R
frag_smi = frag.replace("*", "R")
else:
frag_smi = frag.replace("*", "L")
else: # means it only cut once, generate 2 fragments
if ind == 0:
frag_smi = frag.replace("*", "R")
else:
frag_smi = frag.replace("*", "L")
frag_list.append(frag_smi)
break
frag_list.append(frag_smi)
break
else:
# turn to next matched_atom_map
continue
Expand Down Expand Up @@ -1016,15 +1051,10 @@ def sliceitup_arom(self, molecule, size_threshold=None):
frag_list_new.append(res_frag)
return frag_list_new

def sliceitup_aliph(self, molecule, size_threshold=None):
def sliceitup_aliph(self, molecule, size_threshold=5):
"""
Several specified aliphatic patterns
"""
# set min size for each aliphatic fragment size
if size_threshold:
size_threshold = size_threshold
else:
size_threshold = 5
# if input is smiles string, output smiles
if isinstance(molecule, str):
molecule_smiles = molecule
Expand Down Expand Up @@ -1081,27 +1111,47 @@ def sliceitup_aliph(self, molecule, size_threshold=None):
# mol_set contains new set of fragments
mol_set = Chem.GetMolFrags(new_mol, asMols=True)
# check all fragments' size
if all(
sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6)
>= size_threshold
for mol in mol_set
):
# replace * at cutting position with cutting label
for ind, rdmol in enumerate(mol_set):
frag = Chem.MolToSmiles(rdmol)
if len(mol_set) > 2: # means it cut into 3 fragments
if all(sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6) >= size_threshold for mol in mol_set):
Comment thread
donerancl marked this conversation as resolved.
Outdated
if len(mol_set) == 2:
frag1 = Chem.MolToSmiles(mol_set[0])
frag2 = Chem.MolToSmiles(mol_set[1])

frag1_R = frag1.count("Na")
frag1_L = frag1.count("K")
frag2_R = frag2.count("Na")
frag2_L = frag2.count("K")

if frag1_R > frag2_R and frag1_L <= frag2_L:
frag1_smi = frag1.replace("*", "L")
frag2_smi = frag2.replace("*", "R")
elif frag1_L > frag2_L and frag1_R <= frag2_R:
frag1_smi = frag1.replace("*", "R")
frag2_smi = frag2.replace("*", "L")
elif frag2_L > frag1_L and frag2_R <= frag1_R:
frag1_smi = frag1.replace("*", "L")
frag2_smi = frag2.replace("*", "R")
elif frag2_R > frag1_R and frag2_L <= frag1_L:
frag1_smi = frag1.replace("*", "R")
frag2_smi = frag2.replace("*", "L")
elif randint(0,1)==1:
frag1_smi = frag1.replace("*", "L")
frag2_smi = frag2.replace("*", "R")
else:
frag1_smi = frag1.replace("*", "R")
frag2_smi = frag2.replace("*", "L")

frag_list = [frag1_smi, frag2_smi]

elif len(mol_set) > 2: # means it cut into 3 fragments
frag_list = []
for ind, rdmol in enumerate(mol_set):
frag = Chem.MolToSmiles(rdmol)
if frag.count("*") > 1:
# replace both with R
frag_smi = frag.replace("*", "R")
else:
frag_smi = frag.replace("*", "L")
else: # means it only cut once, generate 2 fragments
if ind == 0:
frag_smi = frag.replace("*", "R")
else:
frag_smi = frag.replace("*", "L")
frag_list.append(frag_smi)
break
frag_list.append(frag_smi)
break
else:
# turn to next matched_atom_map
continue
Expand Down
Loading