From 91a8da42e231277c248678069f8af700929f9774 Mon Sep 17 00:00:00 2001 From: Sevy Harris Date: Mon, 13 Apr 2026 19:57:16 -0400 Subject: [PATCH 01/10] uncertainty uses recorded std dev for SIDT nodes Up until now the uncertainty tool has assumed the kinetics trees are the original hand-built type. This enables the uncertainty tool to grab the calculated standard deviation and number of training reactions from the specific autogen tree node used, instead of incorrectly treating it as an exact match of a single rate rule with a std dev of 0.5 --- rmgpy/data/kinetics/family.py | 9 +++-- rmgpy/tools/uncertainty.py | 62 +++++++++++++++++++++++------ test/rmgpy/tools/uncertaintyTest.py | 3 +- 3 files changed, 57 insertions(+), 17 deletions(-) diff --git a/rmgpy/data/kinetics/family.py b/rmgpy/data/kinetics/family.py index 441e57fce13..0b43a7134cd 100644 --- a/rmgpy/data/kinetics/family.py +++ b/rmgpy/data/kinetics/family.py @@ -4440,8 +4440,9 @@ def extract_source_from_comments(self, reaction): [Family_Label, {'template': originalTemplate, 'degeneracy': degeneracy, 'exact': boolean_exact?, - 'rules': a list of (original rate rule entry, weight in average) - 'training': a list of (original rate rule entry associated with training entry, original training entry, weight in average)}] + 'rules': a list of (original rate rule entry, weight in average), + 'training': a list of (original rate rule entry associated with training entry, original training entry, weight in average), + 'autogenerated': boolean for whether kinetics come from autogenerated subgraph isomorphic decision tree}] where Exact is a boolean of whether the rate is an exact match, Template is @@ -4455,6 +4456,7 @@ def extract_source_from_comments(self, reaction): rules = None training_entries = None degeneracy = 1 + autogenerated = False training_reaction_pattern = r'Matched reaction\s*(\d+).*in.*training' degeneracy_pattern = r'Multiplied by reaction path degeneracy\s*(\d+)' @@ -4494,6 +4496,7 @@ def 
extract_source_from_comments(self, reaction): autogen_node_matches = re.search(autogen_node_search_pattern, full_comment_string) template_matches = re.search(template_pattern, full_comment_string) if autogen_node_matches is not None: # autogenerated trees + autogenerated = True template_str = autogen_node_matches.group(1).split('Multiplied by reaction path degeneracy')[0].strip() template_str = template_str.split('in family')[0].strip() tokens = template_str.split() @@ -4510,7 +4513,7 @@ def extract_source_from_comments(self, reaction): raise ValueError(f'Could not find rate rule in comments for reaction {reaction}.') rules, training_entries = self.get_sources_for_template(template) source_dict = {'template': template, 'degeneracy': degeneracy, 'exact': exact_rule, - 'rules': rules, 'training': training_entries} + 'rules': rules, 'training': training_entries, 'autogenerated': autogenerated} # Source of the kinetics is from rate rules return False, [self.label, source_dict] diff --git a/rmgpy/tools/uncertainty.py b/rmgpy/tools/uncertainty.py index de9669dc0c5..cb97cb563ce 100644 --- a/rmgpy/tools/uncertainty.py +++ b/rmgpy/tools/uncertainty.py @@ -28,6 +28,7 @@ ############################################################################### import os +import re import numpy as np @@ -165,19 +166,35 @@ def get_uncertainty_value(self, source): dlnk += self.dlnk_family ** 2 N = len(rule_weights) + len(training_weights) - if not exact: - # nonexactness contribution increases as N increases + if 'node_std_dev' in source_dict: + # Handle autogen BM trees + if source_dict['node_std_dev'] < 0: + raise ValueError('Invalid value for std dev of kinetics family rule node') + dlnk += source_dict['node_std_dev'] + if source_dict['node_n_train'] is None: + raise ValueError('Invalid number of training reactions for kinetics family rule node') + N = source_dict['node_n_train'] + + # Technically every lookup in the autogenerated trees is an "exact" match because + # every node 
template has its own fitted rate rule by definition, but here we use the + # number of training reactions as an approximation of the node's specificity/generality + # and add a penalty for being too general (large # of training reactions) dlnk += np.log10(N + 1) * self.dlnk_nonexact + else: + # Handle hand-made trees + if not exact: + # nonexactness contribution increases as N increases + dlnk += np.log10(N + 1) * self.dlnk_nonexact - # Add the contributions from rules - dlnk += np.sum([weight * self.dlnk_rule for weight in rule_weights]) - # Add the contributions from training - # Even though these source from training reactions, we actually - # use the uncertainty for rate rules, since these are now approximations - # of the original reaction. We consider these to be independent of original the training - # parameters because the rate rules may be reversing the training reactions, - # which leads to more complicated dependence - dlnk += np.sum([weight * self.dlnk_rule for weight in training_weights]) + # Add the contributions from rules + dlnk += np.sum([weight * self.dlnk_rule for weight in rule_weights]) + # Add the contributions from training + # Even though these source from training reactions, we actually + # use the uncertainty for rate rules, since these are now approximations + # of the original reaction. 
We consider these to be independent of original the training + # parameters because the rate rules may be reversing the training reactions, + # which leads to more complicated dependence + dlnk += np.sum([weight * self.dlnk_rule for weight in training_weights]) return dlnk @@ -413,8 +430,27 @@ def extract_sources_from_model(self): # Do nothing here because training source already saves the entry from the training reaction pass elif 'Rate Rules' in source: - # Do nothing - pass + # Fetch standard deviation if autogenerated tree + if source['Rate Rules'][1]['autogenerated']: + std_dev = 1.329 # Default value is uniform distribution with upper bound of 10x the nominal value + n_train = None + try: + # try to look up the node variance in the rule data's uncertainty object + std_dev = np.sqrt(source['Rate Rules'][1]['rules'][0][0].data.uncertainty.var) + n_train = int(source['Rate Rules'][1]['rules'][0][0].data.uncertainty.N) + except AttributeError: + # fall back on the standard deviation reported in the long description of the rule + # this is the absolute average deviation and not the standard deviation of the node, but it is still probably better than the default + # note, we'd need abs(mu) to be able to convert back to std_dev from absolute average deviation, which is why we're not doing it here + long_desc = source['Rate Rules'][1]['rules'][0][0].long_desc + std_dev_matches = re.search(r'Standard Deviation in ln\(k\): ([0-9]*.[0-9]*)', long_desc) + if std_dev_matches is not None: + std_dev = float(std_dev_matches[1]) + n_train_matches = re.search('rule fitted to ([0-9]*) training reactions', long_desc) + if n_train_matches is not None: + n_train = int(n_train_matches[1]) + source['Rate Rules'][1]['node_std_dev'] = std_dev + source['Rate Rules'][1]['node_n_train'] = n_train else: raise Exception('Source of kinetics must be either Library, PDep, Training, or Rate Rules') self.reaction_sources_dict[reaction] = source diff --git a/test/rmgpy/tools/uncertaintyTest.py 
b/test/rmgpy/tools/uncertaintyTest.py index 0005609e8cb..85f10ca5a54 100644 --- a/test/rmgpy/tools/uncertaintyTest.py +++ b/test/rmgpy/tools/uncertaintyTest.py @@ -174,6 +174,7 @@ def test_uncertainty_assignment(self): ) np.testing.assert_allclose( kinetic_unc, - [0.5, 1.5, 3.169924, 3.169924, 2.553605, 0.5, 2.0, 2.553605, 2.553605, 0.5], + # the 7.81 value comes from the SIDT tree node uncertainty: 5.756 + non-exact penalty for N=1: log10(1+1) * 3.5 + family uncertainty: 1.0 + [0.5, 1.5, 3.169924, 3.169924, 2.553605, 0.5, 2.0, 7.81, 7.81, 0.5], rtol=1e-4 ) From 2843665781a49f9b1031cc68c4d1497102f5a063 Mon Sep 17 00:00:00 2001 From: Sevy Harris Date: Sun, 12 Apr 2026 01:01:16 -0400 Subject: [PATCH 02/10] update loader tool to include surface mech paths --- rmgpy/tools/loader.py | 7 +++++-- rmgpy/tools/uncertainty.py | 5 +++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/rmgpy/tools/loader.py b/rmgpy/tools/loader.py index de7546ed489..481d506fabb 100644 --- a/rmgpy/tools/loader.py +++ b/rmgpy/tools/loader.py @@ -50,6 +50,7 @@ def load_rmg_job( use_java=False, use_chemkin_names=False, check_duplicates=True, + surface_path=None, ): # The argument is an RMG-Py input file rmg = load_rmg_py_job( @@ -59,13 +60,14 @@ def load_rmg_job( generate_images, use_chemkin_names=use_chemkin_names, check_duplicates=check_duplicates, + surface_path=surface_path, ) return rmg def load_rmg_py_job(input_file, chemkin_file=None, species_dict=None, generate_images=True, - use_chemkin_names=False, check_duplicates=True): + use_chemkin_names=False, check_duplicates=True, surface_path=None): """ Load the results of an RMG-Py job generated from the given `input_file`. 
""" @@ -83,7 +85,8 @@ def load_rmg_py_job(input_file, chemkin_file=None, species_dict=None, generate_i species_dict = os.path.join(os.path.dirname(input_file), 'chemkin', 'species_dictionary.txt') species_list, reaction_list = load_chemkin_file(chemkin_file, species_dict, use_chemkin_names=use_chemkin_names, - check_duplicates=check_duplicates) + check_duplicates=check_duplicates, + surface_path=surface_path) # Created "observed" versions of all reactive species that are not explicitly # identified as "constant" species diff --git a/rmgpy/tools/uncertainty.py b/rmgpy/tools/uncertainty.py index cb97cb563ce..2cb1624c706 100644 --- a/rmgpy/tools/uncertainty.py +++ b/rmgpy/tools/uncertainty.py @@ -336,7 +336,7 @@ def load_database(self, kinetics_families='all', kinetics_depositories=None, the family.add_rules_from_training(thermo_database=self.database.thermo) family.fill_rules_by_averaging_up(verbose=True) - def load_model(self, chemkin_path, dictionary_path, transport_path=None): + def load_model(self, chemkin_path, dictionary_path, transport_path=None, surface_path=None): """ Load a RMG-generated model into the Uncertainty class `chemkin_path`: path to the chem_annotated.inp CHEMKIN mechanism @@ -351,7 +351,8 @@ def load_model(self, chemkin_path, dictionary_path, transport_path=None): self.species_list, self.reaction_list = load_chemkin_file(chemkin_path, dictionary_path=dictionary_path, - transport_path=transport_path) + transport_path=transport_path, + surface_path=surface_path) def retrieve_saturated_species_from_list(self, species): """ From ce0102b49c16e9fcc078873a1a918168e76638aa Mon Sep 17 00:00:00 2001 From: Sevy Harris Date: Sun, 12 Apr 2026 01:51:26 -0400 Subject: [PATCH 03/10] add uncertainty handling of surface libs and ads correction --- rmgpy/data/thermo.py | 38 ++++++- rmgpy/tools/uncertainty.py | 170 ++++++++++++++++++++++++---- test/rmgpy/tools/uncertaintyTest.py | 4 +- 3 files changed, 183 insertions(+), 29 deletions(-) diff --git 
a/rmgpy/data/thermo.py b/rmgpy/data/thermo.py index f6d15f702b9..16461e4dd1c 100644 --- a/rmgpy/data/thermo.py +++ b/rmgpy/data/thermo.py @@ -2724,7 +2724,8 @@ def extract_source_from_comments(self, species): source = {'Library': String_Name_of_Library_Used, 'QM': String_of_Method_Used, - 'GAV': Dictionary_of_Groups_Used + 'GAV': Dictionary_of_Groups_Used, + 'ADS': Dictionary_of_Adsorption_Group_Used, } The Dictionary_of_Groups_Used looks like @@ -2743,9 +2744,42 @@ def extract_source_from_comments(self, species): # Store the level of the calculation, which is the 2nd token in the comments source['QM'] = tokens[1] + elif comment.startswith('Gas phase thermo'): + # Handle adsorption correction thermo data of the following format: + # Library example + # Gas phase thermo for C(T) from Thermo library: primaryThermoLibrary. + # Adsorption correction: + Thermo group additivity estimation: adsorptionPt111(Cq*) + + # GAV example + # Gas phase thermo for [CH]CC from Thermo group additivity estimation: group(Cs-CsCsHH) + group(Cs-CsHHH) + group(Cs-CsHHH) + radical(CCJ2_triplet). + # Adsorption correction: + Thermo group additivity estimation: adsorptionPt111(C=*RCR3)" + + comment = comment.replace(r'\n', ' ') + comment = comment.replace('\n', ' ') + assert 'Adsorption correction:' in comment, f'adsorption correction in unrecognized format {comment}' + + # Handle the gas-phase portion first + gas_comment = comment.split('Adsorption correction: + ')[0].strip() + gas_comment = gas_comment.replace('.', '', -1) # delete the . 
at the end if it exists + gas_comment = gas_comment[gas_comment.find('from ', len('Gas phase thermo for ')) + len('from '):] + dummy_gas_phase_species = Species() + dummy_gas_phase_species.thermo = NASA() + dummy_gas_phase_species.thermo.comment = gas_comment + source = self.extract_source_from_comments(dummy_gas_phase_species) + + # This is an adsorption correction + # comment is split into two parts: the gas phase, and the surface adsorption corection + ads_correction_comment = comment.split('Adsorption correction: +')[-1].strip() + dummy_adsorption_correction_species = Species() + dummy_adsorption_correction_species.thermo = NASA() + dummy_adsorption_correction_species.thermo.comment = ads_correction_comment + source['ADS'] = self.extract_source_from_comments(dummy_adsorption_correction_species)['GAV'] + + return source + # Check for group additivity contributions to the thermo in this species - # The contribution of the groups can be either additive or substracting + # The contribution of the groups can be either additive or subtracting # after changes to the polycyclic algorithm comment = comment.replace(' + ', ' +') diff --git a/rmgpy/tools/uncertainty.py b/rmgpy/tools/uncertainty.py index 2cb1624c706..29ef3738c3a 100644 --- a/rmgpy/tools/uncertainty.py +++ b/rmgpy/tools/uncertainty.py @@ -43,7 +43,7 @@ class ThermoParameterUncertainty(object): This class is an engine that generates the species uncertainty based on its thermo sources. """ - def __init__(self, dG_library=1.5, dG_QM=3.0, dG_GAV=1.5, dG_group=0.10): + def __init__(self, dG_library=1.5, dG_QM=3.0, dG_GAV=1.5, dG_group=0.1, dG_ADS_correction=6.918, dG_surf_lib=6.918): """ Initialize the different uncertainties dG_library, dG_QM, dG_GAV, and dG_other with set values in units of kcal/mol. 
@@ -55,6 +55,8 @@ def __init__(self, dG_library=1.5, dG_QM=3.0, dG_GAV=1.5, dG_group=0.10): self.dG_QM = dG_QM self.dG_GAV = dG_GAV self.dG_group = dG_group + self.dG_ADS_correction = dG_ADS_correction + self.dG_surf_lib = dG_surf_lib def get_uncertainty_value(self, source): """ @@ -63,6 +65,8 @@ def get_uncertainty_value(self, source): dG = 0.0 if 'Library' in source: dG += self.dG_library + if 'Surface_Library' in source: + dG += self.dG_surf_lib if 'QM' in source: dG += self.dG_QM if 'GAV' in source: @@ -70,6 +74,8 @@ def get_uncertainty_value(self, source): for group_type, group_entries in source['GAV'].items(): group_weights = [groupTuple[-1] for groupTuple in group_entries] dG += np.sum([weight * self.dG_group for weight in group_weights]) + if 'ADS' in source: + dG += self.dG_ADS_correction # Add adsorption correction uncertainty return dG @@ -78,7 +84,7 @@ def get_partial_uncertainty_value(self, source, corr_source_type, corr_param=Non Obtain the partial uncertainty dG/dG_corr*dG_corr, where dG_corr is the correlated parameter `corr_param` is the parameter identifier itself, which is a integer for QM and library parameters, or a string for group values - `corr_source_type` is a string, being either 'Library', 'QM', 'GAV', or 'Estimation' + `corr_source_type` is a string, being either 'Library', 'QM', 'GAV', 'ADS', or 'Estimation' `corr_group_type` is a string used only when the source type is 'GAV' and indicates grouptype """ @@ -88,12 +94,26 @@ def get_partial_uncertainty_value(self, source, corr_source_type, corr_param=Non # Correlated parameter is a source of the overall parameter return self.dG_library + elif corr_source_type == 'Surface_Library': + if 'Surface_Library' in source: + if source['Surface_Library'] == corr_param: + # Correlated parameter is a source of the overall parameter + return self.dG_surf_lib + elif corr_source_type == 'QM': if 'QM' in source: if source['QM'] == corr_param: # Correlated parameter is a source of the overall parameter 
return self.dG_QM + elif corr_source_type == 'ADS': + if 'ADS' in source: + if corr_group_type in source['ADS']: + group_list = source['ADS'][corr_group_type] + for group, weight in group_list: # there should be only one group entry for adsorption corrections + if group == corr_param: + return weight * self.dG_ADS_correction + elif corr_source_type == 'GAV': if 'GAV' in source: if corr_group_type in source['GAV']: @@ -105,8 +125,9 @@ def get_partial_uncertainty_value(self, source, corr_source_type, corr_param=Non elif corr_source_type == 'Estimation': if 'GAV' in source: return self.dG_GAV + else: - raise Exception('Thermo correlated source must be GAV, QM, Library, or Estimation') + raise Exception('Thermo correlated source must be GAV, QM, Library, Surface_Library, ADS, ADS_estimation, or Estimation') # If we get here, it means the correlated parameter was not found return None @@ -127,7 +148,7 @@ class KineticParameterUncertainty(object): """ def __init__(self, dlnk_library=0.5, dlnk_training=0.5, dlnk_pdep=2.0, dlnk_family=1.0, dlnk_nonexact=3.5, - dlnk_rule=0.5): + dlnk_rule=0.5, dlnk_surf_library=2.659, dlnk_surf_training=2.659, dlnk_surf_rule=2.659): """ Initialize the different uncertainties dlnk @@ -140,6 +161,9 @@ def __init__(self, dlnk_library=0.5, dlnk_training=0.5, dlnk_pdep=2.0, dlnk_fami self.dlnk_family = dlnk_family self.dlnk_nonexact = dlnk_nonexact self.dlnk_rule = dlnk_rule + self.dlnk_surf_library = dlnk_surf_library + self.dlnk_surf_training = dlnk_surf_training + self.dlnk_surf_rule = dlnk_surf_rule def get_uncertainty_value(self, source): """ @@ -149,6 +173,9 @@ def get_uncertainty_value(self, source): if 'Library' in source: # Should be a single library reaction source dlnk += self.dlnk_library + elif 'Surface_Library' in source: + # Should be a single library reaction source + dlnk += self.dlnk_surf_library elif 'PDep' in source: # Should be a single pdep reaction source dlnk += self.dlnk_pdep @@ -156,7 +183,10 @@ def 
get_uncertainty_value(self, source): # Should be a single training reaction # Although some training entries may be used in reverse, # We still consider the kinetics to be directly dependent - dlnk += self.dlnk_training + if 'surface' in source['Training'][0].lower(): + dlnk += self.dlnk_surf_training + else: + dlnk += self.dlnk_training elif 'Rate Rules' in source: family_label = source['Rate Rules'][0] source_dict = source['Rate Rules'][1] @@ -164,7 +194,8 @@ def get_uncertainty_value(self, source): rule_weights = [ruleTuple[-1] for ruleTuple in source_dict['rules']] training_weights = [trainingTuple[-1] for trainingTuple in source_dict['training']] - dlnk += self.dlnk_family ** 2 + dlnk += self.dlnk_family + N = len(rule_weights) + len(training_weights) if 'node_std_dev' in source_dict: # Handle autogen BM trees @@ -186,15 +217,19 @@ def get_uncertainty_value(self, source): # nonexactness contribution increases as N increases dlnk += np.log10(N + 1) * self.dlnk_nonexact - # Add the contributions from rules - dlnk += np.sum([weight * self.dlnk_rule for weight in rule_weights]) - # Add the contributions from training - # Even though these source from training reactions, we actually - # use the uncertainty for rate rules, since these are now approximations - # of the original reaction. 
We consider these to be independent of original the training - # parameters because the rate rules may be reversing the training reactions, - # which leads to more complicated dependence - dlnk += np.sum([weight * self.dlnk_rule for weight in training_weights]) + if 'surface' in family_label.lower(): + dlnk += np.sum([weight * self.dlnk_surf_rule for weight in rule_weights]) + dlnk += np.sum([weight * self.dlnk_surf_training for weight in training_weights]) + else: + # Add the contributions from rules + dlnk += np.sum([weight * self.dlnk_rule for weight in rule_weights]) + # Add the contributions from training + # Even though these source from training reactions, we actually + # use the uncertainty for rate rules, since these are now approximations + # of the original reaction. We consider these to be independent of original the training + # parameters because the rate rules may be reversing the training reactions, + # which leads to more complicated dependence + dlnk += np.sum([weight * self.dlnk_rule for weight in training_weights]) return dlnk @@ -211,17 +246,24 @@ def get_partial_uncertainty_value(self, source, corr_source_type, corr_param=Non if 'Rate Rules' in source: family_label = source['Rate Rules'][0] if corr_family == family_label: + surface_family = 'surface' in family_label.lower() source_dict = source['Rate Rules'][1] rules = source_dict['rules'] training = source_dict['training'] if rules: for ruleEntry, weight in rules: if corr_param == ruleEntry: - return weight * self.dlnk_rule + if surface_family: + return weight * self.dlnk_surf_rule + else: + return weight * self.dlnk_rule if training: for ruleEntry, trainingEntry, weight in training: if corr_param == ruleEntry: - return weight * self.dlnk_rule + if surface_family: + return weight * self.dlnk_surf_rule + else: + return weight * self.dlnk_rule # Writing it this way in the function is not the most efficient, but makes it easy to use, and # testing a few if statements is not too costly @@ -230,6 
+272,11 @@ def get_partial_uncertainty_value(self, source, corr_source_type, corr_param=Non if corr_param == source['Library']: # Should be a single library reaction source return self.dlnk_library + elif corr_source_type == 'Surface_Library': + if 'Surface_Library' in source: + if corr_param == source['Surface_Library']: + # Should be a single library reaction source + return self.dlnk_surf_library elif corr_source_type == 'PDep': if 'PDep' in source: if corr_param == source['PDep']: @@ -238,7 +285,10 @@ def get_partial_uncertainty_value(self, source, corr_source_type, corr_param=Non if 'Training' in source: # Should be a unique single training reaction if corr_param == source['Training']: - return self.dlnk_training + if 'surface' in source['Training'][0].lower(): + return self.dlnk_surf_training + else: + return self.dlnk_training elif corr_source_type == 'Estimation': # Return all the uncorrelated uncertainty associated with using an estimation scheme @@ -247,7 +297,9 @@ def get_partial_uncertainty_value(self, source, corr_source_type, corr_param=Non source_dict = source['Rate Rules'][1] exact = source_dict['exact'] + family_label = source['Rate Rules'][0] dlnk = self.dlnk_family # Base uncorrelated uncertainty just from using rate rule estimation + # Additional uncertainty from using non-exact rate rule N = len(source_dict['rules']) + len(source_dict['training']) if not exact: @@ -371,6 +423,7 @@ def retrieve_saturated_species_from_list(self, species): # couldn't find saturated species in the model, try libraries new_spc = Species(molecule=[saturated_struct]) + new_spc.generate_resonance_structures() thermo = self.database.thermo.get_thermo_data_from_libraries(new_spc) if thermo is not None: @@ -386,6 +439,7 @@ def extract_sources_from_model(self): Must be done after loading model and database to work. 
""" self.species_sources_dict = {} + self.extra_species = [] for species in self.species_list: if species not in self.extra_species: source = self.database.thermo.extract_source_from_comments(species) @@ -397,13 +451,43 @@ def extract_sources_from_model(self): if 'Library' in source: # Use just the species index in self.species_list, for better shorter printouts when debugging source['Library'] = self.species_list.index(species) + if species.contains_surface_site(): + source['Surface_Library'] = source.pop('Library') if 'QM' in source: source['QM'] = self.species_list.index(species) elif len(source) == 2: - # The thermo has two sources, which indicates it's an HBI correction on top of a library or QM value. - # We must retrieve the original saturated molecule's thermo instead of using the radical species as the source of thermo - saturated_species, ignore_spc = self.retrieve_saturated_species_from_list(species) + # The thermo has two sources, which indicates it's an HBI correction on top of a library or QM value... 
+ # OR it is an adsorption correction with gas-phase thermo from Library/QM + if 'ADS' in source: + if 'Library' in source: + # Use just the species index in self.species_list, for better shorter printouts when debugging + source['Library'] = self.species_list.index(species) + if 'QM' in source: + source['QM'] = self.species_list.index(species) + else: + # We must retrieve the original saturated molecule's thermo instead of using the radical species as the source of thermo + saturated_species, ignore_spc = self.retrieve_saturated_species_from_list(species) + + if ignore_spc: # this is saturated species that isn't in the actual model + self.extra_species.append(saturated_species) + + if 'Library' in source: + source['Library'] = self.species_list.index(saturated_species) + + if saturated_species.contains_surface_site(): + source['Surface_Library'] = source.pop('Library') # surface species library + radical correction + if 'QM' in source: + source['QM'] = self.species_list.index(saturated_species) + elif len(source) == 3: + # combination of adsorption correction, GAV (radical), and Library/ML + + assert species.contains_surface_site(), 'only surface species should have 3 sources: adsorption correction, GAV, library/ML' + + # retrieve the desorbed version of the surface species-- the thing the adsorption correction was applied to during thermo estimation + dummy_gas_species = Species() + dummy_gas_species.molecule = species.molecule[0].get_desorbed_molecules() + saturated_species, ignore_spc = self.retrieve_saturated_species_from_list(dummy_gas_species) if ignore_spc: # this is saturated species that isn't in the actual model self.extra_species.append(saturated_species) @@ -413,18 +497,20 @@ def extract_sources_from_model(self): if 'QM' in source: source['QM'] = self.species_list.index(saturated_species) else: - raise Exception('Source of thermo should not use more than two sources out of QM, Library, or GAV.') + raise Exception('Source of thermo should not use more 
than three sources out of ADS, QM, Library, or GAV.') self.species_sources_dict[species] = source self.reaction_sources_dict = {} for reaction in self.reaction_list: source = self.database.kinetics.extract_source_from_comments(reaction) - # Prep the source data + # Prep the source data # Consider any library or PDep reaction to be an independent parameter for now # and assign the source to the index of the reaction within self.reaction_list if 'Library' in source: source['Library'] = self.reaction_list.index(reaction) + if reaction.is_surface_reaction(): + source['Surface_Library'] = source.pop('Library') elif 'PDep' in source: source['PDep'] = self.reaction_list.index(reaction) elif 'Training' in source: @@ -465,7 +551,7 @@ def compile_all_sources(self): be performed after extract_sources_from_model function """ # Account for all the thermo sources - all_thermo_sources = {'GAV': {}, 'Library': set(), 'QM': set()} + all_thermo_sources = {'GAV': {}, 'Library': set(), 'QM': set(), 'ADS': {}, 'Surface_Library': set()} for source in self.species_sources_dict.values(): if 'GAV' in source: for groupType in source['GAV'].keys(): @@ -478,6 +564,15 @@ def compile_all_sources(self): all_thermo_sources['Library'].add(source['Library']) if 'QM' in source: all_thermo_sources['QM'].add(source['QM']) + if 'ADS' in source: + for ads_group in source['ADS'].keys(): + ads_group_entries = [groupTuple[0] for groupTuple in source['ADS'][ads_group]] + if ads_group not in all_thermo_sources['ADS']: + all_thermo_sources['ADS'][ads_group] = set(ads_group_entries) + else: + all_thermo_sources['ADS'][ads_group].update(ads_group_entries) + if 'Surface_Library' in source: + all_thermo_sources['Surface_Library'].add(source['Surface_Library']) # Convert to lists self.all_thermo_sources = {} @@ -486,9 +581,13 @@ def compile_all_sources(self): self.all_thermo_sources['GAV'] = {} for groupType in all_thermo_sources['GAV'].keys(): self.all_thermo_sources['GAV'][groupType] = 
list(all_thermo_sources['GAV'][groupType]) + self.all_thermo_sources['ADS'] = {} + for ads_group in all_thermo_sources['ADS'].keys(): + self.all_thermo_sources['ADS'][ads_group] = list(all_thermo_sources['ADS'][ads_group]) + self.all_thermo_sources['Surface_Library'] = list(all_thermo_sources['Surface_Library']) # Account for all the kinetics sources - all_kinetic_sources = {'Rate Rules': {}, 'Training': {}, 'Library': [], 'PDep': []} + all_kinetic_sources = {'Rate Rules': {}, 'Training': {}, 'Library': [], 'PDep': [], 'Surface_Library': []} for source in self.reaction_sources_dict.values(): if 'Training' in source: family_label = source['Training'][0] @@ -499,6 +598,8 @@ def compile_all_sources(self): all_kinetic_sources['Training'][family_label].add(training_entry) elif 'Library' in source: all_kinetic_sources['Library'].append(source['Library']) + elif 'Surface_Library' in source: + all_kinetic_sources['Surface_Library'].append(source['Surface_Library']) elif 'PDep' in source: all_kinetic_sources['PDep'].append(source['PDep']) elif 'Rate Rules' in source: @@ -523,6 +624,7 @@ def compile_all_sources(self): self.all_kinetic_sources = {} self.all_kinetic_sources['Library'] = all_kinetic_sources['Library'] + self.all_kinetic_sources['Surface_Library'] = all_kinetic_sources['Surface_Library'] self.all_kinetic_sources['PDep'] = all_kinetic_sources['PDep'] # Convert to lists self.all_kinetic_sources['Rate Rules'] = {} @@ -559,10 +661,23 @@ def assign_parameter_uncertainties(self, g_param_engine=None, k_param_engine=Non except IndexError: label = 'Library {}'.format(self.extra_species[source['Library'] - len(self.species_list)].to_chemkin()) dG[label] = pdG + if 'Surface_Library' in source: + pdG = g_param_engine.get_partial_uncertainty_value(source, 'Surface_Library', corr_param=source['Surface_Library']) + try: + label = 'Surface_Library {}'.format(self.species_list[source['Surface_Library']].to_chemkin()) + except IndexError: + label = 'Surface_Library 
{}'.format(self.extra_species[source['Surface_Library'] - len(self.species_list)].to_chemkin()) + dG[label] = pdG if 'QM' in source: pdG = g_param_engine.get_partial_uncertainty_value(source, 'QM', corr_param=source['QM']) label = 'QM {}'.format(self.species_list[source['QM']].to_chemkin()) dG[label] = pdG + if 'ADS' in source: + for adsGroupType, groupList in source['ADS'].items(): + for group, weight in groupList: + pdG = g_param_engine.get_partial_uncertainty_value(source, 'ADS', group, adsGroupType) + label = 'AdsorptionGroup({}) {}'.format(adsGroupType, group.label) + dG[label] = pdG if 'GAV' in source: for groupType, groupList in source['GAV'].items(): for group, weight in groupList: @@ -616,6 +731,11 @@ def assign_parameter_uncertainties(self, g_param_engine=None, k_param_engine=Non label = 'Library {}'.format(reaction.to_chemkin(self.species_list, kinetics=False)) dlnk[label] = dplnk + elif 'Surface_Library' in source: + dplnk = k_param_engine.get_partial_uncertainty_value(source, 'Surface_Library', source['Surface_Library']) + label = 'Library {}'.format(reaction.to_chemkin(self.species_list, kinetics=False)) + dlnk[label] = dplnk + elif 'Training' in source: dplnk = k_param_engine.get_partial_uncertainty_value(source, 'Training', source['Training']) family = source['Training'][0] diff --git a/test/rmgpy/tools/uncertaintyTest.py b/test/rmgpy/tools/uncertaintyTest.py index 85f10ca5a54..44f16f85486 100644 --- a/test/rmgpy/tools/uncertaintyTest.py +++ b/test/rmgpy/tools/uncertaintyTest.py @@ -127,7 +127,7 @@ def test_uncertainty_assignment(self): "RCCJ", } other_expected = {"ketene", "R"} - assert set(self.uncertainty.all_thermo_sources) == {"GAV", "Library", "QM"} + assert set(self.uncertainty.all_thermo_sources) == {"GAV", "Library", "QM", "ADS", "Surface_Library"} assert set(self.uncertainty.all_thermo_sources["GAV"]) == {"group", "radical", "other"} grp = set([e.label for e in self.uncertainty.all_thermo_sources["GAV"]["group"]]) rad = set([e.label for e 
in self.uncertainty.all_thermo_sources["GAV"]["radical"]]) @@ -151,7 +151,7 @@ def test_uncertainty_assignment(self): 'C/H3/Cs\\H3;C_rad/H2/Cs\\H\\Cs\\Cs|O', 'C/H3/Cs\\H3;Cd_Cd\\H2_pri_rad', } - assert set(self.uncertainty.all_kinetic_sources) == {"Rate Rules", "Training", "Library", "PDep"} + assert set(self.uncertainty.all_kinetic_sources) == {"Rate Rules", "Training", "Library", "PDep", "Surface_Library"} assert set(self.uncertainty.all_kinetic_sources["Rate Rules"].keys()) == {"Disproportionation", "H_Abstraction"} rr = set([e.label for e in self.uncertainty.all_kinetic_sources["Rate Rules"]["Disproportionation"]]) assert rr == Disproportionation_rr_expected From bd964d942154d8e2b1e494dc4325affd2f3128d6 Mon Sep 17 00:00:00 2001 From: Sevy Harris Date: Mon, 13 Apr 2026 12:13:17 -0400 Subject: [PATCH 04/10] fix uncertainty whitespace/linting errors --- rmgpy/tools/uncertainty.py | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/rmgpy/tools/uncertainty.py b/rmgpy/tools/uncertainty.py index 29ef3738c3a..eb40246131b 100644 --- a/rmgpy/tools/uncertainty.py +++ b/rmgpy/tools/uncertainty.py @@ -47,7 +47,7 @@ def __init__(self, dG_library=1.5, dG_QM=3.0, dG_GAV=1.5, dG_group=0.1, dG_ADS_c """ Initialize the different uncertainties dG_library, dG_QM, dG_GAV, and dG_other with set values in units of kcal/mol. - + We expect a uniform distribution for some species free energy G in [Gmin, Gmax]. 
dG = (Gmax-Gmin)/2 """ @@ -82,7 +82,7 @@ def get_uncertainty_value(self, source): def get_partial_uncertainty_value(self, source, corr_source_type, corr_param=None, corr_group_type=None): """ Obtain the partial uncertainty dG/dG_corr*dG_corr, where dG_corr is the correlated parameter - + `corr_param` is the parameter identifier itself, which is a integer for QM and library parameters, or a string for group values `corr_source_type` is a string, being either 'Library', 'QM', 'GAV', 'ADS', or 'Estimation' `corr_group_type` is a string used only when the source type is 'GAV' and indicates grouptype @@ -135,11 +135,12 @@ def get_partial_uncertainty_value(self, source, corr_source_type, corr_param=Non def get_uncertainty_factor(self, source): """ Retrieve the uncertainty factor f in kcal/mol when the source of the thermo of a species is given. - + This is equivalent to sqrt(3)*dG in a uniform uncertainty interval """ dG = self.get_uncertainty_value(source) f = np.sqrt(3) * dG + return f class KineticParameterUncertainty(object): @@ -151,7 +152,7 @@ def __init__(self, dlnk_library=0.5, dlnk_training=0.5, dlnk_pdep=2.0, dlnk_fami dlnk_rule=0.5, dlnk_surf_library=2.659, dlnk_surf_training=2.659, dlnk_surf_rule=2.659): """ Initialize the different uncertainties dlnk - + We expect a uniform distribution for some reaction kinetics about ln(k0) in [ln(kmin), ln(kmax)]. 
dlnk = (ln(kmax)-ln(kmin))/2 """ @@ -236,7 +237,7 @@ def get_uncertainty_value(self, source): def get_partial_uncertainty_value(self, source, corr_source_type, corr_param=None, corr_family=None): """ Obtain the partial uncertainty dlnk/dlnk_corr*dlnk_corr, where dlnk_corr is the correlated parameter - + `corr_param` is the parameter identifier itself, which is the string identifier of the rate rule `corr_source_type` is a string, being either 'Rate Rules', 'Library', 'PDep', 'Training' or 'Estimation' `corr_family` is a string used only when the source type is 'Rate Rules' and indicates the family @@ -315,11 +316,12 @@ def get_partial_uncertainty_value(self, source, corr_source_type, corr_param=Non def get_uncertainty_factor(self, source): """ Retrieve the uncertainty factor f when the source of the reaction kinetics are given. - + This is equivalent to sqrt(3)/ln(10) * dlnk in a uniform uncertainty interval """ dlnk = self.get_uncertainty_value(source) f = np.sqrt(3) / np.log(10) * dlnk + return f class Uncertainty(object): @@ -352,16 +354,16 @@ def __init__(self, species_list=None, reaction_list=None, output_directory=''): if not os.path.exists(self.output_directory): try: os.makedirs(self.output_directory) - except: + except OSError: raise Exception('Uncertainty output directory could not be created.') def load_database(self, kinetics_families='all', kinetics_depositories=None, thermo_libraries=None, reaction_libraries=None): """ This function loads a single copy of the RMGDatabase with full verbose averaging - of the rate rule to trace kinetics sources. - + of the rate rule to trace kinetics sources. + By default, this function loads all the kinetics families, only the training kinetics depository, - the primaryThermoLibrary, and no reaction libraries. + the primaryThermoLibrary, and no reaction libraries. 
""" from rmgpy.data.rmg import RMGDatabase from rmgpy import settings @@ -392,7 +394,7 @@ def load_model(self, chemkin_path, dictionary_path, transport_path=None, surface """ Load a RMG-generated model into the Uncertainty class `chemkin_path`: path to the chem_annotated.inp CHEMKIN mechanism - `dictionary_path`: path to the species_dictionary.txt file + `dictionary_path`: path to the species_dictionary.txt file `transport_path`: path to the tran.dat file (optional) Then create dictionaries stored in self.thermoGroups and self.rateRules @@ -469,7 +471,7 @@ def extract_sources_from_model(self): # We must retrieve the original saturated molecule's thermo instead of using the radical species as the source of thermo saturated_species, ignore_spc = self.retrieve_saturated_species_from_list(species) - if ignore_spc: # this is saturated species that isn't in the actual model + if ignore_spc: # this is saturated species that isn't in the actual model self.extra_species.append(saturated_species) if 'Library' in source: @@ -748,7 +750,7 @@ def sensitivity_analysis(self, initial_mole_fractions, sensitive_species, T, P, sensitivity_threshold=1e-3, number=10, fileformat='.png'): """ Run sensitivity analysis using the RMG solver in a single ReactionSystem object - + initial_mole_fractions is a dictionary with Species objects as keys and mole fraction initial conditions sensitive_species is a list of sensitive Species objects number is the number of top species thermo or reaction kinetics desired to be plotted @@ -817,7 +819,7 @@ def local_analysis(self, sensitive_species, reaction_system_index=0, correlated= output = {} for sens_species in sensitive_species: csvfile_path = os.path.join(self.output_directory, 'solver', - 'sensitivity_{0}_SPC_{1}.csv'.format(reaction_system_index+1, + 'sensitivity_{0}_SPC_{1}.csv'.format(reaction_system_index + 1, sens_species.index)) time, data_list = parse_csv_data(csvfile_path) # Assign uncertainties @@ -874,7 +876,7 @@ def local_analysis(self, 
sensitive_species, reaction_system_index=0, correlated= # Add the reaction index to the data label of the reaction uncertainties # data.index stores the physical index of the reaction + 1, so we convert it to the RMG index here for data in reaction_data_list: - data.label = 'k' + str(self.reaction_list[data.index-1].index) + ': ' + data.label.split()[-1] + data.label = 'k' + str(self.reaction_list[data.index - 1].index) + ': ' + data.label.split()[-1] if correlated: folder = os.path.join(self.output_directory, 'correlated') From fa00c573df0de8133990a71061384fec4dc5e295 Mon Sep 17 00:00:00 2001 From: Sevy Harris Date: Tue, 14 Apr 2026 09:33:34 -0400 Subject: [PATCH 05/10] add parse_thermo_comment tests for surface species --- test/rmgpy/data/thermoTest.py | 78 +++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/test/rmgpy/data/thermoTest.py b/test/rmgpy/data/thermoTest.py index 57d2fb8a5b7..0b5b1f6de38 100644 --- a/test/rmgpy/data/thermoTest.py +++ b/test/rmgpy/data/thermoTest.py @@ -571,6 +571,84 @@ def test_parse_thermo_comments(self): assert source["GAV"]["radical"][0][1] == 1 assert all(source['GAV']['group'][x][1] == 1 for x in [0, 1, 2, 3, 5]) assert source['GAV']['group'][4][1] == 2 + + # ------------------------------- Surface Examples --------------------------- + # Surface library only + OX = rmgpy.species.Species(smiles="O=*") + OX.thermo = rmgpy.thermo.NASA() + OX.thermo.comment = 'Thermo library: surfaceThermoPt111' + source = self.database.extract_source_from_comments(OX) + assert "Library" in source + assert source["Library"] == "surfaceThermoPt111" + + # Gas library + adsorption correction + CH2X = rmgpy.species.Species(smiles="[CH2]=*") + CH2X.thermo = rmgpy.thermo.NASA() + CH2X.thermo.comment = 'Gas phase thermo for CH2(T) from Thermo library: primaryThermoLibrary. 
Adsorption correction: + Thermo group additivity estimation:\nadsorptionPt111(C=*R2)' + source = self.database.extract_source_from_comments(CH2X) + assert "Library" in source + assert source["Library"] == "primaryThermoLibrary" + assert "ADS" in source + assert source['ADS']['adsorptionPt111'][0][0].label == 'C=*R2' + assert source['ADS']['adsorptionPt111'][0][1] == 1 # weight should be 1 + assert len(source['ADS']['adsorptionPt111']) == 1 # there should only be one adsorption contribution + + # GAV gas + adsorption correction + CO2X = rmgpy.species.Species(smiles="O=C=O.*") + CO2X.thermo = rmgpy.thermo.NASA() + CO2X.thermo.comment = 'Gas phase thermo for O=C=O from Thermo group additivity estimation: group(Cdd-OdOd). Adsorption correction: + Thermo group additivity estimation:\nadsorptionPt111((CR2)*)' + source = self.database.extract_source_from_comments(CO2X) + assert "GAV" in source + assert source["GAV"]["group"][0][0].label == "Cdd-OdOd" + assert source["GAV"]["group"][0][1] == 1 # weight should be 1 + assert len(source["GAV"]["group"]) == 1 # there should only be one GAV contribution + assert "ADS" in source + assert source['ADS']['adsorptionPt111'][0][0].label == '(CR2)*' + assert source['ADS']['adsorptionPt111'][0][1] == 1 # weight should be 1 + assert len(source['ADS']['adsorptionPt111']) == 1 # there should only be one adsorption contribution + + # Gas library + radical for HBI + adsorption correction + CHOX = rmgpy.species.Species(smiles="O=[CH]*") + CHOX.thermo = rmgpy.thermo.NASA() + CHOX.thermo.comment = 'Gas phase thermo for [CH]=O from Thermo library: primaryThermoLibrary + radical(HCdsJO). 
Adsorption correction: + Thermo group additivity estimation: adsorptionPt111(C-*R3)' + source = self.database.extract_source_from_comments(CHOX) + assert "Library" in source + assert source["Library"] == "primaryThermoLibrary" + assert "GAV" in source + assert source["GAV"]["radical"][0][0].label == "HCdsJO" + assert source["GAV"]["radical"][0][1] == 1 # weight should be 1 + assert len(source["GAV"]["radical"]) == 1 # there should only be one radical contribution + assert "ADS" in source + assert source['ADS']['adsorptionPt111'][0][0].label == 'C-*R3' + assert source['ADS']['adsorptionPt111'][0][1] == 1 # weight should be 1 + assert len(source['ADS']['adsorptionPt111']) == 1 # there should only be one adsorption contribution + + # Also test that the database is retrieving and assembling comments correctly + # Here we use the extremely limited test database already available in memory + OX_comment = self.database.get_thermo_data(OX).comment + OX.thermo.comment = OX_comment # set the comment to be the generated comment + source = self.database.extract_source_from_comments(OX) + assert "Library" in source + assert source["Library"] == "primaryThermoLibrary" + assert 'ADS' in source + assert source['ADS']['adsorptionPt111'][0][0].label == 'O=*' + assert source['ADS']['adsorptionPt111'][0][1] == 1 # weight should be 1 + assert len(source['ADS']['adsorptionPt111']) == 1 # there should only be one adsorption contribution + + # an example with a radical and a library and an adsorption correction + CH2CHCH_ads = rmgpy.species.Species(smiles="[CH]C=C.*") + CH2CHCH_ads.thermo = self.database.get_thermo_data(CH2CHCH_ads) + source = self.database.extract_source_from_comments(CH2CHCH_ads) + assert "Library" in source + assert source["Library"] == "DFT_QCI_thermo" + assert "GAV" in source + assert source["GAV"]["radical"][0][0].label == "AllylJ2_triplet" + assert source["GAV"]["radical"][0][1] == 1 # weight should be 1 + assert len(source["GAV"]["radical"]) == 1 # there should 
only be one radical contribution + assert "ADS" in source + assert source['ADS']['adsorptionPt111'][0][0].label == '(CR2CR)*' + assert source['ADS']['adsorptionPt111'][0][1] == 1 # weight should be 1 + assert len(source['ADS']['adsorptionPt111']) == 1 # there should only be one adsorption contribution def test_species_thermo_generation_hbi_library(self): """Test thermo generation for species objects for HBI correction on library value. From 5e7a171629782183cd93e6be5cbb919c63bd9ab5 Mon Sep 17 00:00:00 2001 From: Sevy Harris Date: Wed, 15 Apr 2026 18:14:42 -0400 Subject: [PATCH 06/10] add uncertainty tests for some surface species --- test/rmgpy/tools/uncertaintyTest.py | 44 +++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/test/rmgpy/tools/uncertaintyTest.py b/test/rmgpy/tools/uncertaintyTest.py index 44f16f85486..7d55e4ff503 100644 --- a/test/rmgpy/tools/uncertaintyTest.py +++ b/test/rmgpy/tools/uncertaintyTest.py @@ -30,6 +30,7 @@ import os +from arkane.input import species import numpy as np import rmgpy @@ -178,3 +179,46 @@ def test_uncertainty_assignment(self): [0.5, 1.5, 3.169924, 3.169924, 2.553605, 0.5, 2.0, 7.81, 7.81, 0.5], rtol=1e-4 ) + + def test_specific_species_uncertainties(self): + """ + Test uncertainties for a few specific examples + """ + + expected_results = { # order is (total_uncertainty, [group_names], [group_counts]) + 'CCCC': (1.9, ['Cs-CsCsHH', 'Cs-CsHHH'], [2, 2]), + 'CCCCCCCCCC': (2.5, ['Cs-CsCsHH', 'Cs-CsHHH'], [8, 2]), + 'CC(OO)CC': (2.1, ['O2s-OsCs', 'O2s-OsH', 'Cs-CsCsOsH', 'Cs-CsCsHH', 'Cs-CsHHH'], [1, 1, 1, 1, 2]), + 'C=NCC': (1.9, ['N3d-CdCs', 'Cs-(N3dCd)CsHH', 'Cs-CsHHH', 'Cd-N3dHH'], [1, 1, 1, 1]), + 'C=C': (1.7, ['Cds-CdsHH'], [2]), + 'C*': (10.018, ['CH3'], [1]), # Gas library + radical + adsorption correction + 'O=[CH]*': (8.618, ['Cds-OdHH', 'HCdsJO'], [1, 1]), # GAV + radical + adsorption correction + } + + uncertainty = rmgpy.tools.uncertainty.Uncertainty() + uncertainty.database = 
self.uncertainty.database # use the same database as the main test + new_species_list = [rmgpy.species.Species(smiles=spc) for spc in expected_results.keys()] + for spc in new_species_list: + spc.thermo = self.uncertainty.database.thermo.get_thermo_data(spc) + if not isinstance(spc.thermo, rmgpy.thermo.NASA): + spc.thermo = spc.thermo.to_nasa(Tmin=298, Tmax=3000, Tint=1000) + uncertainty.species_list = new_species_list + uncertainty.reaction_list = [] # no need to test kinetics here + + uncertainty.extract_sources_from_model() # this will populate the sources dict with the new species + uncertainty.assign_parameter_uncertainties() # this will assign the uncertainties based on the sources and the database + + for i, sp in enumerate(uncertainty.species_list): + source = uncertainty.species_sources_dict[sp] + + group_counts = [] + group_names = [] + for group_type, group_entries in source['GAV'].items(): + for group, count in group_entries: + group_names.append(group.label) + group_counts.append(count) + + result = expected_results[sp.smiles] + assert np.isclose(uncertainty.thermo_input_uncertainties[i], result[0], rtol=1e-4) + assert group_names == result[1] + assert group_counts == result[2] From acc67ab98373628bc4b9729ee6ee2fed5b4f753b Mon Sep 17 00:00:00 2001 From: Sevy Harris Date: Mon, 13 Apr 2026 12:22:19 -0400 Subject: [PATCH 07/10] make uncertainties add in quadrature --- rmgpy/tools/uncertainty.py | 50 ++++++++++++++--------------- test/rmgpy/tools/uncertaintyTest.py | 19 ++++++----- 2 files changed, 34 insertions(+), 35 deletions(-) diff --git a/rmgpy/tools/uncertainty.py b/rmgpy/tools/uncertainty.py index eb40246131b..533d2bd77c7 100644 --- a/rmgpy/tools/uncertainty.py +++ b/rmgpy/tools/uncertainty.py @@ -43,7 +43,7 @@ class ThermoParameterUncertainty(object): This class is an engine that generates the species uncertainty based on its thermo sources. 
""" - def __init__(self, dG_library=1.5, dG_QM=3.0, dG_GAV=1.5, dG_group=0.1, dG_ADS_correction=6.918, dG_surf_lib=6.918): + def __init__(self, dG_library=1.5, dG_QM=3.0, dG_GAV=1.5, dG_group=0.316, dG_ADS_correction=6.918, dG_surf_lib=6.918): """ Initialize the different uncertainties dG_library, dG_QM, dG_GAV, and dG_other with set values in units of kcal/mol. @@ -62,22 +62,22 @@ def get_uncertainty_value(self, source): """ Retrieve the uncertainty value in kcal/mol when the source of the thermo of a species is given. """ - dG = 0.0 + varG = 0.0 if 'Library' in source: - dG += self.dG_library + varG += self.dG_library ** 2 if 'Surface_Library' in source: - dG += self.dG_surf_lib + varG += self.dG_surf_lib ** 2 if 'QM' in source: - dG += self.dG_QM + varG += self.dG_QM ** 2 if 'GAV' in source: - dG += self.dG_GAV # Add a fixed uncertainty for the GAV method + varG += self.dG_GAV ** 2 # Add a fixed uncertainty for the GAV method for group_type, group_entries in source['GAV'].items(): group_weights = [groupTuple[-1] for groupTuple in group_entries] - dG += np.sum([weight * self.dG_group for weight in group_weights]) + varG += np.sum([weight ** 2 * self.dG_group ** 2 for weight in group_weights]) if 'ADS' in source: - dG += self.dG_ADS_correction # Add adsorption correction uncertainty + varG += self.dG_ADS_correction ** 2 # Add adsorption correction uncertainty - return dG + return np.sqrt(varG) def get_partial_uncertainty_value(self, source, corr_source_type, corr_param=None, corr_group_type=None): """ @@ -170,24 +170,24 @@ def get_uncertainty_value(self, source): """ Retrieve the dlnk uncertainty when the source of the reaction kinetics are given """ - dlnk = 0.0 + varlnk = 0.0 if 'Library' in source: # Should be a single library reaction source - dlnk += self.dlnk_library + varlnk += self.dlnk_library ** 2 elif 'Surface_Library' in source: # Should be a single library reaction source - dlnk += self.dlnk_surf_library + varlnk += self.dlnk_surf_library ** 2 elif 
'PDep' in source: # Should be a single pdep reaction source - dlnk += self.dlnk_pdep + varlnk += self.dlnk_pdep ** 2 elif 'Training' in source: # Should be a single training reaction # Although some training entries may be used in reverse, - # We still consider the kinetics to be directly dependent + # We still consider the kinetics to be directly dependent if 'surface' in source['Training'][0].lower(): - dlnk += self.dlnk_surf_training + varlnk += self.dlnk_surf_training ** 2 else: - dlnk += self.dlnk_training + varlnk += self.dlnk_training ** 2 elif 'Rate Rules' in source: family_label = source['Rate Rules'][0] source_dict = source['Rate Rules'][1] @@ -195,14 +195,14 @@ def get_uncertainty_value(self, source): rule_weights = [ruleTuple[-1] for ruleTuple in source_dict['rules']] training_weights = [trainingTuple[-1] for trainingTuple in source_dict['training']] - dlnk += self.dlnk_family + varlnk += self.dlnk_family ** 2 N = len(rule_weights) + len(training_weights) if 'node_std_dev' in source_dict: # Handle autogen BM trees if source_dict['node_std_dev'] < 0: raise ValueError('Invalid value for std dev of kinetics family rule node') - dlnk += source_dict['node_std_dev'] + varlnk += np.float_power(source_dict['node_std_dev'], 2.0) if source_dict['node_n_train'] is None: raise ValueError('Invalid number of training reactions for kinetics family rule node') N = source_dict['node_n_train'] @@ -211,28 +211,28 @@ def get_uncertainty_value(self, source): # every node template has its own fitted rate rule by definition, but here we use the # number of training reactions as an approximation of the node's specificity/generality # and add a penalty for being too general (large # of training reactions) - dlnk += np.log10(N + 1) * self.dlnk_nonexact + varlnk += (np.log10(N + 1) * self.dlnk_nonexact) ** 2 else: # Handle hand-made trees if not exact: # nonexactness contribution increases as N increases - dlnk += np.log10(N + 1) * self.dlnk_nonexact + varlnk += (np.log10(N + 1) 
* self.dlnk_nonexact) ** 2 if 'surface' in family_label.lower(): - dlnk += np.sum([weight * self.dlnk_surf_rule for weight in rule_weights]) - dlnk += np.sum([weight * self.dlnk_surf_training for weight in training_weights]) + varlnk += np.sum([weight ** 2 * self.dlnk_surf_rule ** 2 for weight in rule_weights]) + varlnk += np.sum([weight ** 2 * self.dlnk_surf_training ** 2 for weight in training_weights]) else: # Add the contributions from rules - dlnk += np.sum([weight * self.dlnk_rule for weight in rule_weights]) + varlnk += np.sum([weight ** 2 * self.dlnk_rule ** 2 for weight in rule_weights]) # Add the contributions from training # Even though these source from training reactions, we actually # use the uncertainty for rate rules, since these are now approximations # of the original reaction. We consider these to be independent of original the training # parameters because the rate rules may be reversing the training reactions, # which leads to more complicated dependence - dlnk += np.sum([weight * self.dlnk_rule for weight in training_weights]) + varlnk += np.sum([weight ** 2 * self.dlnk_rule ** 2 for weight in training_weights]) - return dlnk + return np.sqrt(varlnk) def get_partial_uncertainty_value(self, source, corr_source_type, corr_param=None, corr_family=None): """ diff --git a/test/rmgpy/tools/uncertaintyTest.py b/test/rmgpy/tools/uncertaintyTest.py index 7d55e4ff503..bf04e5803bc 100644 --- a/test/rmgpy/tools/uncertaintyTest.py +++ b/test/rmgpy/tools/uncertaintyTest.py @@ -170,13 +170,12 @@ def test_uncertainty_assignment(self): np.testing.assert_allclose( thermo_unc, - [1.5, 1.5, 2.0, 1.9, 3.1, 1.5, 1.9, 2.0, 2.0, 1.9, 2.2, 1.9, 2.0, 1.5, 3.1, 1.9, 1.5, 2.0, 1.7, 1.8, 1.8, 1.9, 1.8, 1.9, 1.9], + [1.5, 1.5, 1.7745, 1.7461, 2.1447, 1.5, 1.6879, 1.7173, 1.7745, 1.7461, 1.7745, 1.7461, 1.7745, 1.5, 2.1447, 1.6879, 1.5, 1.7745, 1.6277, 1.6581, 1.6581, 1.6879, 1.5967, 1.6277, 1.6277], rtol=1e-4, ) np.testing.assert_allclose( kinetic_unc, - # the 7.81 value 
comes from the SIDT tree node uncertainty: 5.756 + non-exact penalty for N=1: log10(1+1) * 3.5 + family uncertainty: 1.0 - [0.5, 1.5, 3.169924, 3.169924, 2.553605, 0.5, 2.0, 7.81, 7.81, 0.5], + [0.5, 1.118, 1.9783, 1.9783, 1.5363, 0.5, 2.0, 5.9369, 5.9369, 0.5], rtol=1e-4 ) @@ -186,13 +185,13 @@ def test_specific_species_uncertainties(self): """ expected_results = { # order is (total_uncertainty, [group_names], [group_counts]) - 'CCCC': (1.9, ['Cs-CsCsHH', 'Cs-CsHHH'], [2, 2]), - 'CCCCCCCCCC': (2.5, ['Cs-CsCsHH', 'Cs-CsHHH'], [8, 2]), - 'CC(OO)CC': (2.1, ['O2s-OsCs', 'O2s-OsH', 'Cs-CsCsOsH', 'Cs-CsCsHH', 'Cs-CsHHH'], [1, 1, 1, 1, 2]), - 'C=NCC': (1.9, ['N3d-CdCs', 'Cs-(N3dCd)CsHH', 'Cs-CsHHH', 'Cd-N3dHH'], [1, 1, 1, 1]), - 'C=C': (1.7, ['Cds-CdsHH'], [2]), - 'C*': (10.018, ['CH3'], [1]), # Gas library + radical + adsorption correction - 'O=[CH]*': (8.618, ['Cds-OdHH', 'HCdsJO'], [1, 1]), # GAV + radical + adsorption correction + 'CCCC': (1.7460950718675086, ['Cs-CsCsHH', 'Cs-CsHHH'], [2, 2]), + 'CCCCCCCCCC': (3.006693865361088, ['Cs-CsCsHH', 'Cs-CsHHH'], [8, 2]), + 'CC(OO)CC': (1.7460950718675086, ['O2s-OsCs', 'O2s-OsH', 'Cs-CsCsOsH', 'Cs-CsCsHH', 'Cs-CsHHH'], [1, 1, 1, 1, 2]), + 'C=NCC': (1.6277051330016747, ['N3d-CdCs', 'Cs-(N3dCd)CsHH', 'Cs-CsHHH', 'Cd-N3dHH'], [1, 1, 1, 1]), + 'C=C': (1.6277051330016747, ['Cds-CdsHH'], [2]), + 'C*': (7.242829557569335, ['CH3'], [1]), # Gas library + radical + adsorption correction + 'O=[CH]*': (7.0928439994123655, ['Cds-OdHH', 'HCdsJO'], [1, 1]), # GAV + radical + adsorption correction } uncertainty = rmgpy.tools.uncertainty.Uncertainty() From 0c13f45fcc2e4307ce427e8d01ddbaab04472d68 Mon Sep 17 00:00:00 2001 From: Sevy Harris Date: Wed, 15 Apr 2026 17:34:12 -0400 Subject: [PATCH 08/10] Use dG_group=0.7159 kcal/mol in uncertainty tool This value was fitted to median group additivity errors for 564 species with quality thermo libraries See: 
https://github.com/comocheng/uncertainty_estimator/blob/main/uncertainty_tool_dev/demos/quadrature/specific_examples.ipynb --- rmgpy/tools/uncertainty.py | 2 +- test/rmgpy/tools/uncertaintyTest.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/rmgpy/tools/uncertainty.py b/rmgpy/tools/uncertainty.py index 533d2bd77c7..a9ffdb1ed35 100644 --- a/rmgpy/tools/uncertainty.py +++ b/rmgpy/tools/uncertainty.py @@ -43,7 +43,7 @@ class ThermoParameterUncertainty(object): This class is an engine that generates the species uncertainty based on its thermo sources. """ - def __init__(self, dG_library=1.5, dG_QM=3.0, dG_GAV=1.5, dG_group=0.316, dG_ADS_correction=6.918, dG_surf_lib=6.918): + def __init__(self, dG_library=1.5, dG_QM=3.0, dG_GAV=1.5, dG_group=0.7159, dG_ADS_correction=6.918, dG_surf_lib=6.918): """ Initialize the different uncertainties dG_library, dG_QM, dG_GAV, and dG_other with set values in units of kcal/mol. diff --git a/test/rmgpy/tools/uncertaintyTest.py b/test/rmgpy/tools/uncertaintyTest.py index bf04e5803bc..a949d21252d 100644 --- a/test/rmgpy/tools/uncertaintyTest.py +++ b/test/rmgpy/tools/uncertaintyTest.py @@ -170,7 +170,7 @@ def test_uncertainty_assignment(self): np.testing.assert_allclose( thermo_unc, - [1.5, 1.5, 1.7745, 1.7461, 2.1447, 1.5, 1.6879, 1.7173, 1.7745, 1.7461, 1.7745, 1.7461, 1.7745, 1.5, 2.1447, 1.6879, 1.5, 1.7745, 1.6277, 1.6581, 1.6581, 1.6879, 1.5967, 1.6277, 1.6277], + [1.5, 1.5, 2.61966, 2.51994, 2.23886, 1.5, 2.30761, 2.41611, 2.61966, 2.51994, 2.61966, 2.51994, 2.61966, 1.5, 2.23886, 2.30761, 1.5, 2.61966, 2.07366, 2.19376, 2.19376, 2.30761, 1.94616, 2.07366, 2.07366], rtol=1e-4, ) np.testing.assert_allclose( @@ -185,13 +185,13 @@ def test_specific_species_uncertainties(self): """ expected_results = { # order is (total_uncertainty, [group_names], [group_counts]) - 'CCCC': (1.7460950718675086, ['Cs-CsCsHH', 'Cs-CsHHH'], [2, 2]), - 'CCCCCCCCCC': (3.006693865361088, ['Cs-CsCsHH', 'Cs-CsHHH'], [8, 
2]), - 'CC(OO)CC': (1.7460950718675086, ['O2s-OsCs', 'O2s-OsH', 'Cs-CsCsOsH', 'Cs-CsCsHH', 'Cs-CsHHH'], [1, 1, 1, 1, 2]), - 'C=NCC': (1.6277051330016747, ['N3d-CdCs', 'Cs-(N3dCd)CsHH', 'Cs-CsHHH', 'Cd-N3dHH'], [1, 1, 1, 1]), - 'C=C': (1.6277051330016747, ['Cds-CdsHH'], [2]), - 'C*': (7.242829557569335, ['CH3'], [1]), # Gas library + radical + adsorption correction - 'O=[CH]*': (7.0928439994123655, ['Cds-OdHH', 'HCdsJO'], [1, 1]), # GAV + radical + adsorption correction + 'CCCC': (2.5199409675625337, ['Cs-CsCsHH', 'Cs-CsHHH'], [2, 2]), + 'CCCCCCCCCC': (6.091048438487417, ['Cs-CsCsHH', 'Cs-CsHHH'], [8, 2]), + 'CC(OO)CC': (2.5199409675625337, ['O2s-OsCs', 'O2s-OsH', 'Cs-CsCsOsH', 'Cs-CsCsHH', 'Cs-CsHHH'], [1, 1, 1, 1, 2]), + 'C=NCC': (2.07365649035707, ['N3d-CdCs', 'Cs-(N3dCd)CsHH', 'Cs-CsHHH', 'Cd-N3dHH'], [1, 1, 1, 1]), + 'C=C': (2.07365649035707, ['Cds-CdsHH'], [2]), + 'C*': (7.271261019245562, ['CH3'], [1]), # Gas library + radical + adsorption correction + 'O=[CH]*': (7.150786643440007, ['Cds-OdHH', 'HCdsJO'], [1, 1]), # GAV + radical + adsorption correction } uncertainty = rmgpy.tools.uncertainty.Uncertainty() From a5bfb96e647f27b72a6bdc13b6d0a1ddf8ab5e66 Mon Sep 17 00:00:00 2001 From: Sevy Harris Date: Mon, 27 Apr 2026 16:35:57 -0400 Subject: [PATCH 09/10] update local uncertainty notebook to explain adding in quadrature --- ipython/local_uncertainty.ipynb | 55 +++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 19 deletions(-) diff --git a/ipython/local_uncertainty.ipynb b/ipython/local_uncertainty.ipynb index 9afc08d09fe..2ff32ca32be 100644 --- a/ipython/local_uncertainty.ipynb +++ b/ipython/local_uncertainty.ipynb @@ -114,11 +114,29 @@ "\n", "$$\\Delta G = \\frac{1}{\\sqrt{12}}(G_{max} - G_{min})$$\n", "\n", - "Several parameters are used to formulate $\\Delta G$. 
These are $\\Delta G_\\mathrm{library}$, $\\Delta G_\\mathrm{QM}$, $\\Delta G_\\mathrm{GAV}$, and $\\Delta _\\mathrm{group}$.\n", + "But first, to formulate $G$, we need to compile all the uncertainty sources. These are $ G_\\mathrm{library}$, $ G_\\mathrm{QM}$, $ G_\\mathrm{GAV}$, and $G _\\mathrm{group}$. We treat each as continuous random variable and add them to get the species uncertainty.\n", " \n", - "$$\\Delta G = \\delta_\\mathrm{library} \\Delta G_\\mathrm{library} + \\delta_\\mathrm{QM} \\Delta G_\\mathrm{QM} + \\delta_\\mathrm{GAV} \\left( \\Delta G_\\mathrm{GAV} + \\sum_{\\mathrm{group}\\; j} d_{j} \\Delta G_{\\mathrm{group},j} \\right)$$\n", + "$$G = \\delta_\\mathrm{library} G_\\mathrm{library} + \\delta_\\mathrm{QM} G_\\mathrm{QM} + \\delta_\\mathrm{GAV} \\left( G_\\mathrm{GAV} + \\sum_{\\mathrm{group}\\; j} d_{j} G_{\\mathrm{group},j} \\right)$$\n", + "\n", + "Here $\\delta$ is the Kronecker delta function which equals one if the species thermochemistry parameter contains the particular source type and $d_{j}$ is the degeneracy (number of appearances) of the thermo group used to construct the species thermochemistry in the group additivity method.\n", + "\n", + "To compute $\\Delta G$, we'll use the property that the variance of the sum of two random variables, $X$ and $Y$, is:\n", + "$$Var(X+Y)=Var(X)+Var(Y)+2Cov(X,Y)$$\n", + "\n", + "If we assume that each uncertainty source is uncorrelated with the others, then the covariance term drops out:\n", + "\n", + "$$Var(X+Y)=Var(X)+Var(Y), \\ \\ \\ \\ \\ X \\neq Y$$\n", + "\n", + "And we can compute the variance of $G$:\n", + "\n", + "$$Var(G) = \\delta_\\mathrm{library} Var(G_\\mathrm{library}) + \\delta_\\mathrm{QM} Var(G_\\mathrm{QM})+\\delta_\\mathrm{GAV} \\left(Var(G_\\mathrm{GAV}) +\\sum_{group\\ j} d_j^2 Var(G_\\mathrm{group\\ j}) \\right)$$\n", + "\n", + "\n", + "The standard deviation $\\Delta G$ is then the square root of that (this is an example of adding in quadrature):\n", + "$$\\Delta(G) = 
\\sqrt{\\delta_\\mathrm{library} Var(G_\\mathrm{library}) + \\delta_\\mathrm{QM} Var(G_\\mathrm{QM})+\\delta_\\mathrm{GAV} \\left(Var(G_\\mathrm{GAV}) +\\sum_{group\\ j} d_j^2 Var(G_\\mathrm{group\\ j}) \\right)}$$\n", + "\n", + "\n", "\n", - "where $\\delta$ is the Kronecker delta function which equals one if the species thermochemistry parameter contains the particular source type and $d_{j}$ is the degeneracy (number of appearances) of the thermo group used to construct the species thermochemistry in the group additivity method.\n", "\n", "### Kinetics Uncertainty\n", "\n", @@ -130,13 +148,20 @@ "\n", "$$\\Delta \\ln(k) = \\frac{1}{\\sqrt{12}}(\\ln k_{max} - \\ln k_{min})$$\n", "\n", - "The parameters used to formulate $\\Delta \\ln k$ are $\\Delta \\ln k_\\mathrm{library}$, $\\Delta \\ln k_\\mathrm{training}$, $\\Delta \\ln k_\\mathrm{pdep}$, $\\Delta \\ln k_\\mathrm{family}$, $\\Delta \\ln k_\\mathrm{non-exact}$, and $\\Delta \\ln k_\\mathrm{rule}$.\n", + "The sources used to formulate $ \\ln k$ are $ \\ln k_\\mathrm{library}$, $ \\ln k_\\mathrm{training}$, $ \\ln k_\\mathrm{pdep}$, $ \\ln k_\\mathrm{family}$, $ \\ln k_\\mathrm{non-exact}$, and $ \\ln k_\\mathrm{rule}$.\n", + "\n", + "For library, training, and pdep reactions, the kinetic uncertainty is assigned according to their uncertainty type. For kinetics estimated using RMG's rate rules, the following formula is used to calculate the uncertainty:\n", "\n", - "For library, training, and pdep reactions, the kinetic uncertainty is assigned according to their uncertainty type. 
For kinetics estimated using RMG's rate rules, the following formula is used to calculate the uncertainty:\n", + "$$ \\ln k_\\mathrm{rate\\; rules} = \\ln k_\\mathrm{family} + \\log_{10}(N+1) \\left(\\ln k_\\mathrm{non-exact}\\right) + \\sum_{\\mathrm{rule}\\; i} w_i \\ln k_{\\mathrm{rule},i}$$\n", "\n", - "$$\\Delta \\ln k_\\mathrm{rate\\; rules} = \\Delta\\ln k_\\mathrm{family} + \\log_{10}(N+1) \\left(\\Delta\\ln k_\\mathrm{non-exact}\\right) + \\sum_{\\mathrm{rule}\\; i} w_i \\Delta \\ln k_{\\mathrm{rule},i}$$\n", + "where N is the total number of rate rules used and $w_{i}$ is the weight of the rate rule in the averaging scheme for that kinetics estimate.\n", "\n", - "where N is the total number of rate rules used and $w_{i}$ is the weight of the rate rule in the averaging scheme for that kinetics estimate. " + "Once again we use the property of adding variances with the assumption that sources are uncorrelated to each other. The standard deviation then adds in quadrature.\n", + "\n", + "$$\\Delta\\ln k_\\mathrm{rate\\; rules} = \\sqrt{(\\Delta \\ln k_\\mathrm{family})^2 + \\left(\\log_{10}(N+1) \\right)^2\\left(\\Delta \\ln k_\\mathrm{non-exact}\\right)^2 + \\sum_{\\mathrm{rule}\\; i} w_i^2 (\\Delta \\ln k_{\\mathrm{rule},i})^2}$$\n", + "\n", + "\n", + "\n" ] }, { @@ -178,9 +203,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "result = uncertainty.local_analysis(sensitive_species, correlated=False, number=5, fileformat='.png')\n", @@ -190,9 +213,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "# Show the uncertainty plots\n", @@ -230,9 +251,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "uncertainty.assign_parameter_uncertainties(correlated=True)\n", @@ -243,9 +262,7 @@ { "cell_type": "code", 
"execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, + "outputs": [], "source": [ "# Show the uncertainty plots\n", From 7b1d05caec45981ee10d7562aecc0f30db16e626 Mon Sep 17 00:00:00 2001 From: Sevy Harris Date: Thu, 23 Apr 2026 08:09:53 -0400 Subject: [PATCH 10/10] Add uncertainty tool export covariance functions This commit allows the user to export covariance matrices of a mechanism's estimated uncertainty as numpy arrays. --- rmgpy/tools/uncertainty.py | 184 +++++++++++++++++++++++++++++++++++++ 1 file changed, 184 insertions(+) diff --git a/rmgpy/tools/uncertainty.py b/rmgpy/tools/uncertainty.py index a9ffdb1ed35..8b7cd18d282 100644 --- a/rmgpy/tools/uncertainty.py +++ b/rmgpy/tools/uncertainty.py @@ -29,6 +29,7 @@ import os import re +import pickle import numpy as np @@ -36,6 +37,7 @@ from rmgpy.species import Species from rmgpy.tools.data import GenericData from rmgpy.tools.plot import parse_csv_data, plot_sensitivity, ReactionSensitivityPlot, ThermoSensitivityPlot +from rmgpy.kinetics.surface import SurfaceArrheniusBEP, StickingCoefficientBEP class ThermoParameterUncertainty(object): @@ -345,6 +347,9 @@ def __init__(self, species_list=None, reaction_list=None, output_directory=''): self.all_kinetic_sources = None self.thermo_input_uncertainties = None self.kinetic_input_uncertainties = None + self.thermo_covariance_matrix = None + self.kinetic_covariance_matrix = None + self.overall_covariance_matrix = None self.output_directory = output_directory if output_directory else os.getcwd() # For extra species needed for correlated analysis but not in model @@ -897,6 +902,185 @@ def local_analysis(self, sensitive_species, reaction_system_index=0, correlated= return output + def get_thermo_covariance_matrix(self): + """ + Export the thermo covariance matrix as a numpy array + """ + assert self.thermo_input_uncertainties is not None, 'Must call assign_parameter_uncertainties first' + assert
len(self.thermo_input_uncertainties) > 0, 'No thermodynamic parameters found' + if isinstance(self.thermo_input_uncertainties[0], np.float64): + print("""Warning -- parameter uncertainties assigned without correlations. +All off diagonals will be zero unless you call assign_parameter_uncertainties(correlated=True)""") + self.thermo_covariance_matrix = np.float_power(np.diag(self.thermo_input_uncertainties), 2.0) + return self.thermo_covariance_matrix + + self.thermo_covariance_matrix = np.zeros((len(self.species_list), len(self.species_list))) + + for i in range(len(self.species_list)): + for j in range(len(self.species_list)): + + # assuming only sources that match are correlated + for source_i in self.thermo_input_uncertainties[i].keys(): + if source_i in self.thermo_input_uncertainties[j].keys(): + self.thermo_covariance_matrix[i, j] += self.thermo_input_uncertainties[i][source_i] * self.thermo_input_uncertainties[j][source_i] + + return self.thermo_covariance_matrix + + def get_kinetic_covariance_matrix(self, k_param_engine=None): + """ + Export the kinetic covariance matrix as a numpy array + """ + assert self.kinetic_input_uncertainties is not None, 'Must call assign_parameter_uncertainties first' + assert len(self.kinetic_input_uncertainties) > 0, 'No kinetic parameters found' + if isinstance(self.kinetic_input_uncertainties[0], np.float64): + print("""Warning -- parameter uncertainties assigned without correlations. 
+All off diagonals will be zero unless you call assign_parameter_uncertainties(correlated=True)""") + self.kinetic_covariance_matrix = np.float_power(np.diag(self.kinetic_input_uncertainties), 2.0) + return self.kinetic_covariance_matrix + + if k_param_engine is None: + k_param_engine = KineticParameterUncertainty() + + self.kinetic_covariance_matrix = np.zeros((len(self.reaction_list), len(self.reaction_list))) + + # takes a while to load the family reaction maps + auto_gen_family_rxn_maps = {} + if self.all_kinetic_sources is None: + self.compile_all_sources() + for family in self.all_kinetic_sources['Rate Rules'].keys(): + if self.database.kinetics.families[family].auto_generated: + auto_gen_family_rxn_maps[family] = self.database.kinetics.families[family].get_reaction_matches( + thermo_database=self.database.thermo, + remove_degeneracy=True, + get_reverse=True, + exact_matches_only=False, + fix_labels=True + ) + + for i, reaction in enumerate(self.reaction_list): + source_dict_i = self.reaction_sources_dict[self.reaction_list[i]] + for j, other_reaction in enumerate(self.reaction_list): + # assuming only sources that match are correlated + source_dict_j = self.reaction_sources_dict[self.reaction_list[j]] + + for source_i in self.kinetic_input_uncertainties[i].keys(): + if source_i in self.kinetic_input_uncertainties[j].keys(): + self.kinetic_covariance_matrix[i, j] += self.kinetic_input_uncertainties[i][source_i] * self.kinetic_input_uncertainties[j][source_i] + else: + # no match in rules, but there may be overlap if they're SIDT trees using the same family + if 'Rate Rules' in source_dict_i.keys() and 'Rate Rules' in source_dict_j.keys(): + if source_dict_i['Rate Rules'][1]['autogenerated'] and source_dict_j['Rate Rules'][1]['autogenerated'] and \ + source_dict_i['Rate Rules'][0] == source_dict_j['Rate Rules'][0]: + # get #training reactions in overlap + family = source_dict_i['Rate Rules'][0] + node_name_i = source_dict_i['Rate 
Rules'][1]['template'][0].label + node_name_j = source_dict_j['Rate Rules'][1]['template'][0].label + rxns_i = auto_gen_family_rxn_maps[family][node_name_i] + rxns_j = auto_gen_family_rxn_maps[family][node_name_j] + + # count overlapping reactions: + overlap_count = 0 + for r_i in rxns_i: + if r_i in rxns_j: + overlap_count += 1 + + self.kinetic_covariance_matrix[i, j] += (overlap_count / len(rxns_i)) * (overlap_count / len(rxns_j)) * (k_param_engine.dlnk_rule ** 2.0) + + # check if a training reaction exactly matches a rate rule data entry + if 'Training' in source_dict_i.keys() and 'Rate Rules' in source_dict_j.keys(): + rate_rules_training_reactions = [t[1] for t in source_dict_j['Rate Rules'][1]['training']] + weights = [t[2] for t in source_dict_j['Rate Rules'][1]['training']] + training_reaction = source_dict_i['Training'][1] + for k in range(len(rate_rules_training_reactions)): + if rate_rules_training_reactions[k].item.is_isomorphic(training_reaction.item): + self.kinetic_covariance_matrix[i, j] += weights[k] * k_param_engine.dlnk_training * k_param_engine.dlnk_rule + elif 'Training' in source_dict_j.keys() and 'Rate Rules' in source_dict_i.keys(): + rate_rules_training_reactions = [t[1] for t in source_dict_i['Rate Rules'][1]['training']] + weights = [t[2] for t in source_dict_i['Rate Rules'][1]['training']] + training_reaction = source_dict_j['Training'][1] + for k in range(len(rate_rules_training_reactions)): + if rate_rules_training_reactions[k].item.is_isomorphic(training_reaction.item): + self.kinetic_covariance_matrix[i, j] += weights[k] * k_param_engine.dlnk_training * k_param_engine.dlnk_rule + + # Add in thermo correlations if both BEP + if isinstance(reaction.kinetics, (SurfaceArrheniusBEP, StickingCoefficientBEP)) and \ + isinstance(other_reaction.kinetics, (SurfaceArrheniusBEP, StickingCoefficientBEP)): + + alpha_i = reaction.kinetics.alpha.value_si + alpha_j = other_reaction.kinetics.alpha.value_si + + R = 8.314472 + T = 1000.0 + r1_sp_indices 
= [self.species_list.index(sp) for sp in reaction.reactants + reaction.products] + r1_coefficients = [-1 for x in reaction.reactants] + r1_coefficients.extend([1 for x in reaction.products]) + + r2_sp_indices = [self.species_list.index(sp) for sp in other_reaction.reactants + other_reaction.products] + r2_coefficients = [-1 for x in other_reaction.reactants] + r2_coefficients.extend([1 for x in other_reaction.products]) + for r1 in range(len(r1_sp_indices)): + for r2 in range(len(r2_sp_indices)): + + covH = self.thermo_covariance_matrix[r1_sp_indices[r1], r2_sp_indices[r2]] * 4184 * 4184 # convert from kcal/mol to J/mol + nu_i = r1_coefficients[r1] + nu_j = r2_coefficients[r2] + + self.kinetic_covariance_matrix[i, j] += nu_i * nu_j * alpha_i * alpha_j * covH / np.float_power(R * T, 2.0) + + return self.kinetic_covariance_matrix + + def get_overall_covariance_matrix(self): + # make combined thermo and kinetics covariance matrix, species first, then reactions + + assert self.kinetic_input_uncertainties is not None, 'Must call assign_parameter_uncertainties first' + assert len(self.kinetic_input_uncertainties) > 0, 'No kinetic parameters found' + assert self.thermo_covariance_matrix is not None, 'Must create thermo covariance matrix first' + assert self.kinetic_covariance_matrix is not None, 'Must create kinetics covariance matrix first' + + self.overall_covariance_matrix = np.zeros((len(self.species_list) + len(self.reaction_list), len(self.species_list) + len(self.reaction_list))) + self.overall_covariance_matrix[:len(self.species_list), :len(self.species_list)] = self.thermo_covariance_matrix + self.overall_covariance_matrix[len(self.species_list):, len(self.species_list):] = self.kinetic_covariance_matrix + + # fill in the covariance between reaction and species based on BEP relationships if applicable + for i in range(len(self.reaction_list)): + reaction = self.reaction_list[i] + + BEP = None + BEP_types = [SurfaceArrheniusBEP, StickingCoefficientBEP] + if 
isinstance(reaction.kinetics, tuple(BEP_types)): + BEP = reaction.kinetics + elif 'Rate Rules' not in self.reaction_sources_dict[reaction]: + pass # nothing to do here if kinetics doesn't depend on thermo through a BEP + elif self.reaction_sources_dict[reaction]['Rate Rules'][1]['rules'] and \ + isinstance(self.reaction_sources_dict[reaction]['Rate Rules'][1]['rules'][0][0].data, tuple(BEP_types)): + BEP = self.reaction_sources_dict[reaction]['Rate Rules'][1]['rules'][0][0].data + elif self.reaction_sources_dict[reaction]['Rate Rules'][1]['training'] and \ + isinstance(self.reaction_sources_dict[reaction]['Rate Rules'][1]['training'][0][0].data, tuple(BEP_types)): + BEP = self.reaction_sources_dict[reaction]['Rate Rules'][1]['training'][0][0].data + + if BEP: + alpha_i = BEP.alpha.value_si + + R = 8.314472 + T = 1000.0 + r1_sp_indices = [self.species_list.index(sp) for sp in reaction.reactants + reaction.products] + r1_coefficients = [-1 for x in reaction.reactants] + r1_coefficients.extend([1 for x in reaction.products]) + + for r1 in range(len(r1_sp_indices)): # loop over species in the reaction + for j in range(len(self.species_list)): # loop over all species + covH = self.thermo_covariance_matrix[r1_sp_indices[r1], j] * 4184 * 4184 # convert from kcal/mol to J/mol + nu_i = r1_coefficients[r1] + + self.overall_covariance_matrix[len(self.species_list) + i, j] += nu_i * alpha_i * covH / (R * T) / 4184 # convert back to kcal/mol + + # fill in the lower triangle by copying from the top + for i in range(len(self.reaction_list)): + for j in range(len(self.species_list)): + self.overall_covariance_matrix[j, len(self.species_list) + i] = self.overall_covariance_matrix[len(self.species_list) + i, j] + + return self.overall_covariance_matrix + def process_local_results(results, sensitive_species, number=10): """