diff --git a/pyomo/contrib/doe/doe.py b/pyomo/contrib/doe/doe.py index a13e6785bf4..9e3c9950835 100644 --- a/pyomo/contrib/doe/doe.py +++ b/pyomo/contrib/doe/doe.py @@ -52,11 +52,15 @@ import pyomo.environ as pyo from pyomo.contrib.doe.utils import ( - check_FIM, + check_matrix, compute_FIM_metrics, _SMALL_TOLERANCE_DEFINITENESS, + snake_traversal_grid_sampling, ) +from pyomo.contrib.parmest.utils.model_utils import update_model_from_suffix + + from pyomo.opt import SolverStatus @@ -75,6 +79,11 @@ class FiniteDifferenceStep(Enum): backward = "backward" +class DesignSpaceTraversal(Enum): + snake_traversal = "snake_traversal" + nested_for_loop = "nested_for_loop" + + class DesignOfExperiments: def __init__( self, @@ -748,6 +757,9 @@ def _sequential_FIM(self, model=None): if self.scale_nominal_param_value: scale_factor *= v + # TODO: scale by response values (i.e., measurement values) + # if self.scale_response_values: + # scale_factor /= measurement_vals_np[:, col_1].mean() # Calculate the column of the sensitivity matrix self.seq_jac[:, i] = ( measurement_vals_np[:, col_1] - measurement_vals_np[:, col_2] @@ -1750,7 +1762,8 @@ def check_model_FIM(self, model=None, FIM=None): ) ) - check_FIM(FIM) + # Check FIM is positive definite and symmetric + check_matrix(FIM) self.logger.info( "FIM provided matches expected dimensions from model " @@ -2005,6 +2018,378 @@ def compute_FIM_full_factorial( return self.fim_factorial_results + def compute_FIM_factorial( + self, + model=None, + design_vals: dict = None, + abs_change: list = None, + rel_change: list = None, + n_design_points: int = None, + method="sequential", + return_df=True, + traversal_scheme="snake_traversal", + file_name: str = None, + ): + """Perform a simulation-based factorial exploration of the experimental input + space (a.k.a. a ``grid search`` / ``parameter sweep``) to evaluate how FIM + metrics vary over the design space. + + Exactly one design-space specification mode must be provided: + + 1. ``design_vals``: + Explicit values for selected design variables (string names). + 2. ``n_design_points``: + Uniform ``np.linspace(lb, ub, n_design_points)`` values for each design + variable using model bounds. + 3. ``abs_change`` and/or ``rel_change``: + Step-based values for each design variable using model bounds and: + ``delta_i = lb_i * rel_change[i] + abs_change[i]``. + Values are generated from lower to upper bound in steps of ``delta_i``. + + Parameters + ---------- + model : DoE model, optional + Reserved for API compatibility. A fresh labeled model clone is built + internally for the factorial exploration. + design_vals : dict, optional + Mapping of design variable names (strings) to 1D array-like values: + ``{"var_name": [values...]}``. Keys must be a subset of + ``model.experiment_inputs`` names. Variables not included are fixed at + their model values. + abs_change : list, optional + Absolute change for each design variable. Used only when + ``design_vals`` and ``n_design_points`` are not provided. Length must + match the number of design variables in ``model.experiment_inputs``. + If provided without ``rel_change``, ``rel_change`` is treated as zeros. + rel_change : list, optional + Relative change for each design variable. Used only when + ``design_vals`` and ``n_design_points`` are not provided. Length must + match the number of design variables in ``model.experiment_inputs``. + If provided without ``abs_change``, ``abs_change`` is treated as zeros. + n_design_points : int, optional + Number of equally spaced points per design variable generated from + bounds via ``np.linspace``. Requires finite lower and upper bounds. + method : str, optional + Method used for FIM computation. Options: ``"kaug"`` and + ``"sequential"``. Default: ``"sequential"``. + return_df : bool, optional + If ``True``, print results as a pandas DataFrame. + traversal_scheme : str, optional + Which scheme to use for initializing the design variables. + Options are ``"snake_traversal"`` and ``"nested_for_loop"``. + If ``"snake_traversal"`` is used, the design variables will be initialized in a + snake-like pattern where only one design variables change at a time. + If ``"nested_for_loop"`` is used, the design variables will be initialized using a + nested for loop. Default: "snake_traversal" + file_name : str, optional + If provided, save results to ``.json``. + + Returns + ------- + factorial_results: dict + A dictionary containing the results of the factorial design with the + following keys: + - keys of model's experiment_inputs + - "log10 D-opt": list of D-optimality values + - "log10 A-opt": list of A-optimality values + - "log10 pseudo A-opt": list of pseudo A-optimality values + - "log10 E-opt": list of E-optimality values + - "log10 ME-opt": list of ME-optimality values + - "eigval_min": list of minimum eigenvalues + - "eigval_max": list of maximum eigenvalues + - "det_FIM": list of determinants of the FIM + - "trace_cov": list of traces of covariance matrix + - "trace_FIM": list of traces of the FIM + - "solve_time": list of solve times + - "total_points": total number of points in the factorial design + - "success_count": number of successful runs + - "failure_count": number of failed runs + - "FIM_all": list of all FIMs computed for each point in the factorial + """ + + # Start timer + sp_timer = TicTocTimer() + sp_timer.tic(msg=None) + self.logger.info("Beginning Factorial Design.") + + # Make new model for factorial design + self.factorial_model = self.experiment.get_labeled_model( + **self.get_labeled_model_args + ).clone() + model = self.factorial_model + + # Group the mutually exclusive arguments for passing design values + # abs_change, and rel_change can be provided together or separately + num_des_provided = ( + (design_vals is not None) + + (n_design_points is not None) + + (abs_change is not None or rel_change is not None) + ) + + # Check the count and raise an error if more than one was provided + if num_des_provided > 1: + raise ValueError( + "Please provide only one of the following arguments: " + "`design_vals`, `n_design_points`, or `abs_change` and/or `rel_change`." + ) + + # Optional: Check if at least one was provided, if that's a requirement + if num_des_provided == 0: + raise ValueError( + "At least one of the following arguments: " + "`design_vals`, `n_design_points`, or `abs_change` and/or `rel_change`" + " must be provided." + ) + + if design_vals is not None: + # Check whether the design_ranges keys are in the experiment_inputs + design_keys = set(design_vals.keys()) + map_keys = set([k.name for k in model.experiment_inputs.keys()]) + if not design_keys.issubset(map_keys): + incorrect_given_keys = design_keys - map_keys + suggested_keys = map_keys - design_keys + raise ValueError( + f"design_values keys: {incorrect_given_keys} are incorrect." + f"The keys should be from the following keys: {suggested_keys}." + ) + + # Get the design map keys that match the design_values keys + design_map_keys = [ + k + for k in model.experiment_inputs.keys() + if k.name in design_vals.keys() + ] + # This ensures that the order of the design_values keys matches the order of the + # design_map_keys so that design_point can be constructed correctly in the loop. + design_values = [design_vals[k.name] for k in design_map_keys] + + # Create a temporary suffix to pass in `update_model_from_suffix` + design_suff = pyo.Suffix(direction=pyo.Suffix.LOCAL) + design_suff.update((k, None) for k in design_map_keys) + + else: + design_keys = [k for k in model.experiment_inputs.keys()] + if abs_change: + if len(abs_change) != len(design_keys): + raise ValueError( + "`abs_change` must have the same length of " + f"`{len(design_keys)}` as `design_keys`." + ) + + if rel_change: + if len(rel_change) != len(design_keys): + raise ValueError( + "`rel_change` must have the same length of " + f"`{len(design_keys)}` as `design_keys`." + ) + + # if either abs_change or rel_change is not provided, set it to list of + # zeros + if abs_change or rel_change: + if not abs_change: + abs_change = [0] * len(design_keys) + elif not rel_change: + rel_change = [0] * len(design_keys) + + design_values = [] + # loop over design keys and generate design values + for i, comp in enumerate(design_keys): + lb = comp.lb + ub = comp.ub + # Check if the component has finite lower and upper bounds + if lb is None or ub is None: + raise ValueError( + f"{comp.name} does not have a lower or upper bound." + ) + + if abs_change is None and rel_change is None: + des_val = np.linspace(lb, ub, n_design_points) + + # if abs_change and/or rel_change is provided, generate design values + # using the formula: + # change_in_value = lower_bound * rel_change + abs_change + elif abs_change or rel_change: + des_val = [] + # Calculate the change in value, delta value + del_val = comp.lb * rel_change[i] + abs_change[i] + if del_val == 0: + raise ValueError( + f"Design variable {comp.name} has no change in value - " + "check abs_change and rel_change values." + ) + val = lb + while val <= ub: + des_val.append(val) + val += del_val + + else: + raise ValueError( + "Unexpected error in generating design values. Please check the" + " input parameters." + ) + + design_values.append(des_val) + + # generate the factorial points based on the initialization scheme + try: + scheme_enum = DesignSpaceTraversal(traversal_scheme) + except ValueError: + raise ValueError( + f"{traversal_scheme=} is not recognized. " + "Please use one of the following: " + f"{[e.value for e in DesignSpaceTraversal]}" + ) + + if scheme_enum == DesignSpaceTraversal.snake_traversal: + factorial_points = snake_traversal_grid_sampling(*design_values) + elif scheme_enum == DesignSpaceTraversal.nested_for_loop: + factorial_points = product(*design_values) + else: + raise ValueError( + f"{traversal_scheme=} is not recognized. " + "Please use one of the following: " + f"{[e.value for e in DesignSpaceTraversal]}" + ) + + factorial_points_list = list(factorial_points) + + factorial_results = {k.name: [] for k in model.experiment_inputs.keys()} + factorial_results.update( + { + "log10 D-opt": [], + "log10 A-opt": [], + "log10 pseudo A-opt": [], + "log10 E-opt": [], + "log10 ME-opt": [], + "eigval_min": [], + "eigval_max": [], + "det_FIM": [], + "trace_cov": [], + "trace_FIM": [], + "solve_time": [], + } + ) + + success_count = 0 + failure_count = 0 + total_points = len(factorial_points_list) + + # save the FIM for each point in the factorial design + self.n_parameters = len(model.unknown_parameters) + FIM_all = np.zeros((total_points, self.n_parameters, self.n_parameters)) + + time_set = [] + curr_point = 1 # Initial current point + for design_point in factorial_points_list: + if design_vals is not None: + update_model_from_suffix(design_suff, design_point) + else: + update_model_from_suffix(model.experiment_inputs, design_point) + + # Timing and logging objects + self.logger.info(f"=======Iteration Number: {curr_point} =======") + iter_timer = TicTocTimer() + iter_timer.tic(msg=None) + + try: + curr_point = success_count + failure_count + 1 + self.logger.info(f"This is run {curr_point} out of {total_points}.") + self.compute_FIM(model=model, method=method) + success_count += 1 + # iteration time + iter_t = iter_timer.toc(msg=None) + time_set.append(iter_t) + + # More logging + self.logger.info( + f"The code has run for {round(sum(time_set), 2)} seconds." + ) + self.logger.info( + "Estimated remaining time: %s seconds", + round( + sum(time_set) / (curr_point) * (total_points - curr_point + 1), + 2, + ), + ) + except: + self.logger.warning( + ":::::::::::Warning: Cannot converge this run.::::::::::::" + ) + failure_count += 1 + self.logger.warning("failed count:", failure_count) + + self._computed_FIM = np.zeros(self.prior_FIM.shape) + + iter_t = iter_timer.toc(msg=None) + time_set.append(iter_t) + + FIM = self._computed_FIM + + # Save FIM for the current design point + FIM_all[curr_point - 1, :, :] = FIM + + # Compute and record metrics on FIM + ( + det_FIM, + trace_cov, + trace_FIM, + E_vals, + _, + D_opt, + A_opt, + pseudo_A_opt, + E_opt, + ME_opt, + ) = compute_FIM_metrics(FIM) + + for k in model.experiment_inputs.keys(): + factorial_results[k.name].append(pyo.value(k)) + + factorial_results["log10 D-opt"].append(D_opt) + factorial_results["log10 A-opt"].append(A_opt) + factorial_results["log10 pseudo A-opt"].append(pseudo_A_opt) + factorial_results["log10 E-opt"].append(E_opt) + factorial_results["log10 ME-opt"].append(ME_opt) + factorial_results["eigval_min"].append(np.min(E_vals)) + factorial_results["eigval_max"].append(np.max(E_vals)) + factorial_results["det_FIM"].append(det_FIM) + factorial_results["trace_cov"].append(trace_cov) + factorial_results["trace_FIM"].append(trace_FIM) + factorial_results["solve_time"].append(time_set[-1]) + + factorial_results.update( + { + "total_points": total_points, + "success_count": success_count, + "failure_count": failure_count, + "FIM_all": FIM_all.tolist(), # Save all FIMs + } + ) + self.factorial_results = factorial_results + + # Set pandas DataFrame options + if return_df: + # exclude matrix-like entries, and unchanging results from the DataFrame + exclude_keys = {"total_points", "success_count", "failure_count", "FIM_all"} + dict_for_df = { + k: v for k, v in factorial_results.items() if k not in exclude_keys + } + res_df = pd.DataFrame(dict_for_df) + print("\n\n=========Factorial results===========") + print("Total points:", total_points) + print("Success count:", success_count) + print("Failure count:", failure_count) + print("\n") + print(res_df) + + # Save the results to a json file based on the file_name provided + if file_name is not None: + with open(file_name + ".json", "w") as f: + json.dump(self.factorial_results, f, indent=4) + self.logger.info(f"Results saved to {file_name}.json.") + + return self.factorial_results + # TODO: Overhaul plotting functions to not use strings # TODO: Make the plotting functionalities work for >2 design features def draw_factorial_figure( @@ -2022,20 +2407,26 @@ def draw_factorial_figure( log_scale=True, ): """ - Extract results needed for drawing figures from - the results dictionary provided by the - ``compute_FIM_full_factorial`` function. + Extract and filter factorial results for 1D/2D sensitivity plots. + + Results can come from either ``compute_FIM_full_factorial`` or + ``compute_FIM_factorial``. Design variable names are specified as strings + to align with the factorial APIs. Draw either the 1D sensitivity curve or 2D heatmap. Parameters ---------- - results: dictionary, results dictionary from ``compute_FIM_full_factorial`` - default: None (self.fim_factorial_results) - sensitivity_design_variables: a list, design variable names to draw sensitivity - fixed_design_variables: a dictionary, keys are the design variable names to be - fixed, values are the value of it to be fixed. - full_design_variable_names: a list, all the design variables in the problem. + results: dictionary/pandas.DataFrame + Results dictionary (or DataFrame) from factorial evaluation. + Default: ``self.fim_factorial_results``. + sensitivity_design_variables: iterable of strings + Design variable names used as sensitivity degrees of freedom. + At most two are currently supported. + fixed_design_variables: dict + Dictionary of fixed design variable names and values. + full_design_variable_names: list of strings + All design variable names in the problem. title_text: a string, name for the figure xlabel_text: a string, label for the x-axis of the figure default: last design variable name @@ -2071,7 +2462,9 @@ def draw_factorial_figure( "include all the design variable names." ) - des_names = full_design_variable_names + des_names = list(full_design_variable_names) + if any(not isinstance(name, str) for name in des_names): + raise ValueError("``full_design_variable_names`` entries must be strings.") # Inputs must exist for the function to do anything # TODO: Put in a default value function????? @@ -2081,23 +2474,63 @@ def draw_factorial_figure( if fixed_design_variables is None: raise ValueError("``fixed_design_variables`` must be included.") - # Check that the provided design variables are within the results object - check_des_vars = True - for k, v in fixed_design_variables.items(): - check_des_vars *= k in ([k2 for k2, v2 in results.items()]) - check_sens_vars = True - for k in sensitivity_design_variables: - check_sens_vars *= k in [k2 for k2, v2 in results.items()] + if isinstance(sensitivity_design_variables, str): + sensitivity_design_variables = [sensitivity_design_variables] + else: + try: + sensitivity_design_variables = list(sensitivity_design_variables) + except TypeError: + raise TypeError( + "``sensitivity_design_variables`` must be an iterable of strings." + ) + + if any(not isinstance(name, str) for name in sensitivity_design_variables): + raise ValueError("Sensitivity design variable names must be strings.") - if not check_des_vars: + if len(set(sensitivity_design_variables)) != len(sensitivity_design_variables): + raise ValueError( + "Sensitivity design variables contain duplicates. Please provide " + "unique design variable names." + ) + + if not isinstance(fixed_design_variables, dict): + raise ValueError("``fixed_design_variables`` must be a dictionary.") + if any(not isinstance(name, str) for name in fixed_design_variables.keys()): + raise ValueError("Fixed design variable names must be strings.") + + des_name_set = set(des_names) + missing_from_results = des_name_set - set(results.keys()) + if missing_from_results: + raise ValueError( + "The following design variable names were not found in the results " + f"object: {missing_from_results}." + ) + + invalid_fixed_names = set(fixed_design_variables.keys()) - des_name_set + if invalid_fixed_names: raise ValueError( "Fixed design variables do not all appear " - "in the results object keys." + "in the results object keys. " + f"Invalid entries: {invalid_fixed_names}. " + f"Valid design variable names are: {des_name_set}." ) - if not check_sens_vars: + + invalid_sensitivity_names = set(sensitivity_design_variables) - des_name_set + if invalid_sensitivity_names: raise ValueError( "Sensitivity design variables do not all appear " - "in the results object keys." + "in the results object keys. " + f"Invalid entries: {invalid_sensitivity_names}. " + f"Valid design variable names are: {des_name_set}." + ) + + overlap_names = set(sensitivity_design_variables).intersection( + fixed_design_variables.keys() + ) + if overlap_names: + raise ValueError( + "Design variables cannot be both sensitivity and fixed variables. " + f"Overlapping names: {overlap_names}." ) # TODO: Make it possible to plot pair-wise sensitivities for all variables diff --git a/pyomo/contrib/doe/tests/test_doe_errors.py b/pyomo/contrib/doe/tests/test_doe_errors.py index 10165aa6267..f4de4b46704 100644 --- a/pyomo/contrib/doe/tests/test_doe_errors.py +++ b/pyomo/contrib/doe/tests/test_doe_errors.py @@ -219,8 +219,7 @@ def test_reactor_check_bad_prior_negative_eigenvalue(self): with self.assertRaisesRegex( ValueError, - r"FIM provided is not positive definite. It has one or more " - r"negative eigenvalue\(s\) less than -{:.1e}".format( + r"Matrix provided is not positive definite. It has one or more negative eigenvalue\(s\) less than -{:.1e}".format( _SMALL_TOLERANCE_DEFINITENESS ), ): @@ -244,7 +243,7 @@ def test_reactor_check_bad_prior_not_symmetric(self): with self.assertRaisesRegex( ValueError, - "FIM provided is not symmetric using absolute tolerance {}".format( + "Matrix provided is not symmetric using absolute tolerance {}".format( _SMALL_TOLERANCE_SYMMETRY ), ): diff --git a/pyomo/contrib/doe/tests/test_utils.py b/pyomo/contrib/doe/tests/test_utils.py index a6f6cb38a23..76b721d77ec 100644 --- a/pyomo/contrib/doe/tests/test_utils.py +++ b/pyomo/contrib/doe/tests/test_utils.py @@ -6,13 +6,20 @@ # Solutions of Sandia, LLC, the U.S. Government retains certain rights in this # software. This software is distributed under the 3-clause BSD License. # ____________________________________________________________________________________ -from pyomo.common.dependencies import numpy as np, numpy_available +from pyomo.common.dependencies import ( + numpy as np, + numpy_available, + pandas as pd, + pandas_available, +) import pyomo.common.unittest as unittest from pyomo.contrib.doe.utils import ( - check_FIM, + check_matrix, compute_FIM_metrics, get_FIM_metrics, + snake_traversal_grid_sampling, + compute_correlation_matrix as compcorr, _SMALL_TOLERANCE_DEFINITENESS, _SMALL_TOLERANCE_SYMMETRY, _SMALL_TOLERANCE_IMG, @@ -20,45 +27,49 @@ @unittest.skipIf(not numpy_available, "Numpy is not available") +@unittest.skipIf(not pandas_available, "Pandas is not available") class TestUtilsFIM(unittest.TestCase): - """Test the check_FIM() from utils.py.""" + """Test the check_matrix() from utils.py.""" - def test_check_FIM_valid(self): + # TODO: add tests when `check_pos_def = False` is used in check_matrix() + def test_check_matrix_valid(self): """Test case where the FIM is valid (square, positive definite, symmetric).""" FIM = np.array([[4, 1], [1, 3]]) try: - check_FIM(FIM) + check_matrix(FIM) except ValueError as e: self.fail(f"Unexpected error: {e}") - def test_check_FIM_non_square(self): + def test_check_matrix_non_square(self): """Test case where the FIM is not square.""" FIM = np.array([[4, 1], [1, 3], [2, 1]]) - with self.assertRaisesRegex(ValueError, "FIM must be a square matrix"): - check_FIM(FIM) + with self.assertRaisesRegex( + ValueError, "argument mat must be a 2D square matrix" + ): + check_matrix(FIM) - def test_check_FIM_non_positive_definite(self): + def test_check_matrix_non_positive_definite(self): """Test case where the FIM is not positive definite.""" FIM = np.array([[1, 0], [0, -2]]) with self.assertRaisesRegex( ValueError, - "FIM provided is not positive definite. It has one or more negative " + "Matrix provided is not positive definite. It has one or more negative " + r"eigenvalue\(s\) less than -{:.1e}".format( _SMALL_TOLERANCE_DEFINITENESS ), ): - check_FIM(FIM) + check_matrix(FIM) - def test_check_FIM_non_symmetric(self): + def test_check_matrix_non_symmetric(self): """Test case where the FIM is not symmetric.""" FIM = np.array([[4, 1], [0, 3]]) with self.assertRaisesRegex( ValueError, - "FIM provided is not symmetric using absolute tolerance {}".format( + "Matrix provided is not symmetric using absolute tolerance {}".format( _SMALL_TOLERANCE_SYMMETRY ), ): - check_FIM(FIM) + check_matrix(FIM) """Test the compute_FIM_metrics() from utils.py.""" @@ -153,6 +164,142 @@ def test_get_FIM_metrics(self): fim_metrics["log10(Modified E-Optimality)"], expected['ME_opt'] ) + def test_snake_traversal_grid_sampling_errors(self): + # Test the error handling with lists + list_2d_bad = [[1, 2, 3], [4, 5, 6]] + with self.assertRaises(ValueError) as cm: + list(snake_traversal_grid_sampling(list_2d_bad)) + self.assertEqual( + str(cm.exception), "Argument at position 0 is not 1D. Got shape (2, 3)." + ) + + list_2d_wrong_shape_bad = [[1, 2, 3], [4, 5, 6, 7]] + with self.assertRaises(ValueError) as cm: + list(snake_traversal_grid_sampling(list_2d_wrong_shape_bad)) + self.assertEqual( + str(cm.exception), "Argument at position 0 is not 1D array-like." + ) + + # Test the error handling with tuples + tuple_2d_bad = ((1, 2, 3), (4, 5, 6)) + with self.assertRaises(ValueError) as cm: + list(snake_traversal_grid_sampling(tuple_2d_bad)) + self.assertEqual( + str(cm.exception), "Argument at position 0 is not 1D. Got shape (2, 3)." + ) + + tuple_2d_wrong_shape_bad = ((1, 2, 3), (4, 5, 6, 7)) + with self.assertRaises(ValueError) as cm: + list(snake_traversal_grid_sampling(tuple_2d_wrong_shape_bad)) + self.assertEqual( + str(cm.exception), "Argument at position 0 is not 1D array-like." + ) + + # Test the error handling with numpy arrays + array_2d_bad = np.array([[1, 2, 3], [4, 5, 6]]) + with self.assertRaises(ValueError) as cm: + list(snake_traversal_grid_sampling(array_2d_bad)) + self.assertEqual( + str(cm.exception), "Argument at position 0 is not 1D. Got shape (2, 3)." + ) + + def test_snake_traversal_grid_sampling_values(self): + # Test with lists + # Test with a single list + list1 = [1, 2, 3] + result_list1 = list(snake_traversal_grid_sampling(list1)) + expected_list1 = [(1,), (2,), (3,)] + self.assertEqual(result_list1, expected_list1) + + # Test with two lists + list2 = [4, 5, 6] + result_list2 = list(snake_traversal_grid_sampling(list1, list2)) + expected_list2 = [ + (1, 4), + (1, 5), + (1, 6), + (2, 6), + (2, 5), + (2, 4), + (3, 4), + (3, 5), + (3, 6), + ] + self.assertEqual(result_list2, expected_list2) + + # Test with three lists + list3 = [7, 8] + result_list3 = list(snake_traversal_grid_sampling(list1, list2, list3)) + expected_list3 = [ + (1, 4, 7), + (1, 4, 8), + (1, 5, 8), + (1, 5, 7), + (1, 6, 7), + (1, 6, 8), + (2, 6, 8), + (2, 6, 7), + (2, 5, 7), + (2, 5, 8), + (2, 4, 8), + (2, 4, 7), + (3, 4, 7), + (3, 4, 8), + (3, 5, 8), + (3, 5, 7), + (3, 6, 7), + (3, 6, 8), + ] + self.assertEqual(result_list3, expected_list3) + + # Test with tuples + tuple1 = (1, 2, 3) + result_tuple1 = list(snake_traversal_grid_sampling(tuple1)) + tuple2 = (4, 5, 6) + result_tuple2 = list(snake_traversal_grid_sampling(tuple1, tuple2)) + tuple3 = (7, 8) + result_tuple3 = list(snake_traversal_grid_sampling(tuple1, tuple2, tuple3)) + self.assertEqual(result_tuple1, expected_list1) + self.assertEqual(result_tuple2, expected_list2) + self.assertEqual(result_tuple3, expected_list3) + + # Test with numpy arrays + array1 = np.array([1, 2, 3]) + array2 = np.array([4, 5, 6]) + array3 = np.array([7, 8]) + result_array1 = list(snake_traversal_grid_sampling(array1)) + result_array2 = list(snake_traversal_grid_sampling(array1, array2)) + result_array3 = list(snake_traversal_grid_sampling(array1, array2, array3)) + self.assertEqual(result_array1, expected_list1) + self.assertEqual(result_array2, expected_list2) + self.assertEqual(result_array3, expected_list3) + + # Test with mixed types(List, Tuple, numpy array) + result_mixed = list(snake_traversal_grid_sampling(list1, tuple2, array3)) + self.assertEqual(result_mixed, expected_list3) + + # TODO: Add more tests as needed + def test_compute_correlation_matrix(self): + # Create a sample covariance matrix + covariance_matrix = np.array([[4, 2], [2, 3]]) + var_name = ["X1", "X2"] + + # Compute the correlation matrix + correlation_matrix = compcorr(covariance_matrix, var_name, significant_digits=3) + + # Expected correlation matrix + expected_correlation_matrix = pd.DataFrame( + [[1.0, 0.577], [0.577, 1.0]], index=var_name, columns=var_name + ) + + # Check if the computed correlation matrix matches the expected one + pd.testing.assert_frame_equal( + correlation_matrix, + expected_correlation_matrix, + check_exact=False, + atol=1e-6, + ) + if __name__ == "__main__": unittest.main() diff --git a/pyomo/contrib/doe/utils.py b/pyomo/contrib/doe/utils.py index c85fbbe4c0b..7dce85aec54 100644 --- a/pyomo/contrib/doe/utils.py +++ b/pyomo/contrib/doe/utils.py @@ -26,7 +26,12 @@ import pyomo.environ as pyo -from pyomo.common.dependencies import numpy as np, numpy_available +from pyomo.common.dependencies import ( + numpy as np, + numpy_available, + pandas as pd, + pandas_available, +) from pyomo.core.base.param import ParamData from pyomo.core.base.var import VarData @@ -91,41 +96,43 @@ def rescale_FIM(FIM, param_vals): return scaled_FIM -def check_FIM(FIM): +def check_matrix(mat): """ - Checks that the FIM is square, positive definite, and symmetric. + Checks that the matrix is square, positive definite, and symmetric. Parameters ---------- - FIM: 2D numpy array representing the FIM + mat: 2D numpy array representing the matrix Returns ------- None, but will raise error messages as needed """ - # Check that the FIM is a square matrix - if FIM.shape[0] != FIM.shape[1]: - raise ValueError("FIM must be a square matrix") + # Check that the matrix is a square matrix + if mat.ndim != 2 or mat.shape[0] != mat.shape[1]: + raise ValueError("argument mat must be a 2D square matrix") - # Compute the eigenvalues of the FIM - evals = np.linalg.eigvals(FIM) + # Compute the eigenvalues of the matrix + evals = np.linalg.eigvals(mat) - # Check if the FIM is positive definite + # Check if the matrix is positive definite if np.min(evals) < -_SMALL_TOLERANCE_DEFINITENESS: raise ValueError( - "FIM provided is not positive definite. It has one or more negative " + "Matrix provided is not positive definite. It has one or more negative " + "eigenvalue(s) less than -{:.1e}".format(_SMALL_TOLERANCE_DEFINITENESS) ) - # Check if the FIM is symmetric - if not np.allclose(FIM, FIM.T, atol=_SMALL_TOLERANCE_SYMMETRY): + # Check if the matrix is symmetric + if not np.allclose(mat, mat.T, atol=_SMALL_TOLERANCE_SYMMETRY): raise ValueError( - "FIM provided is not symmetric using absolute tolerance {}".format( + "Matrix provided is not symmetric using absolute tolerance {}".format( _SMALL_TOLERANCE_SYMMETRY ) ) +# TODO: probably can merge compute_FIM_metrics() and get_FIM_metrics() in a single + +# TODO: function with an argument (e.g., return_dict = True) to compute the metrics # Functions to compute FIM metrics def compute_FIM_metrics(FIM): """ @@ -161,7 +168,7 @@ def compute_FIM_metrics(FIM): """ # Check whether the FIM is square, positive definite, and symmetric - check_FIM(FIM) + check_matrix(FIM) # Compute FIM metrics det_FIM = np.linalg.det(FIM) @@ -266,3 +273,155 @@ def get_FIM_metrics(FIM): "log10(E-Optimality)": E_opt, "log10(Modified E-Optimality)": ME_opt, } + + +def snake_traversal_grid_sampling(*array_like_args): + """ + Generates a multi-dimensional pattern for an arbitrary number of 1D array-like arguments. + This pattern is useful for generating patterns for sensitivity analysis when we want + to change one variable at a time. This function uses recursion and acts as a generator. + + Parameters + ---------- + *array_like_args: A variable number of 1D array-like objects as arguments. + + Yields + ------ + A tuple representing points in the snake pattern. + """ + # throw an error if the array_like_args are not array-like or all 1D + for i, arg in enumerate(array_like_args): + try: + arr_np = np.asarray(arg) + except Exception: + raise ValueError(f"Argument at position {i} is not 1D array-like.") + + if arr_np.ndim != 1: + raise ValueError( + f"Argument at position {i} is not 1D. Got shape {arr_np.shape}." + ) + + # The main logic is in a nested recursive helper function. + def _generate_recursive(depth, index_sum): + # Base case: If we've processed all lists, we're at the end of a path. + if depth == len(array_like_args): + yield () + return + + current_list = array_like_args[depth] + + # Determine the iteration direction based on the sum of parent indices. + # This is the mathematical rule for the zigzag. + is_forward = index_sum % 2 == 0 + iterable = current_list if is_forward else reversed(current_list) + + # Enumerate to get the index `i` for the *next* recursive call's sum. + for i, value in enumerate(iterable): + # Recur for the next list, updating the index_sum. + for sub_pattern in _generate_recursive(depth + 1, index_sum + i): + # Prepend the current value to the results from deeper levels. + yield (value,) + sub_pattern + + # Start the recursion at the first list (depth 0) with an initial sum of 0. + yield from _generate_recursive(depth=0, index_sum=0) + + +def compute_correlation_matrix( + covariance_matrix, var_name: list = None, significant_digits=None +): + """ + Computes the correlation matrix from a covariance matrix. + + Parameters + ---------- + covariance_matrix : numpy.ndarray or pandas.DataFrame + 2D square covariance matrix. If a DataFrame is provided and ``var_name`` + is not given, DataFrame column names are used for both rows and columns + in the output. Column names must be unique. + var_name : list, optional + Variable names used for both rows and columns in the output correlation + matrix. Length must match matrix dimension and names must be unique. + If omitted and ``covariance_matrix`` is a DataFrame, DataFrame column names + are used. If omitted and ``covariance_matrix`` is a NumPy array, an unlabeled + NumPy array is returned. + significant_digits : int, optional + Number of significant digits for rounding entries of the correlation matrix. + Default: None (no rounding). + + Returns + ------- + pandas.DataFrame/numpy.ndarray + If `var_name` is provided, returns a pandas DataFrame with the correlation matrix + and the specified variable names as both index and columns. If `var_name` is not + provided, returns: + - a pandas DataFrame when the covariance input is a DataFrame, + - a numpy array when the covariance input is a NumPy array. + """ + # Support square pandas.DataFrame input while preserving label information. + # Use duck typing to avoid hard dependency on pandas type checks here. + is_dataframe_like = ( + hasattr(covariance_matrix, "to_numpy") + and hasattr(covariance_matrix, "index") + and hasattr(covariance_matrix, "columns") + ) + + if is_dataframe_like: + cov_columns = list(covariance_matrix.columns) + if len(set(cov_columns)) != len(cov_columns): + raise ValueError( + "For DataFrame covariance_matrix input, column labels must be unique." + ) + covariance_matrix_np = covariance_matrix.to_numpy() + else: + covariance_matrix_np = np.asarray(covariance_matrix) + cov_columns = None + + # Check if covariance matrix is symmetric and square + check_matrix(covariance_matrix_np) + + if var_name is not None: + if isinstance(var_name, str): + raise ValueError( + "`var_name` must be a list-like collection of names, not a string." + ) + var_name = list(var_name) + if len(var_name) != covariance_matrix_np.shape[0]: + raise ValueError( + "Length of var_name must match the number of rows/columns in the " + "covariance matrix." + ) + if len(set(var_name)) != len(var_name): + raise ValueError("Entries in `var_name` must be unique.") + elif is_dataframe_like: + var_name = cov_columns + + if not np.all(np.isfinite(covariance_matrix_np)): + raise ValueError("Covariance matrix contains non-finite values.") + + std_dev = np.sqrt(np.diag(covariance_matrix_np)) + if np.any(std_dev == 0): + raise ValueError( + "Covariance matrix contains one or more zero variances; correlation " + "entries are undefined." + ) + + std_dev_matrix = np.outer(std_dev, std_dev) + correlation_matrix = covariance_matrix_np / std_dev_matrix + + # Build labeled DataFrame output only when names are available. + if var_name is not None: + if not pandas_available: + raise RuntimeError( + "pandas is required to return labeled correlation matrices." + ) + corr_output = pd.DataFrame(correlation_matrix, index=var_name, columns=var_name) + else: + corr_output = correlation_matrix + + if significant_digits is None: + return corr_output + + if not isinstance(significant_digits, int) or significant_digits < 0: + raise ValueError("`significant_digits` must be a non-negative integer or None.") + + return corr_output.round(significant_digits)