diff --git a/pocomc/flow.py b/pocomc/flow.py index ec4e00b..3581251 100644 --- a/pocomc/flow.py +++ b/pocomc/flow.py @@ -238,11 +238,11 @@ def fit(self, weights = weights[rand_indx] if noise is not None: - min_dists = torch.empty(n_samples) - for i in range(n_samples): - min_dist = torch.linalg.norm(x[i] - x, axis=1) - min_dists[i] = torch.min(min_dist[min_dist > 0.0]) - mean_min_dist = torch.mean(min_dist) + dists = torch.cdist(x, x) + # Mask out the diagonal + dists.fill_diagonal_(float('inf')) + min_dists = torch.min(dists, dim=1).values + mean_min_dist = torch.mean(min_dists) if validation_split > 0.0: x_train = x[:int(validation_split * n_samples)] diff --git a/pocomc/mcmc.py b/pocomc/mcmc.py index 2455f26..9774538 100644 --- a/pocomc/mcmc.py +++ b/pocomc/mcmc.py @@ -1,7 +1,6 @@ import numpy as np import torch -from .tools import numpy_to_torch, torch_to_numpy, flow_numpy_wrapper from .student import fit_mvstud @torch.no_grad() @@ -44,7 +43,7 @@ def preconditioned_pcn(state_dict: dict, log_like = function_dict.get('loglike') log_prior = function_dict.get('logprior') scaler = function_dict.get('scaler') - flow = flow_numpy_wrapper(function_dict.get('flow')) + flow = function_dict.get('flow') geometry = function_dict.get('theta_geometry') # Get MCMC options @@ -56,16 +55,17 @@ def preconditioned_pcn(state_dict: dict, # Get number of particles and parameters/dimensions n_walkers, n_dim = x.shape - # Transform u to theta - theta, logdetj_flow = flow.forward(u) + # PyTorch variables + u_t = torch.tensor(u, dtype=torch.float32) + theta_t, logdetj_flow_t = flow.forward(u_t) + logdetj_flow_t = -logdetj_flow_t - - mu = geometry.t_mean - cov = geometry.t_cov + mu_t = torch.tensor(geometry.t_mean, dtype=torch.float32) + cov_t = torch.tensor(geometry.t_cov, dtype=torch.float32) nu = geometry.t_nu - inv_cov = np.linalg.inv(cov) - chol_cov = np.linalg.cholesky(cov) + inv_cov_t = torch.linalg.inv(cov_t) + chol_cov_t = torch.linalg.cholesky(cov_t) logp2_val = np.mean(logl + logp) cnt = 0 @@ -74,18 +74,20 @@ def preconditioned_pcn(state_dict: dict, while True: i += 1 - diff = theta - mu - s = np.empty(n_walkers) - for k in range(n_walkers): - s[k] = 1./np.random.gamma((n_dim + nu) / 2, 2.0/(nu + np.dot(diff[k],np.dot(inv_cov,diff[k])))) + diff_t = theta_t - mu_t + quad_form_t = torch.einsum('ki,ij,kj->k', diff_t, inv_cov_t, diff_t) + scale_gamma_t = 2.0 / (nu + quad_form_t) - # Propose new points in theta space - theta_prime = np.empty((n_walkers, n_dim)) - for k in range(n_walkers): - theta_prime[k] = mu + (1.0 - sigma ** 2.0) ** 0.5 * diff[k] + sigma * np.sqrt(s[k]) * np.dot(chol_cov, np.random.randn(n_dim)) + gamma_dist = torch.distributions.Gamma((n_dim + nu) / 2, 1.0 / scale_gamma_t) + s_t = 1.0 / gamma_dist.sample() - # Transform to u space - u_prime, logdetj_flow_prime = flow.inverse(theta_prime) + randn_term_t = torch.randn(n_walkers, n_dim) @ chol_cov_t.T + theta_prime_t = mu_t + (1.0 - sigma ** 2.0) ** 0.5 * diff_t + sigma * torch.sqrt(s_t)[:, None] * randn_term_t + + u_prime_t, logdetj_flow_prime_t = flow.inverse(theta_prime_t) + + # Convert to numpy for scaler and likelihood + u_prime = u_prime_t.numpy().astype(np.float64) # Transform to x space x_prime, logdetj_prime = scaler.inverse(u_prime) @@ -95,6 +97,7 @@ def preconditioned_pcn(state_dict: dict, x_prime = scaler.apply_boundary_conditions_x(x_prime) u_prime = scaler.forward(x_prime, check_input=False) x_prime, logdetj_prime = scaler.inverse(u_prime) + u_prime_t = torch.tensor(u_prime, dtype=torch.float32) # Compute finite mask finite_mask_logdetj_prime = np.isfinite(logdetj_prime) @@ -121,12 +124,14 @@ def preconditioned_pcn(state_dict: dict, n_calls += np.sum(finite_mask) # Compute Metropolis factors - diff_prime = theta_prime-mu - A = np.empty(n_walkers) - B = np.empty(n_walkers) - for k in range(n_walkers): - A[k] = -(n_dim+nu)/2*np.log(1+np.dot(diff_prime[k],np.dot(inv_cov,diff_prime[k]))/nu) - B[k] = -(n_dim+nu)/2*np.log(1+np.dot(diff[k],np.dot(inv_cov,diff[k]))/nu) + diff_prime_t = theta_prime_t - mu_t + quad_form_prime_t = torch.einsum('ki,ij,kj->k', diff_prime_t, inv_cov_t, diff_prime_t) + A = -(n_dim + nu) / 2 * np.log(1 + quad_form_prime_t.numpy() / nu) + B = -(n_dim + nu) / 2 * np.log(1 + quad_form_t.numpy() / nu) + + logdetj_flow_prime = logdetj_flow_prime_t.numpy() + logdetj_flow = logdetj_flow_t.numpy() + alpha = np.minimum( np.ones(n_walkers), np.exp(logl_prime * beta - logl * beta + logp_prime - logp + logdetj_prime - logdetj + logdetj_flow_prime - logdetj_flow - A + B) @@ -138,11 +143,13 @@ def preconditioned_pcn(state_dict: dict, mask = u_rand < alpha # Accept new points - theta[mask] = theta_prime[mask] + mask_t = torch.from_numpy(mask) + theta_t[mask_t] = theta_prime_t[mask_t] + u_t[mask_t] = u_prime_t[mask_t] + logdetj_flow_t[mask_t] = logdetj_flow_prime_t[mask_t] u[mask] = u_prime[mask] x[mask] = x_prime[mask] logdetj[mask] = logdetj_prime[mask] - logdetj_flow[mask] = logdetj_flow_prime[mask] logl[mask] = logl_prime[mask] logp[mask] = logp_prime[mask] if have_blobs: @@ -153,7 +160,7 @@ def preconditioned_pcn(state_dict: dict, #sigma = np.minimum(sigma + 1 / (i + 1)**0.5 * (np.mean(alpha) - 0.234), 0.99) # Adapt mean parameter using diminishing adaptation - mu = mu + 1.0 / (i + 1.0) * (np.mean(theta, axis=0) - mu) + mu_t = mu_t + 1.0 / (i + 1.0) * (torch.mean(theta_t, axis=0) - mu_t) # Update progress bar if available if progress_bar is not None: @@ -222,7 +229,7 @@ def preconditioned_rwm(state_dict: dict, log_like = function_dict.get('loglike') log_prior = function_dict.get('logprior') scaler = function_dict.get('scaler') - flow = flow_numpy_wrapper(function_dict.get('flow')) + flow = function_dict.get('flow') geometry = function_dict.get('theta_geometry') # Get MCMC options @@ -234,11 +241,13 @@ def preconditioned_rwm(state_dict: dict, # Get number of particles and parameters/dimensions n_walkers, n_dim = x.shape - cov = geometry.normal_cov - chol = np.linalg.cholesky(cov) + cov_t = torch.tensor(geometry.normal_cov, dtype=torch.float32) + chol_t = torch.linalg.cholesky(cov_t) - # Transform u to theta - theta, logdetj_flow = flow.forward(u) + # PyTorch variables + u_t = torch.tensor(u, dtype=torch.float32) + theta_t, logdetj_flow_t = flow.forward(u_t) + logdetj_flow_t = -logdetj_flow_t logp2_val = np.mean(logl + logp + logdetj) cnt = 0 @@ -248,12 +257,13 @@ def preconditioned_rwm(state_dict: dict, i += 1 # Propose new points in theta space - theta_prime = np.empty((n_walkers, n_dim)) - for k in range(n_walkers): - theta_prime[k] = theta[k] + sigma * np.dot(chol, np.random.randn(n_dim)) + randn_term_t = torch.randn(n_walkers, n_dim) @ chol_t.T + theta_prime_t = theta_t + sigma * randn_term_t # Transform to u space - u_prime, logdetj_flow_prime = flow.inverse(theta_prime) + u_prime_t, logdetj_flow_prime_t = flow.inverse(theta_prime_t) + + u_prime = u_prime_t.numpy().astype(np.float64) # Transform to x space x_prime, logdetj_prime = scaler.inverse(u_prime) @@ -263,6 +273,7 @@ def preconditioned_rwm(state_dict: dict, x_prime = scaler.apply_boundary_conditions_x(x_prime) u_prime = scaler.forward(x_prime, check_input=False) x_prime, logdetj_prime = scaler.inverse(u_prime) + u_prime_t = torch.tensor(u_prime, dtype=torch.float32) # Compute finite mask finite_mask_logdetj_prime = np.isfinite(logdetj_prime) @@ -288,6 +299,9 @@ def preconditioned_rwm(state_dict: dict, # Update likelihood call counter n_calls += np.sum(finite_mask) + logdetj_flow_prime = logdetj_flow_prime_t.numpy() + logdetj_flow = logdetj_flow_t.numpy() + # Compute Metropolis factors alpha = np.minimum( np.ones(n_walkers), @@ -300,11 +314,13 @@ def preconditioned_rwm(state_dict: dict, mask = u_rand < alpha # Accept new points - theta[mask] = theta_prime[mask] + mask_t = torch.from_numpy(mask) + theta_t[mask_t] = theta_prime_t[mask_t] + u_t[mask_t] = u_prime_t[mask_t] + logdetj_flow_t[mask_t] = logdetj_flow_prime_t[mask_t] u[mask] = u_prime[mask] x[mask] = x_prime[mask] logdetj[mask] = logdetj_prime[mask] - logdetj_flow[mask] = logdetj_flow_prime[mask] logl[mask] = logl_prime[mask] logp[mask] = logp_prime[mask] if have_blobs: @@ -407,14 +423,13 @@ def pcn(state_dict: dict, i += 1 diff = u - mu - s = np.empty(n_walkers) - for k in range(n_walkers): - s[k] = 1./np.random.gamma((n_dim + nu) / 2, 2.0/(nu + np.dot(diff[k],np.dot(inv_cov,diff[k])))) + quad_form = np.einsum('ki,ij,kj->k', diff, inv_cov, diff) + scale_gamma = 2.0 / (nu + quad_form) + s = 1.0 / np.random.gamma((n_dim + nu) / 2, scale_gamma, size=n_walkers) # Propose new points in u space - u_prime = np.empty((n_walkers, n_dim)) - for k in range(n_walkers): - u_prime[k] = mu + (1.0 - sigma ** 2.0) ** 0.5 * diff[k] + sigma * np.sqrt(s[k]) * np.dot(chol_cov, np.random.randn(n_dim)) + randn_term = np.random.randn(n_walkers, n_dim) @ chol_cov.T + u_prime = mu + (1.0 - sigma ** 2.0) ** 0.5 * diff + sigma * np.sqrt(s)[:, None] * randn_term # Transform to x space x_prime, logdetj_prime = scaler.inverse(u_prime) @@ -451,11 +466,10 @@ def pcn(state_dict: dict, # Compute Metropolis factors diff_prime = u_prime - mu - A = np.empty(n_walkers) - B = np.empty(n_walkers) - for k in range(n_walkers): - A[k] = -(n_dim+nu)/2*np.log(1+np.dot(diff_prime[k],np.dot(inv_cov,diff_prime[k]))/nu) - B[k] = -(n_dim+nu)/2*np.log(1+np.dot(diff[k],np.dot(inv_cov,diff[k]))/nu) + quad_form_prime = np.einsum('ki,ij,kj->k', diff_prime, inv_cov, diff_prime) + A = -(n_dim + nu) / 2 * np.log(1 + quad_form_prime / nu) + B = -(n_dim + nu) / 2 * np.log(1 + quad_form / nu) + alpha = np.minimum( np.ones(n_walkers), np.exp(logl_prime * beta - logl * beta + logp_prime - logp + logdetj_prime - logdetj - A + B) @@ -566,9 +580,8 @@ def rwm(state_dict: dict, i += 1 # Propose new points in theta space - u_prime = np.empty((n_walkers, n_dim)) - for k in range(n_walkers): - u_prime[k] = u[k] + sigma * np.dot(chol, np.random.randn(n_dim)) + randn_term = np.random.randn(n_walkers, n_dim) @ chol.T + u_prime = u + sigma * randn_term # Transform to x space x_prime, logdetj_prime = scaler.inverse(u_prime) diff --git a/pocomc/sampler.py b/pocomc/sampler.py index 7c7f2ff..7b2d3d4 100644 --- a/pocomc/sampler.py +++ b/pocomc/sampler.py @@ -8,7 +8,7 @@ import torch from .mcmc import preconditioned_pcn, preconditioned_rwm, pcn, rwm -from .tools import systematic_resample, FunctionWrapper, numpy_to_torch, torch_to_numpy, trim_weights, ProgressBar, flow_numpy_wrapper, effective_sample_size, unique_sample_size +from .tools import systematic_resample, FunctionWrapper, numpy_to_torch, torch_to_numpy, trim_weights, ProgressBar, effective_sample_size, unique_sample_size from .scaler import Reparameterize from .flow import Flow from .particles import Particles @@ -668,7 +668,9 @@ def _train(self, current_particles): verbose=self.train_config["verbose"], ) - theta = flow_numpy_wrapper(self.flow).forward(u)[0] + u_t = torch.tensor(u, dtype=torch.float32) + theta_t, _ = self.flow.forward(u_t) + theta = theta_t.detach().numpy().astype(np.float64) self.theta_geometry.fit(theta, weights=w) else: self.u_geometry.fit(u, weights=w) diff --git a/pocomc/scaler.py b/pocomc/scaler.py index e1f9b3b..8847f69 100644 --- a/pocomc/scaler.py +++ b/pocomc/scaler.py @@ -124,11 +124,8 @@ def _apply_periodic_boundary_conditions_x(self, x: np.ndarray): if self.periodic is not None: x = x.copy() for i in self.periodic: - for j in range(len(x)): - while x[j, i] > self.high[i]: - x[j, i] = self.low[i] + x[j, i] - self.high[i] - while x[j, i] < self.low[i]: - x[j, i] = self.high[i] + x[j, i] - self.low[i] + width = self.high[i] - self.low[i] + x[:, i] = self.low[i] + np.mod(x[:, i] - self.low[i], width) return x def _apply_reflective_boundary_conditions_x(self, x: np.ndarray): @@ -148,11 +145,9 @@ def _apply_reflective_boundary_conditions_x(self, x: np.ndarray): if self.reflective is not None: x = x.copy() for i in self.reflective: - for j in range(len(x)): - while x[j, i] > self.high[i]: - x[j, i] = self.high[i] - x[j, i] + self.high[i] - while x[j, i] < self.low[i]: - x[j, i] = self.low[i] + self.low[i] - x[j, i] + w = self.high[i] - self.low[i] + d_mod = np.mod(x[:, i] - self.low[i], 2 * w) + x[:, i] = np.where(d_mod < w, self.low[i] + d_mod, self.low[i] + 2 * w - d_mod) return x @@ -288,7 +283,7 @@ def _forward_affine(self, x: np.ndarray): if self.diagonal: return (x - self.mu) / self.sigma else: - return np.array([np.dot(self.L_inv, xi - self.mu) for xi in x]) + return (x - self.mu) @ self.L_inv.T def _inverse_affine(self, u: np.ndarray): """ @@ -309,7 +304,7 @@ def _inverse_affine(self, u: np.ndarray): log_det_J = np.sum(np.log(self.sigma)) return self.mu + self.sigma * u, log_det_J * np.ones(len(u)) else: - x = self.mu + np.array([np.dot(self.L, ui) for ui in u]) + x = self.mu + u @ self.L.T return x, self.log_det_L * np.ones(len(u)) def _forward_left(self, x: np.ndarray): @@ -390,7 +385,7 @@ def _forward_both(self, x: np.ndarray): Transformed input data """ p = (x[:, self.mask_both] - self.low[self.mask_both]) / (self.high[self.mask_both] - self.low[self.mask_both]) - np.clip(p, 1e-13, 1.0 - 1e-13) + p = np.clip(p, 1e-13, 1.0 - 1e-13) if self.transform == "logit": u = np.log(p / (1.0 - p)) diff --git a/pocomc/tools.py b/pocomc/tools.py index 1d0c872..4eb86d7 100644 --- a/pocomc/tools.py +++ b/pocomc/tools.py @@ -173,15 +173,8 @@ def systematic_resample(size: np.ndarray, weights = np.array(weights) / np.sum(weights) positions = (np.random.random() + np.arange(size)) / size - - j = 0 - cumulative_sum = weights[0] - indeces = np.empty(size, dtype=int) - for i in range(size): - while positions[i] > cumulative_sum: - j += 1 - cumulative_sum += weights[j] - indeces[i] = j + cumulative_sum = np.cumsum(weights) + indeces = np.searchsorted(cumulative_sum, positions) return indeces @@ -314,36 +307,3 @@ def torch_double_to_float(x: torch.Tensor, warn: bool = True): return x else: raise ValueError(f"Unsupported datatype for input data: {x.dtype}") - -class flow_numpy_wrapper: - """ - Wrapper class for numpy flows. - - Parameters - ---------- - flow : Flow object - Flow object that implements forward and inverse - transformations. - - Returns - ------- - Flow object - """ - def __init__(self, flow): - self.flow = flow - - @torch.no_grad() - def forward(self, v): - v = numpy_to_torch(v) - theta, logdetj = self.flow.forward(v) - theta = torch_to_numpy(theta) - logdetj = - torch_to_numpy(logdetj) - return theta, logdetj - - @torch.no_grad() - def inverse(self, theta): - theta = numpy_to_torch(theta) - v, logdetj = self.flow.inverse(theta) - v = torch_to_numpy(v) - logdetj = torch_to_numpy(logdetj) - return v, logdetj \ No newline at end of file