Source code for src.conditionedSampling

#
# Novel constrained sequential Latin Hypercube (with multidimensional uniformity) method
# Jan 2024
# author: Christina Schenk

#Python Packages:
import numpy as np
import pandas as pd
from scipy.stats import qmc
import lhsmdu
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sympy.utilities.iterables import multiset_permutations
from pathlib import Path  
#from itertools import permutations
import time
import random
import math
import itertools
from itertools import permutations
from scipy.spatial import minkowski_distance, distance_matrix
import csv
from sklearn.preprocessing import StandardScaler
import seaborn as sns

#---------------------------------------------------------------------------------------------------#
#### Latin Hypercube sampling on domain with variables summing up to 1: Sequentially and permutations such that condition holds (similar to Petelet et al (2010) but there for inequality constraints): 


[docs] def scale(sample, bounds): """ scales the sample to be within the bounds Parameters ---------- sample: vector of sample bounds: list of upper and lower bounds Returns ------- scaled sample """ return qmc.scale(sample, bounds[:][0], bounds[:][1])
[docs] def clip_and_scale(sample, bounds): """ clips and scales the sample to be within the bounds Parameters ---------- sample: vector of sample bounds: list of upper and lower bounds Returns ------- Clipped and scaled sample """ return np.clip(sample, bounds[:][0], bounds[:][1])
[docs] def generate_lhs_sample(dimension, bounds, rs, method="LHS", n_samp=10): """ generates LHS or LHSMDU sample Parameters ---------- dimension: integer of dimension of input space bounds: list of upper and lower bounds rs: random seed, default 1234 method: string of method: LHS or LHSMDU n_samp: number of samples to be generated, default is 10 Returns ------- scaled LHS or LHSMDU samples """ if method == "LHS": sampler_new = qmc.LatinHypercube(d=dimension, seed=rs) sample = sampler_new.random(n=n_samp) elif method == "LHSMDU": sample = lhsmdu.sample(dimension, n_samp, randomSeed=rs).transpose() else: raise ValueError("Invalid method. Please specify 'LHS' or 'LHSMDU'.") return scale(sample, bounds)
[docs] def handle_dim_1(bounds, method, n_samp, rs, verbose=False): """ handles sampling for dimension=1 Parameters ---------- bounds: list of upper and lower bounds method: string of method: LHS or LHSMDU n_samp: number of samples to be generated rs: random seed, default 1234 verbose: show prints and more info or not, default=False Returns ------- feasible samples """ x1_lb, x1_ub = bounds[0] sample1 = generate_lhs_sample(1, np.array([x1_lb, x1_ub]), rs, method, n_samp) return sample1
[docs] def handle_dim_2(bounds, method, n_samp, rs, verbose=False): """ handles sampling for dimension=2 Parameters ---------- bounds: list of upper and lower bounds method: string of method: LHS or LHSMDU n_samp: number of samples to be generated rs: random seed, default 1234 verbose: show prints and more info or not, default=False Returns ------- feasible samples """ x1_lb, x1_ub = bounds[0] x2_lb, x2_ub = bounds[1] sample1 = generate_lhs_sample(1, np.array([x1_lb, x1_ub]), rs, method, n_samp) sample2 = clip_and_scale(1.0 - sample1, np.array([x2_lb, x2_ub])) return sample1, sample2
[docs] def handle_dim_greater_than_2(bounds, method, n_samp, max_iter, max_iter_dim2, max_iter_dim3, max_rej, rs, verbose=False): """ handles sampling for dimension>2 Parameters ---------- bounds: list of upper and lower bounds method: string of method: LHS or LHSMDU n_samp: number of samples to be generated max_iter: maximum number of total iterations for dimension 1,2,3 to be feasible, if does not find minimum number of samples after this, breaks max_iter_dim2: maximum number of iterations for dimension 1,2 to be feasible max_iter_dim3: maximum number of iterations for dimension 1,2,3 to be feasible (for dimension=4) max_rej: integer number of maximum allowed samples to be rejected rs: random seed, default 1234 verbose: show prints and more info or not, default=False Returns ------- sample1, sample2, sample3: vectors """ dim = len(bounds) start = time.time() infeasible = True l = 0 l1 = 0 x1_lb = bounds[0][0] x1_ub = bounds[0][1] x2_lb = bounds[1][0] x2_ub = bounds[1][1] while infeasible: if l > max_iter: print("No feasible sample found after " + str(l) + " iterations. Please increase max_rej, max_iter or check your bounds to find a feasible sample.") Sumsamp = np.zeros((n_samp,n_samp)) C_1 = np.zeros((n_samp, n_samp)) a_ind = [] A = [] B = [] sample1 = generate_lhs_sample(1, np.array([x1_lb, x1_ub]), rs, method, n_samp) sample2 = generate_lhs_sample(1, np.array([x2_lb, x2_ub]), rs, method, n_samp) for i in range(len(sample1)): for k in range(len(sample2)): Sumsamp[i, k] = sum(sample1[i], sample2[k]) if 0 < Sumsamp[i, k] <= 1.: C_1[i, k] = 1 a_ind.append(tuple((i, k))) if i not in A and k not in B: A.append(i) B.append(k) if len(A) == n_samp and len(B) == n_samp: sample1 = sample1[A] sample2 = sample2[B] sum_vec = np.add(sample1, sample2) infeasible = False elif len(A) != n_samp and len(B) != n_samp: if l1 > max_iter_dim2: if len(A) >= n_samp - max_rej and len(B) >= n_samp - max_rej: sample1 = sample1[A] sample2 = sample2[B] sum_vec = np.add(sample1, sample2) if verbose: print(n_samp - len(A), "samples were rejected") infeasible = False n_samp = len(A) else: notA = [] notB = [] for ind in range(n_samp): if ind not in A: notA.append(ind) if ind not in B: notB.append(ind) count1=0 counter_pair1=0 for m in notA: if counter_pair1>n_samp//4: if verbose: print("counter_pair exceeded") break if m not in list(zip(*a_ind))[0]: count1+=1 #print("maybe need new sample or to reject") if max_rej>0: if count1>max_rej-1: if verbose: print("get new sample") break el0list = [ik for ik, (v,*_) in enumerate(a_ind) if v==m] if len(el0list)>0: #choose random tuple from that list: chosentuple = np.random.choice(range(len(el0list))) # if second index from random tuple not in D: if a_ind[chosentuple][1] in B: ind_rand2 = [i for i, x in enumerate(B) if x == a_ind[chosentuple][1]] if len(ind_rand2)>0: A.remove(A[ind_rand2[0]]) B.remove(a_ind[chosentuple][1]) notA.append(a_ind[chosentuple][0]) notB.append(a_ind[chosentuple][1]) A.append(m) B.append(a_ind[chosentuple][1]) notA.remove(m) notB.remove(a_ind[chosentuple][1]) counter_pair1+=1 else: A.append(m) B.append(a_ind[chosentuple][1]) notA.remove(m) notB.remove(a_ind[chosentuple][1]) if len(A) == n_samp and len(B) == n_samp: sample1 = sample1[A] sample2 = sample2[B] sum_vec = np.add(sample1, sample2) infeasible = False l1 += 1 del C_1 del Sumsamp del a_ind if infeasible == False and dim==3: sample3 = np.zeros((len(sample1), 1)) for i in range(len(sample1)): sample3[i] = 1.0-(sample1[i]+sample2[i]) if np.any(sample3>bounds[2][1]) or np.any(sample3<bounds[2][0]): rej_list = [] keep_list = [] for i in range(len(sample3)): if sample3[i]>bounds[2][1] or sample3[i]<bounds[2][0]: if verbose: print("reject because exceeding bounds") print("index exceeding bounds", i) rej_list.append(i) else: keep_list.append(i) if len(rej_list)>n_samp-max_rej: if verbose: print("Warning:", len(rej_list), "samples were rejected because they were not within the bounds.") sample1 = sample1[keep_list] sample2 = sample2[keep_list] sample3 = sample3[keep_list] del A del B if verbose: print("sample1", sample1) print("sample2", sample2) print("sample3", sample3) print("Sum all", sample1+sample2+sample3) end = time.time() if verbose: print("The conditioned " + str(method) + " algorithm took ", end-start, "CPUs") return sample1, sample2, sample3 if dim > 3: infeasible1, sample1, sample2, sample3 = handle_dim_greater_than_3(bounds, method, n_samp, max_iter, max_iter_dim3, max_rej, sum_vec, sample1, sample2, rs) l += 1 if infeasible == False and infeasible1 == False: sample4 = np.zeros((len(sample1), 1)) for i in range(len(sample4)): sample4[i] = 1.0-(sample1[i]+sample2[i]+sample3[i]) if np.any(sample4>bounds[3][1]) or np.any(sample4<bounds[3][0]): rej_list = [] keep_list = [] for i in range(len(sample4)): if sample4[i]>bounds[3][1] or sample4[i]<bounds[3][0]: if verbose: print("reject because exceeding bounds") print("index exceeding bounds", i) rej_list.append(i) else: keep_list.append(i) if len(rej_list)>n_samp-max_rej: if verbose: print("Warning:", len(rej_list), "samples were rejected because they were not within the bounds.") sample1 = sample1[keep_list] sample2 = sample2[keep_list] sample3 = sample3[keep_list] sample4 = sample4[keep_list] if verbose: print("sample1", sample1) print("sample2", sample2) print("sample3", sample3) print("sample4", sample4) print("Sum all", sample1+sample2+sample3+sample4) end = time.time() if verbose: print("The conditioned " + str(method) + " algorithm took ", end-start, "CPUs") return sample1, sample2, sample3, sample4 del C_2 del Sumsamp2 del A del B del C del D del c_ind
[docs] def handle_dim_greater_than_3(bounds, method, n_samp, max_iter, max_iter_dim3, max_rej, sum_vec, sample1, sample2, rs, verbose=False): """ handles sampling for dimension>3 Parameters ---------- bounds: list of upper and lower bounds method: string of method: LHS or LHSMDU n_samp: number of samples to be generated max_iter: maximum number of total iterations for dimension 1,2,3 to be feasible, if does not find minimum number of samples after this, breaks max_iter_dim3: maximum number of iterations for dimension 1,2,3 to be feasible (for dimension=4) max_rej: integer number of maximum allowed samples to be rejected sum_vec: vector of sum of compatible sample1 and sample2 sample1: vector of feasible sample1 sample2: vector of feasible sample1 rs: random seed, default 1234 verbose: show prints and more info or not, default=False Returns ------- infeasible1: boolean flag sample1, sample2, sample3: vectors """ x3_lb = bounds[2][0] x3_ub = bounds[2][1] infeasible1 = True l2 = 0 while infeasible1: sample3 = generate_lhs_sample(1, np.array([x3_lb, x3_ub]), rs, method, n_samp) Sumsamp2 = np.zeros((n_samp,n_samp)) C_2 = np.zeros((n_samp, n_samp)) c_ind = [] C=[] D=[] notC = [] notD = [] for i2 in range(len(sum_vec)): for k2 in range(len(sample3)): Sumsamp2[i2,k2] = sum_vec[i2]+sample3[k2] if Sumsamp2[i2,k2]<=1. and Sumsamp2[i2,k2]>0: C_2[i2,k2] = 1 c_ind.append(tuple((i2, k2))) if i2 not in C and k2 not in D: C.append(i2) D.append(k2) if len(C) == n_samp and len(D) == n_samp: sample1 = sample1[C] sample2 = sample2[C] sample3 = sample3[D] infeasible1 = False if len(C)!=n_samp and len(D)!=n_samp: if len(C)>=n_samp-max_rej and len(C)<n_samp: if verbose: print("rejected", n_samp-len(C), " samples") sample1 = sample1[C] sample2 = sample2[C] sample3 = sample3[D] infeasible1 = False break if l2>max_iter_dim3: if len(C)>=n_samp-max_rej and len(D)>=n_samp-max_rej: #2*n_samp sample1 = sample1[C] sample2 = sample2[C] sample3 = sample3[D] infeasible1 = False else: infeasible=True print("No feasible sample found after " + str(l2) + " iteration. Please reinitiate sampling, increase max_rej, max_iter or check your bounds to find a feasible sample.") break for k in range(n_samp): if k not in C: notC.append(k) if k not in D: notD.append(k) #check for feasible tuples count=0 #check for missing C indices: counter_pair=0 for m in notC: if counter_pair>n_samp: if verbose: print("counter_pair exceeded") break if m not in list(zip(*c_ind))[0]: count+=1 #print("maybe need new sample or to reject") if max_rej>0: if count>max_rej-1: if verbose: print("get new sample") break #list of possible tuples with current missing index m: el0list = [i for i, (v,*_) in enumerate(c_ind) if v==m] if len(el0list)>0: #choose random tuple from that list: chosentuple = np.random.choice(range(len(el0list))) # if second index from random tuple not in D: if c_ind[chosentuple][1] in D: ind_rand2 = [i for i, x in enumerate(D) if x == c_ind[chosentuple][1]] if len(ind_rand2)>0: C.remove(C[ind_rand2[0]]) D.remove(c_ind[chosentuple][1]) notC.append(c_ind[chosentuple][0]) notD.append(c_ind[chosentuple][1]) C.append(m) D.append(c_ind[chosentuple][1]) notC.remove(m) notD.remove(c_ind[chosentuple][1]) counter_pair+=1 else: C.append(m) D.append(c_ind[chosentuple][1]) notC.remove(m) notD.remove(c_ind[chosentuple][1]) if len(C) == n_samp and len(D) == n_samp: sample1 = sample1[C] sample2 = sample2[C] sample3 = sample3[D] infeasible1 = False l2 += 1 return infeasible1, sample1, sample2, sample3
[docs] def one_constrained_sampling(n_samp, method="LHS", bounds=None, max_iter=60, max_iter_dim2=60, max_iter_dim3=60, max_rej=None, rs=None, verbose=False): """ one_constrained_sampling for up to 4 dimensions, where samples should add up to 1 Parameters ---------- n_samp: number of samples to be generated method: string of method: LHS or LHSMDU bounds: list of upper and lower bounds max_iter: maximum number of total iterations for dimension 1,2,3 to be feasible, if does not find minimum number of samples after this, breaks max_iter_dim2: maximum number of iterations for dimension 1,2 to be feasible max_iter_dim3: maximum number of iterations for dimension 1,2,3 to be feasible (for dimension=4) max_rej: integer number of maximum allowed samples to be rejected rs: random seed, default 1234 verbose: show prints and more info or not, default=False Returns ------- feasible samples from routines for appropriate number of dimensions """ if max_rej is None: max_rej = n_samp//4 if bounds is None or len(bounds) < 2: raise ValueError("Invalid bounds. Please provide valid bounds.") dim = len(bounds) if dim == 1: return handle_dim_1(bounds, method, n_samp, rs, verbose) elif dim == 2: return handle_dim_2(bounds, method, n_samp, rs, verbose) elif dim > 2: return handle_dim_greater_than_2(bounds, method, n_samp, max_iter, max_iter_dim2, max_iter_dim3, max_rej, rs, verbose)
#-------------------------------------------------------------------------------------------------# ### Collect samples for all permutations of bounds (all selected or subset of samples selected with checking for distance)
[docs] def get_bounds_for_dimension(combi, prev_bounds): """ extract bounds for specific dimension Parameters ---------- combi: combination number of bounds, integer prev_bounds: list of upper and lower bounds Returns ------- bounds for this combination number """ return [prev_bounds[ind] for ind in combi]
# def stack_samples(samples, # dim): # """ # stacks samples # Parameters # ---------- # samples: vectors of samples # dim: integer of dimension # Returns # ------- # stacked samples # """ # return np.column_stack(samples)[:dim] def select_samples(all_samples, val_samples_ord, tol_norm, num_select, all_select): """ select samples based on distance Parameters ---------- all_samples: vectors of samples val_samples_ord: ordered samples in order of first bound permutation tol_norm: tolerance for minimum distance num_select: integer, number of samples to be selected if boolean=False all_select: boolean, if True all samples selected, if False num_select samples selected Returns ------- selected samples """ if all_select: return all_samples + val_samples_ord else: dist_matrix = distance_matrix(all_samples, val_samples_ord) selected_samples = [] for j in range(len(val_samples_ord)): if np.all(dist_matrix[:, j]) > tol_norm: selected_samples.append(val_samples_ord[j]) return all_samples + selected_samples[-num_select:]
[docs] def one_constrained_sampling_wrapper(methodname, dim, n_samp, bounds, max_rej, rs): """ select samples based on distance Parameters ---------- methodname: string of method, LHS or LHSMDU dim: dimension of input space n_samp: number of samples to be collected bounds: list of lower and upper bounds max_rej: maximum samples to be rejected rs: random seed, default 1234 Returns ------- one constrained feasible samples from routines for dim 2, 3, 4 """ if dim == 2: return one_constrained_sampling(n_samp, method=methodname, bounds=bounds, max_rej=max_rej, rs=rs) elif dim == 3: return one_constrained_sampling(n_samp, method=methodname, bounds=bounds, max_rej=max_rej, rs=rs) elif dim == 4: return one_constrained_sampling(n_samp, method=methodname, bounds=bounds, max_rej=max_rej, rs=rs) elif dim > 4: print("This algorithm works for maximum 4 dimensions.") return None
[docs] def sample_with_bound_permutations( prev_bounds, n_samp, tol_norm=1e-3, all_select=False, num_select=4, max_rej=None, dim=None, rs=1234, verbose=False): """ Computes samples with bound permutations. Parameters ---------- prev_bounds : list List of lower and upper bounds for sampling. n_samp : int Number of samples to generate. tol_norm : float, optional Tolerance for minimum distance between samples. all_select : bool, optional Whether to select all samples or a fixed number. num_select : int, optional Number of samples to select if `all_select` is False. max_rej : int, optional Maximum number of rejections allowed during sampling. dim : int Dimensionality of the problem. rs : int, optional Random seed for reproducibility. verbose : bool, optional Whether to print verbose output. Returns ------- tuple All feasible samples generated using two methods. """ def get_permuted_bounds(combi, prev_bounds): return [prev_bounds[idx] for idx in combi] def stack_samples(samples, combi): # Reorder samples based on permutation combination stacked = np.zeros_like(samples) for num, ind in enumerate(combi): stacked[:, ind] = samples[:, num] return stacked def select_samps(existing_samples, new_samples, tol_norm, num_select): dist_matrix = distance_matrix(existing_samples, new_samples) selected_indices = [ j for j in range(len(new_samples)) if np.min(dist_matrix[:, j]) > tol_norm ] if not all_select: selected_indices = sorted( selected_indices, key=lambda idx: np.min(dist_matrix[:, idx]), reverse=True )[:num_select] return new_samples[selected_indices, :] # Initialize start = time.time() all_perms = list(permutations(range(dim))) if max_rej is None: max_rej = n_samp // 4 all_val_samples, all_val_samples_mdu = None, None # Iterate over methods for num_meth in range(2): methodname = "LHS" if num_meth == 0 else "LHSMDU" for perm_ind, combi in enumerate(all_perms): bounds = get_permuted_bounds(combi, prev_bounds) if verbose: print(f"Permutation {perm_ind}: {combi}, Bounds: {bounds}") samples = one_constrained_sampling( method=methodname, n_samp=n_samp, bounds=bounds, max_rej=max_rej ) # Convert samples to stacked format samples_stacked = np.column_stack(samples) if perm_ind == 0: if num_meth == 0: all_val_samples = samples_stacked else: all_val_samples_mdu = samples_stacked else: val_samples_ord = stack_samples(samples_stacked, combi) if num_meth == 0: if all_select: all_val_samples = np.vstack((all_val_samples, val_samples_ord)) else: all_val_samples = np.vstack(( all_val_samples, select_samps(all_val_samples, val_samples_ord, tol_norm, num_select) )) else: if all_select: all_val_samples_mdu = np.vstack((all_val_samples_mdu, val_samples_ord)) else: all_val_samples_mdu = np.vstack(( all_val_samples_mdu, select_samps(all_val_samples_mdu, val_samples_ord, tol_norm, num_select) )) if verbose: elapsed = time.time() - start print(f"{methodname} method completed in {elapsed:.2f} seconds.") return all_val_samples, all_val_samples_mdu
###Probabilistic version that runs sample_with_bound_permutations for multiple random seeds:
[docs] def prob_sample_with_bound_permutations( seeds=[42, 123, 7, 99, 56], prev_bounds=None, n_samp=10, tol_norm=1e-3, all_select=False, num_select=4, max_rej=None, dim=None, verbose=False): """ Perform probabilistic sampling with bound permutations for a list of seeds. Args: seeds (list): List of random seeds. by default list of 5 random seeds. prev_bounds (list): Previous bounds for sampling. n_samp (int): Number of samples per seed. tol_norm (float): Tolerance for the norm. all_select (bool): Whether to select all points. num_select (int): Number of selections to make. max_rej (int): Maximum rejections allowed. dim (int): Dimensionality of the sampling space. rs: random seed, default 1234 verbose: show prints and more info or not, default=False Returns: tuple: Contains the selected LHS samples, selected MDU samples, and their means and standard deviations. """ all_val_samples = [] all_val_samples_mdu = [] # Generate the samples for each seed for rs in seeds: all_val_samplesi, all_val_samples_mdui = sample_with_bound_permutations( prev_bounds=prev_bounds, n_samp=n_samp, tol_norm=tol_norm, all_select=all_select, num_select=num_select, max_rej=max_rej, dim=dim, rs=rs, verbose=False) all_val_samples.append(np.array(all_val_samplesi)) # Convert to NumPy array all_val_samples_mdu.append(np.array(all_val_samples_mdui)) # Calculate the minimum length across all seeds min_len = min(len(samples) for samples in all_val_samples) min_len_mdu = min(len(samples) for samples in all_val_samples_mdu) # Initialize lists to store the sampled data all_val_samples_selected = [] all_val_samples_mdu_selected = [] # Randomly select `min_len` samples for each seed for k in range(len(seeds)): # Randomly sample rows sampled_indices_lhs = np.random.choice(len(all_val_samples[k]), size=min_len, replace=False) sampled_indices_mdu = np.random.choice(len(all_val_samples_mdu[k]), size=min_len_mdu, replace=False) # Select the sampled rows sampled_lhs = all_val_samples[k][sampled_indices_lhs, :] sampled_mdu = all_val_samples_mdu[k][sampled_indices_mdu, :] # Append the sampled data to the lists all_val_samples_selected.append(sampled_lhs) all_val_samples_mdu_selected.append(sampled_mdu) # Stack the selected samples along a new axis (shape: num_seeds x min_len x n_dim) all_val_samples_selected = np.stack(all_val_samples_selected, axis=0) all_val_samples_mdu_selected = np.stack(all_val_samples_mdu_selected, axis=0) # Compute the mean and standard deviation across the seeds (axis=0) mean_samples = np.mean(all_val_samples_selected, axis=0) std_samples = np.std(all_val_samples_selected, axis=0) mean_samples_mdu = np.mean(all_val_samples_mdu_selected, axis=0) std_samples_mdu = np.std(all_val_samples_mdu_selected, axis=0) return (all_val_samples_selected, all_val_samples_mdu_selected, mean_samples, std_samples, mean_samples_mdu, std_samples_mdu)
### Select subset of samples that varies the most in terms of distance from the already collected data
[docs] def scale_data(data, decimals=3): """ scales data via standard scaling Parameters ---------- data: numpy array or pandas dataframe of data decimals: decimals to be rounded to, integer Returns ------- scaled data """ scaler = StandardScaler().fit(data) return scaler.transform(np.around(data, decimals=decimals)), scaler
[docs] def select_samples(data_scaled, samples, samples_unscaled, tol, tol2, decimals=3, des_n_samp=None): """ selects samples based on distance from data set Parameters ---------- data_scaled: scaled data, numpy array samples: array of samples samples_unscaled: array of unscaled samples tol: tolerance for minimum distance to data, float tol2: tolerance for minimum distance to other already selected samples, float decimals: decimals to round to, integer des_n_samp: desired number of samples/experiments to be executed, default None Returns ------- selected scaled rounded samples: array selected unscaled rounded samples: array selected_ind_list: list of selected indices """ tollist = [] norm_list = [] for j in range(len(samples)): # Calculate the Euclidean distance between samples[j,:] and data_scaled distances = np.linalg.norm(data_scaled - samples[j, :], axis=1) if np.all(distances > tol): if len(tollist) == 0: tollist.append(j) norm_list.append(min([np.linalg.norm(data_scaled[i,:]-samples[j,:]) for i in range(len(data_scaled))])) else: # Calculate distances between samples[j,:] and samples in tollist distances_to_tollist = np.linalg.norm(samples[j, :] - samples[tollist, :], axis=1) if np.all(distances_to_tollist > tol2) and des_n_samp is not None and j not in tollist: tollist.append(j) norm_list.append(min([np.linalg.norm(data_scaled[i,:]-samples[j,:]) for i in range(len(data_scaled))])) sorted_norm_list = sorted(norm_list) selected_list = sorted_norm_list[-des_n_samp:] selected_norm_ind_list = [norm_list.index(i) for i in selected_list] selected_ind_list = [tollist[i] for i in selected_norm_ind_list] return np.around(samples[selected_ind_list, :],decimals=decimals), np.around(samples_unscaled[selected_ind_list,:], decimals=decimals), selected_ind_list
[docs] def select_samples_diff_from_data(exp_data, samples_LHS, samples_LHSMDU, des_n_samp=15, tol=5e-1, tol2=5e-1, decimals=3): """ select samples based on distance from experimental data Parameters ---------- exp_data: array of experimental data samples_LHS: LHS samples array samples_LHSMDU: LHSMDU samples array des_n_samp: desired number of samples/experiments to be executed tol: tolerance for minimum distance to data, float tol2: tolerance for minimum distance to other already selected samples, float decimals: decimals to round to, integer Returns ------- tol_samples: selected samples with LHS tol_samples_LHSMDU: selected samples with LHSMDU tol_samples_unscaled: selected unscaled samples with LHS tol_samples_LHSMDU_unscaled: selected unscaled samples with LHSMDU """ data_scaled, scaler = scale_data(exp_data) samples_LHS_scaled, scaler2 = scale_data(np.around(samples_LHS, decimals=decimals)) samples_LHSMDU_scaled, scaler3 = scale_data(np.around(samples_LHSMDU, decimals=decimals)) tol_samples, tol_samples_unscaled, selected_ind_list1 = select_samples(data_scaled, samples_LHS_scaled, samples_LHS, tol, tol2, decimals, des_n_samp) tol_samples_LHSMDU, tol_samples_LHSMDU_unscaled, selected_ind_list2 = select_samples(data_scaled, samples_LHSMDU_scaled, samples_LHSMDU, tol, tol2, decimals, des_n_samp) return tol_samples, tol_samples_LHSMDU, tol_samples_unscaled, tol_samples_LHSMDU_unscaled