Source code for src.plot

#
# Plotting functions for post-processing the results of the novel constrained sequential Latin Hypercube (with multidimensional uniformity) method
# Jun 2024
# author: Christina Schenk

#Python Packages:
import matplotlib.pyplot as plt
from itertools import combinations
from sklearn.decomposition import PCA
import seaborn as sns
import numpy as np
import pandas as pd
import math
plt.rcParams.update({'font.size': 14})
#---------------------------------------------------------------------------------------------------#
#### Plotting functions for post processing new designs and analyzing uniformity etc.:



[docs]
def plot_dimred_2dims_both_methods(data_pca,
                                   lhs_samples_pca,
                                   lhsmdu_samples_pca,
                                   filename_eps=''):
    """
    Overlay the first two columns of the data, LHS and LHSMDU arrays in one scatter plot.

    Parameters
    ----------
    data_pca: np array of the original data; columns 0 and 1 are plotted
        (presumably the two leading PCA components, judging by the name)
    lhs_samples_pca: np array of LHS samples; columns 0 and 1 are plotted
    lhsmdu_samples_pca: np array of LHSMDU samples; columns 0 and 1 are plotted
    filename_eps: string of path with eps filename; an empty string skips saving

    Returns
    -------
    None. The scatterplot is saved as EPS when a filename is given and then shown.

    """
    # Draw in the same order as the legend entries below so that the
    # automatically assigned colors line up with 'Data', 'LHS', 'LHSMDU'.
    for points in (data_pca, lhs_samples_pca, lhsmdu_samples_pca):
        plt.scatter(points[:,0], points[:,1])
    plt.legend(['Data', 'LHS', 'LHSMDU'])

    save_requested = len(filename_eps) > 0
    if save_requested:
        plt.savefig(filename_eps, format='eps')
    # The figure is displayed whether or not it was written to disk.
    plt.show()




[docs]
def distplot_samples(samples,
                     filename_eps=''):
    """
    Draw a kernel density estimate (KDE) plot of the samples via seaborn's displot.

    Parameters
    ----------
    samples: np array of samples nsamp x ncomponents
    filename_eps: string of path with eps filename; an empty string skips saving

    Returns
    -------
    None. The KDE plot (axes labelled 'Distribution' / 'Density') is saved as
    EPS when a filename is given and then shown.

    """
    # displot hands back a figure-level grid object; the labels are set on it.
    kde_grid = sns.displot(samples, kind="kde")
    kde_grid.set(xlabel='Distribution', ylabel='Density')

    save_requested = len(filename_eps) > 0
    if save_requested:
        plt.savefig(filename_eps, format='eps')
    # The figure is displayed whether or not it was written to disk.
    plt.show()


        

[docs]
def box_kdeplot_samples(samples,
                        filename_eps='',
                        fixed_ranges=None):
    """
    Plot one density histogram with KDE overlay per component in a grid of subplots.

    Parameters
    ----------
    samples: np array of samples nsamp x ncomponents
    filename_eps: string of path with eps filename; if empty (or None) the
        figure is only shown, not saved
    fixed_ranges: optional sequence of x-axis limits, one entry per component;
        entry ``i`` is passed to ``set_xlim`` of the subplot of component ``i``.
        Components beyond ``len(fixed_ranges)`` keep their automatic limits.

    Returns
    -------
    None. Subplots showing the histogram/KDE distributions are saved as EPS
    when a filename is given and then shown.

    Raises
    ------
    ValueError: if samples has no components (columns)

    """
    n_components = samples.shape[1]
    if n_components == 0:
        raise ValueError("samples must have at least one component (column)")

    # Grid shape: half as many rows as components for an even count, a third
    # (rounded down) otherwise. Clamp to one row so that a single component
    # no longer leads to a division by zero below.
    if n_components % 2 == 0:
        n_grid_rows = n_components // 2
    else:
        n_grid_rows = n_components // 3
    n_grid_rows = max(1, n_grid_rows)
    n_grid_cols = math.ceil(n_components / n_grid_rows)

    # squeeze=False always yields a 2-D array of axes, also for a 1x1 grid,
    # so flattening works for every grid shape.
    fig, axes = plt.subplots(n_grid_rows, n_grid_cols, squeeze=False)
    axes = axes.ravel()  # flattening the array makes indexing easier

    for col, ax in zip(range(n_components), axes):
        sns.histplot(data=samples[:, col], kde=True, stat='density', ax=ax)
        ax.set(xlabel="component "+str(col), ylabel='Density')

        # Set fixed ranges if provided
        if fixed_ranges is not None and col < len(fixed_ranges):
            ax.set_xlim(fixed_ranges[col])

    # The grid can hold more cells than there are components (e.g. 7
    # components on a 2x4 grid); hide the leftover empty axes.
    for unused_ax in axes[n_components:]:
        unused_ax.axis('off')

    fig.tight_layout()
    if filename_eps:
        fig.savefig(filename_eps, format='eps')
    plt.show()


        

[docs]
def create_pairwise_scatterplots(data,
                                 lhs,
                                 lhsmdu,
                                 dim_labels=None,
                                 colors=None,
                                 labels=None,
                                 figsize=(15, 10),
                                 filename_eps="",
                                 plots_per_fig=9):
    """
    Create pairwise scatterplots for given datasets, splitting into multiple figures if necessary.

    Every pair of dimensions gets one subplot in which the three datasets are
    overlaid. Subplots are arranged three per row; unused cells are hidden.

    Parameters:
    -----------
    data: np array of original data
    lhs: np array of LHS samples
    lhsmdu: np array of LHSMDU samples
    dim_labels: list of str, labels for each dimension. Defaults to 'component X'.
    colors: list of str, colors for each dataset. Defaults to ['blue', 'orange', 'green'].
    labels: list of str, labels for each dataset. Defaults to ['Data', 'LHS', 'LHSMDU'].
    figsize: tuple, size of each figure. Defaults to (15, 10).
    filename_eps: str, path of the EPS file to write. If empty (or None) nothing
        is saved. The first figure is written to exactly this path; when the
        pairs do not fit into one figure, figure k (k >= 2) is written to the
        same path with '_k' inserted before the extension, so that later
        figures do not overwrite earlier ones.
    plots_per_fig: int, number of plots per figure. Must be at least 1.

    Returns:
    --------
    None. Each figure is saved (if requested) and then shown.

    Raises:
    -------
    ValueError: if plots_per_fig is smaller than 1.
    """
    import os  # local import: only needed to derive per-figure filenames

    if plots_per_fig < 1:
        raise ValueError("plots_per_fig must be at least 1")

    if dim_labels is None:
        dim_labels = [f'component {i+1}' for i in range(data.shape[1])]
    if colors is None:
        colors = ['blue', 'orange', 'green']
    if labels is None:
        labels = ['Data', 'LHS', 'LHSMDU']

    # Create pairwise combinations of dimensions
    pairs = list(combinations(range(data.shape[1]), 2))
    n_figs = (len(pairs) + plots_per_fig - 1) // plots_per_fig  # ceiling division
    n_grid_rows = (plots_per_fig + 2) // 3  # three subplots per row

    for fig_idx in range(n_figs):
        # Pairs that go onto this figure
        first = fig_idx * plots_per_fig
        batch = pairs[first:first + plots_per_fig]

        fig, axes = plt.subplots(n_grid_rows, 3, figsize=figsize)
        axes = axes.flatten()

        for ax, (dim1, dim2) in zip(axes, batch):
            # Explicit indexing (rather than zip) keeps the IndexError for
            # colors/labels lists that are too short.
            for set_idx, samples in enumerate((data, lhs, lhsmdu)):
                ax.scatter(samples[:, dim1], samples[:, dim2],
                           color=colors[set_idx], alpha=0.6, label=labels[set_idx])

            ax.set_xlabel(dim_labels[dim1])
            ax.set_ylabel(dim_labels[dim2])
            ax.set_title(f"Pair: {dim_labels[dim1]} vs {dim_labels[dim2]}")
            ax.legend()

        # Hide unused subplots
        for unused_ax in axes[len(batch):]:
            unused_ax.axis('off')

        # Adjust layout
        fig.tight_layout()
        if filename_eps:
            if fig_idx == 0:
                target = filename_eps
            else:
                # Give every further figure its own file instead of
                # overwriting the one written before.
                stem, ext = os.path.splitext(filename_eps)
                target = f"{stem}_{fig_idx + 1}{ext}"
            fig.savefig(target, format='eps')
        plt.show()

            
            

[docs]
def create_pairwise_distribution_plots_seaborn(data,
                                               lhs,
                                               lhsmdu,
                                               markers=None,
                                               dim_labels=None,
                                               labels=None,
                                               filename_eps=""):
    """
    Create pairwise distribution plots using Seaborn.

    The three datasets are stacked into one DataFrame with an extra 'Dataset'
    column that is used as hue in a seaborn pairplot with KDE plots on the
    diagonal.

    Parameters:
    -----------
    data: np.array, original dataset.
    lhs: np.array, LHS samples.
    lhsmdu: np.array, LHSMDU samples.
    markers: marker specification forwarded to seaborn's pairplot. Defaults to
        None, which leaves the marker choice to seaborn.
    dim_labels: list of str, labels for each dimension. Defaults to 'dim i'.
    labels: list of str, dataset labels. Defaults to ['Data', 'LHS', 'LHSMDU'].
        The first, second and third label are drawn in blue, orange and green.
    filename_eps: str, file path for saving the plot. If empty (or None)
        nothing is saved. No output format is forced here, so the format
        follows from the file extension; use '.eps' for EPS output.

    Returns:
    --------
    None. The pairplot is saved (if requested) and then shown.
    """
    if dim_labels is None:
        dim_labels = [f'dim {i+1}' for i in range(data.shape[1])]
    if labels is None:
        labels = ['Data', 'LHS', 'LHSMDU']

    # One DataFrame per dataset, tagged with its dataset label
    frames = []
    for set_idx, samples in enumerate((data, lhs, lhsmdu)):
        frame = pd.DataFrame(samples, columns=dim_labels)
        frame['Dataset'] = labels[set_idx]
        frames.append(frame)

    # Combine all datasets; ignore_index gives the stacked rows unique index
    # labels instead of repeating 0..n-1 once per dataset.
    combined_df = pd.concat(frames, ignore_index=True)

    # Key the palette by the labels actually in use, so that custom labels
    # get a color as well (a palette keyed by the default names would not
    # cover them).
    palette = {labels[0]: 'blue', labels[1]: 'orange', labels[2]: 'green'}

    # Create pairplot; markers=None is pairplot's own default, so it can be
    # forwarded unconditionally.
    pairplot = sns.pairplot(combined_df, hue='Dataset', markers=markers,
                            diag_kind='kde', palette=palette)
    if filename_eps:
        pairplot.savefig(filename_eps)
    plt.show()