# Source code for kenchi.plotting

import numpy as np
from scipy.stats import gaussian_kde
from sklearn.metrics import auc, roc_curve
from sklearn.utils.validation import check_array, check_symmetric, column_or_1d

# Public API of this module: the names exported by a star-import.
__all__ = [
    'plot_anomaly_score', 'plot_graphical_model',
    'plot_partial_corrcoef', 'plot_roc_curve'
]


def plot_anomaly_score(
    anomaly_score, ax=None, bins='auto', figsize=None,
    filename=None, hist=True, kde=True, threshold=None,
    title=None, xlabel='Samples', xlim=None, ylabel='Anomaly score',
    ylim=None, **kwargs
):
    """Plot the anomaly score for each sample.

    The scores are drawn against the sample index on ``ax``. When ``hist``
    or ``kde`` is enabled, a narrow axes sharing the y axis is attached to
    the right side of ``ax`` and shows the distribution of the scores.

    Parameters
    ----------
    anomaly_score : array-like of shape (n_samples,)
        Anomaly score for each sample.

    ax : matplotlib Axes, default None
        Target axes instance. If None, a new figure and axes are created.

    bins : int, str or array-like, default 'auto'
        Hist bins, forwarded to ``hist`` of the side axes.

    figsize : tuple, default None
        Tuple denoting figure size of the plot. Only used when ``ax`` is
        None.

    filename : str, default None
        If provided, save the figure that owns ``ax`` to this path.

    hist : bool, default True
        If True, plot a histogram of anomaly scores.

    kde : bool, default True
        If True, plot a gaussian kernel density estimate.

    threshold : float, default None
        If provided, draw a horizontal line at this anomaly score.

    title : string, default None
        Axes title. To disable, pass None.

    xlabel : string, default 'Samples'
        X axis title label. To disable, pass None.

    xlim : tuple, default None
        Tuple passed to ``ax.set_xlim``. If None, ``(0, n_samples - 1)`` is
        used.

    ylabel : string, default 'Anomaly score'
        Y axis title label. To disable, pass None.

    ylim : tuple, default None
        Tuple passed to ``ax.set_ylim``. If None, the range from 0 to 1.05
        times the maximum anomaly score is used.

    **kwargs : dict
        Other keywords passed to ``ax.plot``. If ``label`` is among them, a
        legend is drawn in the upper left corner.

    Returns
    -------
    ax : matplotlib Axes
        Axes on which the plot was drawn.

    Raises
    ------
    ValueError
        If ``hist`` or ``kde`` is requested, ``ax`` already has an axes
        locator, and no other axes of the figure shares its divider.

    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> from kenchi.datasets import load_wdbc
    >>> from kenchi.outlier_detection import MiniBatchKMeans
    >>> from kenchi.plotting import plot_anomaly_score
    >>> X, _ = load_wdbc(random_state=0, return_X_y=True)
    >>> det = MiniBatchKMeans(random_state=0).fit(X)
    >>> anomaly_score = det.anomaly_score(X, normalize=True)
    >>> plot_anomaly_score(
    ...     anomaly_score, threshold=det.threshold_, linestyle='', marker='.'
    ... ) # doctest: +ELLIPSIS
    <matplotlib.axes._subplots.AxesSubplot object at 0x...>
    >>> plt.show() # doctest: +SKIP

    .. figure:: images/plot_anomaly_score.png
    """

    import matplotlib.pyplot as plt
    from mpl_toolkits.axes_grid1 import make_axes_locatable

    def _get_divider(locator):
        """Return the divider behind an axes locator, or None if unknown."""
        divider = getattr(locator, '_axes_divider', None)

        if divider is None:
            # NOTE(review): newer Matplotlib releases appear to hand out a
            # ``functools.partial`` bound to the divider instead of an
            # ``AxesLocator`` object -- confirm against the versions that
            # must be supported.
            bound = getattr(locator, 'func', None)
            divider = getattr(bound, '__self__', None)

        return divider

    def _get_ax_hist(ax):
        """Return the side axes of ``ax``, creating it on first use."""
        locator = ax.get_axes_locator()

        if locator is None:
            # Create an axes on the right side of ax
            divider = make_axes_locatable(ax)

            return divider.append_axes('right', '20%', pad=0.1, sharey=ax)

        # ax is already managed by a divider (e.g. this function was called
        # before with the same ax): reuse the sibling axes instead of
        # appending another one
        divider = _get_divider(locator)

        if divider is not None:
            for candidate in ax.get_figure().get_axes():
                if candidate is ax:
                    continue

                candidate_locator = candidate.get_axes_locator()

                if candidate_locator is None:
                    continue

                if _get_divider(candidate_locator) is divider:
                    return candidate

        # Fail loudly instead of returning None and crashing later on
        raise ValueError(
            'ax already has an axes locator but no axes sharing its '
            'divider was found to draw the histogram on'
        )

    anomaly_score = column_or_1d(anomaly_score)

    if ax is None:
        _, ax = plt.subplots(figsize=figsize)

    ax.grid(True, linestyle=':')

    if xlim is None:
        n_samples, = anomaly_score.shape
        xlim = (0., n_samples - 1.)

    ax.set_xlim(xlim)

    if ylim is None:
        ylim = (0., 1.05 * np.max(anomaly_score))

    ax.set_ylim(ylim)

    if title is not None:
        ax.set_title(title)

    if xlabel is not None:
        ax.set_xlabel(xlabel)

    if ylabel is not None:
        ax.set_ylabel(ylabel)

    line, = ax.plot(anomaly_score, **kwargs)

    # Reuse the line color so that the threshold, the histogram and the
    # density estimate visually belong to the same series
    color = line.get_color()

    if threshold is not None:
        ax.hlines(threshold, xlim[0], xlim[1], color=color)

    if hist or kde:
        ax_hist = _get_ax_hist(ax)

        ax_hist.grid(True, linestyle=':')
        ax_hist.tick_params(axis='y', labelleft=False)
        ax_hist.set_ylim(ylim)

    if hist:
        # Draw a histogram
        ax_hist.hist(
            anomaly_score,
            alpha=0.4,
            bins=bins,
            color=color,
            density=True,
            orientation='horizontal'
        )

    if kde:
        kernel = gaussian_kde(anomaly_score)
        ylocs = np.linspace(ylim[0], ylim[1])

        # Draw a gaussian kernel density estimate
        ax_hist.plot(kernel(ylocs), ylocs, color=color)

    if 'label' in kwargs:
        ax.legend(loc='upper left')

    if filename is not None:
        ax.get_figure().savefig(filename)

    return ax


def plot_roc_curve(
    y_true, y_score, ax=None, figsize=None,
    filename=None, title='ROC curve', xlabel='FPR', ylabel='TPR',
    **kwargs
):
    """Plot the Receiver Operating Characteristic (ROC) curve.

    The area under the curve is computed and always shown in the legend,
    either on its own or appended to a user supplied ``label``.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        True Labels.

    y_score : array-like of shape (n_samples,)
        Target scores.

    ax : matplotlib Axes, default None
        Target axes instance. If None, a new figure and axes are created.

    figsize : tuple, default None
        Tuple denoting figure size of the plot. Only used when ``ax`` is
        None.

    filename : str, default None
        If provided, save the figure that owns ``ax`` to this path.

    title : string, default 'ROC curve'
        Axes title. To disable, pass None.

    xlabel : string, default 'FPR'
        X axis title label. To disable, pass None.

    ylabel : string, default 'TPR'
        Y axis title label. To disable, pass None.

    **kwargs : dict
        Other keywords passed to ``ax.plot``. If ``label`` is given (and is
        not None), the area under the curve is appended to it.

    Returns
    -------
    ax : matplotlib Axes
        Axes on which the plot was drawn.

    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> from kenchi.datasets import load_wdbc
    >>> from kenchi.outlier_detection import MiniBatchKMeans
    >>> from kenchi.plotting import plot_roc_curve
    >>> X, y = load_wdbc(random_state=0, return_X_y=True)
    >>> det = MiniBatchKMeans(random_state=0).fit(X)
    >>> score_samples = det.score_samples(X)
    >>> plot_roc_curve(y, score_samples) # doctest: +ELLIPSIS
    <matplotlib.axes._subplots.AxesSubplot object at 0x...>
    >>> plt.show() # doctest: +SKIP

    .. figure:: images/plot_roc_curve.png
    """

    import matplotlib.pyplot as plt

    fpr, tpr, _ = roc_curve(y_true, y_score)
    roc_auc = auc(fpr, tpr)

    if ax is None:
        _, ax = plt.subplots(figsize=figsize)

    ax.grid(True, linestyle=':')
    ax.set_xlim(0., 1.)
    ax.set_ylim(0., 1.05)

    if title is not None:
        ax.set_title(title)

    if xlabel is not None:
        ax.set_xlabel(xlabel)

    if ylabel is not None:
        ax.set_ylabel(ylabel)

    area_text = f'area={roc_auc:1.3f}'
    label = kwargs.get('label')

    # Build the legend entry with an f-string rather than ``+=`` so that a
    # non-string label does not raise; a None label counts as "no label"
    if label is None:
        kwargs['label'] = area_text
    else:
        kwargs['label'] = f'{label} ({area_text})'

    ax.plot(fpr, tpr, **kwargs)

    ax.legend(loc='lower right')

    if filename is not None:
        ax.get_figure().savefig(filename)

    return ax


def plot_graphical_model(
    G, ax=None, figsize=None, filename=None,
    random_state=None, title='GGM', **kwargs
):
    """Plot the Gaussian Graphical Model (GGM).

    Nodes are sized by their degree and edges are drawn with a width
    proportional to the absolute value of their ``weight`` attribute.

    Parameters
    ----------
    G : networkx Graph
        GGM. Edges without a ``weight`` attribute are treated as having a
        weight of 1.

    ax : matplotlib Axes, default None
        Target axes instance. If None, a new figure and axes are created.

    figsize : tuple, default None
        Tuple denoting figure size of the plot. Only used when ``ax`` is
        None.

    filename : str, default None
        If provided, save the figure that owns ``ax`` to this path.

    random_state : int, RandomState instance, default None
        Seed of the pseudo random number generator, passed to
        ``nx.spring_layout``.

    title : string, default 'GGM'
        Axes title. To disable, pass None.

    **kwargs : dict
        Other keywords passed to ``nx.draw_networkx``. ``cmap``,
        ``node_size``, ``pos`` and ``width`` are filled in with defaults
        unless given.

    Returns
    -------
    ax : matplotlib Axes
        Axes on which the plot was drawn.

    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> import networkx as nx
    >>> from kenchi.plotting import plot_graphical_model
    >>> from sklearn.datasets import make_sparse_spd_matrix
    >>> A = make_sparse_spd_matrix(dim=20, norm_diag=True, random_state=0)
    >>> G = nx.from_numpy_array(A)
    >>> plot_graphical_model(G, random_state=0) # doctest: +ELLIPSIS
    <matplotlib.axes._subplots.AxesSubplot object at 0x...>
    >>> plt.show() # doctest: +SKIP

    .. figure:: images/plot_graphical_model.png
    """

    import matplotlib.pyplot as plt
    import networkx as nx

    if ax is None:
        _, ax = plt.subplots(figsize=figsize)

    if title is not None:
        ax.set_title(title)

    # Offset the degree by one so that isolated nodes remain visible
    node_size = np.array([30. * (d + 1.) for _, d in G.degree])
    pos = nx.spring_layout(G, seed=random_state)

    # ``default=1.`` keeps unweighted edges from yielding None, which would
    # make the multiplication below raise a TypeError
    width = np.abs(
        [3. * w for _, _, w in G.edges(data='weight', default=1.)]
    )

    # Add the draw_networkx kwargs here
    kwargs.setdefault('cmap', 'Spectral')
    kwargs.setdefault('node_size', node_size)
    kwargs.setdefault('pos', pos)
    kwargs.setdefault('width', width)

    # Draw the Gaussian graphical model
    nx.draw_networkx(G, ax=ax, **kwargs)

    # Turn off tick visibility
    ax.tick_params('x', labelbottom=False, bottom=False)
    ax.tick_params('y', labelleft=False, left=False)

    if filename is not None:
        ax.get_figure().savefig(filename)

    return ax


def plot_partial_corrcoef(
    partial_corrcoef, ax=None, cbar=True, figsize=None,
    filename=None, title='Partial correlation', **kwargs
):
    """Plot the partial correlation coefficient matrix.

    The matrix is drawn as a heatmap in matrix orientation (first row at
    the top). Entries that are exactly zero are masked, so the grey axes
    background shows through for them.

    Parameters
    ----------
    partial_corrcoef : array-like of shape (n_features, n_features)
        Partial correlation coefficient matrix. Must be symmetric.

    ax : matplotlib Axes, default None
        Target axes instance. If None, a new figure and axes are created.

    cbar : bool, default True
        If True, draw a colorbar on the right side of ``ax``.

    figsize : tuple, default None
        Tuple denoting figure size of the plot. Only used when ``ax`` is
        None.

    filename : str, default None
        If provided, save the figure that owns ``ax`` to this path.

    title : string, default 'Partial correlation'
        Axes title. To disable, pass None.

    **kwargs : dict
        Other keywords passed to ``ax.pcolormesh``. ``cmap``,
        ``edgecolors``, ``vmin`` and ``vmax`` are filled in with defaults
        unless given.

    Returns
    -------
    ax : matplotlib Axes
        Axes on which the plot was drawn.

    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> from kenchi.plotting import plot_partial_corrcoef
    >>> from sklearn.datasets import make_sparse_spd_matrix
    >>> A = make_sparse_spd_matrix(dim=20, norm_diag=True, random_state=0)
    >>> plot_partial_corrcoef(A) # doctest: +ELLIPSIS
    <matplotlib.axes._subplots.AxesSubplot object at 0x...>
    >>> plt.show() # doctest: +SKIP

    .. figure:: images/plot_partial_corrcoef.png
    """

    import matplotlib.pyplot as plt
    from mpl_toolkits.axes_grid1 import make_axes_locatable

    # Validate the input up front; asymmetric input is rejected rather than
    # silently symmetrized
    partial_corrcoef = check_array(partial_corrcoef)
    partial_corrcoef = check_symmetric(partial_corrcoef, raise_exception=True)

    if ax is None:
        _, ax = plt.subplots(figsize=figsize)

    if title is not None:
        ax.set_title(title)

    # Add the pcolormesh kwargs here
    kwargs.setdefault('cmap', 'RdBu')
    kwargs.setdefault('edgecolors', 'white')
    kwargs.setdefault('vmin', -1.)
    kwargs.setdefault('vmax', 1.)

    # Draw the heatmap, leaving exact zeros uncolored
    masked_corrcoef = np.ma.masked_equal(partial_corrcoef, 0.)
    mesh = ax.pcolormesh(masked_corrcoef, **kwargs)

    ax.set_aspect('equal')
    ax.set_facecolor('grey')

    # Invert the y axis to show the plot in matrix form
    ax.invert_yaxis()

    if cbar:
        # Create an axes on the right side of ax
        divider = make_axes_locatable(ax)
        cax = divider.append_axes('right', '5%', pad=0.1)

        ax.get_figure().colorbar(mesh, cax=cax)

    if filename is not None:
        ax.get_figure().savefig(filename)

    return ax