import numpy as np
from scipy.stats import gaussian_kde
from sklearn.metrics import auc, roc_curve
from sklearn.utils.validation import check_array, check_symmetric, column_or_1d
# Names exported by a star-import of this module.
__all__ = [
    'plot_anomaly_score',
    'plot_graphical_model',
    'plot_partial_corrcoef',
    'plot_roc_curve',
]
def plot_anomaly_score(
    anomaly_score, ax=None, bins='auto', figsize=None,
    filename=None, hist=True, kde=True, threshold=None,
    title=None, xlabel='Samples', xlim=None, ylabel='Anomaly score',
    ylim=None, **kwargs
):
    """Plot the anomaly score for each sample.

    The scores are drawn as a line on ``ax``. When ``hist`` or ``kde`` is
    enabled, a narrow companion axes sharing the y axis is attached to the
    right of ``ax`` and shows the distribution of the scores. Calling the
    function again with the same ``ax`` reuses that companion axes when it
    can be located.

    Parameters
    ----------
    anomaly_score : array-like of shape (n_samples,)
        Anomaly score for each sample.

    ax : matplotlib Axes, default None
        Target axes instance. If None, a new figure and axes are created.

    bins : int, str or array-like, default 'auto'
        Bin specification passed to ``Axes.hist``.

    figsize : tuple, default None
        Tuple denoting figure size of the plot. Only used when ``ax`` is
        None.

    filename : str, default None
        If provided, save the current figure.

    hist : bool, default True
        If True, plot a histogram of anomaly scores.

    kde : bool, default True
        If True, plot a gaussian kernel density estimate.

    threshold : float, default None
        If provided, draw a horizontal line at this value.

    title : string, default None
        Axes title. To disable, pass None.

    xlabel : string, default 'Samples'
        X axis title label. To disable, pass None.

    xlim : tuple, default None
        Tuple passed to ``ax.set_xlim``. Defaults to
        ``(0, n_samples - 1)``.

    ylabel : string, default 'Anomaly score'
        Y axis title label. To disable, pass None.

    ylim : tuple, default None
        Tuple passed to ``ax.set_ylim``. Defaults to
        ``(0, 1.05 * max(anomaly_score))``.

    **kwargs : dict
        Other keywords passed to ``ax.plot``.

    Returns
    -------
    ax : matplotlib Axes
        Axes on which the plot was drawn.

    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> from kenchi.datasets import load_wdbc
    >>> from kenchi.outlier_detection import MiniBatchKMeans
    >>> from kenchi.plotting import plot_anomaly_score
    >>> X, _ = load_wdbc(random_state=0, return_X_y=True)
    >>> det = MiniBatchKMeans(random_state=0).fit(X)
    >>> anomaly_score = det.anomaly_score(X, normalize=True)
    >>> plot_anomaly_score(
    ...     anomaly_score, threshold=det.threshold_, linestyle='', marker='.'
    ... ) # doctest: +ELLIPSIS
    <matplotlib.axes._subplots.AxesSubplot object at 0x...>
    >>> plt.show() # doctest: +SKIP

    .. figure:: images/plot_anomaly_score.png
    """

    import matplotlib.pyplot as plt
    from mpl_toolkits.axes_grid1 import make_axes_locatable

    def _get_divider(locator):
        """Return the divider behind ``locator``, or None if unknown."""

        divider = getattr(locator, '_axes_divider', None)

        if divider is None:
            # NOTE(review): newer matplotlib releases appear to hand out a
            # functools.partial bound to the divider instead of an
            # AxesLocator -- confirm against the installed version.
            bound = getattr(locator, 'func', None)
            divider = getattr(bound, '__self__', None)

        return divider

    def _new_ax_hist(ax):
        """Attach a fresh histogram axes to the right side of ``ax``."""

        divider = make_axes_locatable(ax)

        return divider.append_axes('right', '20%', pad=0.1, sharey=ax)

    def _get_ax_hist(ax):
        """Return the histogram axes paired with ``ax``, creating it if
        no existing one can be found."""

        locator = ax.get_axes_locator()

        if locator is None:
            return _new_ax_hist(ax)

        divider = _get_divider(locator)

        if divider is not None:
            # Look for a sibling axes laid out by the same divider
            for ax_other in ax.get_figure().get_axes():
                if ax_other is ax:
                    continue

                locator_other = ax_other.get_axes_locator()

                if locator_other is None:
                    continue

                if _get_divider(locator_other) is divider:
                    return ax_other

        # No companion axes could be located (e.g. it was removed, or the
        # locator is not tied to a known divider). Build a new one rather
        # than returning None and failing later with an AttributeError.
        return _new_ax_hist(ax)

    anomaly_score = column_or_1d(anomaly_score)

    if ax is None:
        _, ax = plt.subplots(figsize=figsize)

    ax.grid(True, linestyle=':')

    if xlim is None:
        n_samples, = anomaly_score.shape
        xlim = (0., n_samples - 1.)

    ax.set_xlim(xlim)

    if ylim is None:
        ylim = (0., 1.05 * np.max(anomaly_score))

    ax.set_ylim(ylim)

    if title is not None:
        ax.set_title(title)

    if xlabel is not None:
        ax.set_xlabel(xlabel)

    if ylabel is not None:
        ax.set_ylabel(ylabel)

    line, = ax.plot(anomaly_score, **kwargs)

    # Reuse the line color so that the threshold, the histogram and the
    # density estimate visually belong to the same series
    color = line.get_color()

    if threshold is not None:
        ax.hlines(threshold, xlim[0], xlim[1], color=color)

    if hist or kde:
        ax_hist = _get_ax_hist(ax)

        ax_hist.grid(True, linestyle=':')
        ax_hist.tick_params(axis='y', labelleft=False)
        ax_hist.set_ylim(ylim)

        if hist:
            # Draw a histogram
            ax_hist.hist(
                anomaly_score,
                alpha=0.4,
                bins=bins,
                color=color,
                density=True,
                orientation='horizontal'
            )

        if kde:
            kernel = gaussian_kde(anomaly_score)
            ylocs = np.linspace(ylim[0], ylim[1])

            # Draw a gaussian kernel density estimate
            ax_hist.plot(kernel(ylocs), ylocs, color=color)

    if 'label' in kwargs:
        ax.legend(loc='upper left')

    if filename is not None:
        ax.get_figure().savefig(filename)

    return ax
def plot_roc_curve(
    y_true, y_score, ax=None, figsize=None,
    filename=None, title='ROC curve', xlabel='FPR', ylabel='TPR',
    **kwargs
):
    """Plot the Receiver Operating Characteristic (ROC) curve.

    The curve is computed with ``roc_curve`` and its area with ``auc``;
    the area is always reported in the legend entry of the curve.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        True Labels.

    y_score : array-like of shape (n_samples,)
        Target scores.

    ax : matplotlib Axes, default None
        Target axes instance. If None, a new figure and axes are created.

    figsize : tuple, default None
        Tuple denoting figure size of the plot. Only used when ``ax`` is
        None.

    filename : str, default None
        If provided, save the current figure.

    title : string, default 'ROC curve'
        Axes title. To disable, pass None.

    xlabel : string, default 'FPR'
        X axis title label. To disable, pass None.

    ylabel : string, default 'TPR'
        Y axis title label. To disable, pass None.

    **kwargs : dict
        Other keywords passed to ``ax.plot``. A ``label`` entry, if given,
        gets the area under the curve appended to it.

    Returns
    -------
    ax : matplotlib Axes
        Axes on which the plot was drawn.

    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> from kenchi.datasets import load_wdbc
    >>> from kenchi.outlier_detection import MiniBatchKMeans
    >>> from kenchi.plotting import plot_roc_curve
    >>> X, y = load_wdbc(random_state=0, return_X_y=True)
    >>> det = MiniBatchKMeans(random_state=0).fit(X)
    >>> score_samples = det.score_samples(X)
    >>> plot_roc_curve(y, score_samples) # doctest: +ELLIPSIS
    <matplotlib.axes._subplots.AxesSubplot object at 0x...>
    >>> plt.show() # doctest: +SKIP

    .. figure:: images/plot_roc_curve.png
    """

    import matplotlib.pyplot as plt

    # Compute the curve before touching any figure
    fpr, tpr, _ = roc_curve(y_true, y_score)
    roc_auc = auc(fpr, tpr)

    if ax is None:
        ax = plt.subplots(figsize=figsize)[1]

    ax.grid(True, linestyle=':')
    ax.set_xlim(0., 1.)
    ax.set_ylim(0., 1.05)

    # Apply each optional piece of text only when it is not disabled
    optional_text = (
        (ax.set_title, title),
        (ax.set_xlabel, xlabel),
        (ax.set_ylabel, ylabel),
    )

    for setter, text in optional_text:
        if text is not None:
            setter(text)

    # Always advertise the area under the curve in the legend entry
    if 'label' not in kwargs:
        kwargs['label'] = f'area={roc_auc:1.3f}'
    else:
        kwargs['label'] += f' (area={roc_auc:1.3f})'

    ax.plot(fpr, tpr, **kwargs)
    ax.legend(loc='lower right')

    if filename is not None:
        ax.get_figure().savefig(filename)

    return ax
def plot_graphical_model(
    G, ax=None, figsize=None, filename=None,
    random_state=None, title='GGM', **kwargs
):
    """Plot the Gaussian Graphical Model (GGM).

    Unless overridden through ``kwargs``, node sizes grow with the node
    degree, edge widths are proportional to the absolute edge weight and
    node positions come from ``nx.spring_layout``.

    Parameters
    ----------
    G : networkx Graph
        GGM. Edges without a ``'weight'`` attribute are drawn as if their
        weight were 1.

    ax : matplotlib Axes, default None
        Target axes instance. If None, a new figure and axes are created.

    figsize : tuple, default None
        Tuple denoting figure size of the plot. Only used when ``ax`` is
        None.

    filename : str, default None
        If provided, save the current figure.

    random_state : int, RandomState instance, default None
        Seed of the pseudo random number generator. Passed to
        ``nx.spring_layout`` when ``pos`` is not supplied in ``kwargs``.

    title : string, default 'GGM'
        Axes title. To disable, pass None.

    **kwargs : dict
        Other keywords passed to ``nx.draw_networkx``. Defaults are only
        filled in for ``cmap``, ``node_size``, ``pos`` and ``width`` when
        the caller does not provide them.

    Returns
    -------
    ax : matplotlib Axes
        Axes on which the plot was drawn.

    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> import networkx as nx
    >>> from kenchi.plotting import plot_graphical_model
    >>> from sklearn.datasets import make_sparse_spd_matrix
    >>> A = make_sparse_spd_matrix(dim=20, norm_diag=True, random_state=0)
    >>> G = nx.from_numpy_array(A)
    >>> plot_graphical_model(G, random_state=0) # doctest: +ELLIPSIS
    <matplotlib.axes._subplots.AxesSubplot object at 0x...>
    >>> plt.show() # doctest: +SKIP

    .. figure:: images/plot_graphical_model.png
    """

    import matplotlib.pyplot as plt
    import networkx as nx

    if ax is None:
        _, ax = plt.subplots(figsize=figsize)

    if title is not None:
        ax.set_title(title)

    # Add the draw_networkx kwargs here. Each default is computed only
    # when the caller did not supply the keyword, so that an explicit
    # value never triggers (or fails in) an unused computation.
    kwargs.setdefault('cmap', 'Spectral')

    if 'node_size' not in kwargs:
        kwargs['node_size'] = np.array(
            [30. * (d + 1.) for _, d in G.degree]
        )

    if 'pos' not in kwargs:
        kwargs['pos'] = nx.spring_layout(G, seed=random_state)

    if 'width' not in kwargs:
        # ``default=1.`` keeps unweighted edges from yielding None, which
        # would make the multiplication below raise a TypeError
        kwargs['width'] = np.abs(
            [3. * w for _, _, w in G.edges(data='weight', default=1.)]
        )

    # Draw the Gaussian graphical model
    nx.draw_networkx(G, ax=ax, **kwargs)

    # Turn off tick visibility
    ax.tick_params('x', labelbottom=False, bottom=False)
    ax.tick_params('y', labelleft=False, left=False)

    if filename is not None:
        ax.get_figure().savefig(filename)

    return ax
def plot_partial_corrcoef(
    partial_corrcoef, ax=None, cbar=True, figsize=None,
    filename=None, title='Partial correlation', **kwargs
):
    """Plot the partial correlation coefficient matrix.

    The matrix is validated with ``check_array`` and
    ``check_symmetric(..., raise_exception=True)`` and drawn as a heatmap
    in matrix orientation. Entries equal to zero are masked and therefore
    show the grey axes background.

    Parameters
    ----------
    partial_corrcoef : array-like of shape (n_features, n_features)
        Partial correlation coefficient matrix.

    ax : matplotlib Axes, default None
        Target axes instance. If None, a new figure and axes are created.

    cbar : bool, default True
        If True, draw a colorbar.

    figsize : tuple, default None
        Tuple denoting figure size of the plot. Only used when ``ax`` is
        None.

    filename : str, default None
        If provided, save the current figure.

    title : string, default 'Partial correlation'
        Axes title. To disable, pass None.

    **kwargs : dict
        Other keywords passed to ``ax.pcolormesh``. They take precedence
        over the defaults set for ``cmap``, ``edgecolors``, ``vmin`` and
        ``vmax``.

    Returns
    -------
    ax : matplotlib Axes
        Axes on which the plot was drawn.

    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> from kenchi.plotting import plot_partial_corrcoef
    >>> from sklearn.datasets import make_sparse_spd_matrix
    >>> A = make_sparse_spd_matrix(dim=20, norm_diag=True, random_state=0)
    >>> plot_partial_corrcoef(A) # doctest: +ELLIPSIS
    <matplotlib.axes._subplots.AxesSubplot object at 0x...>
    >>> plt.show() # doctest: +SKIP

    .. figure:: images/plot_partial_corrcoef.png
    """

    import matplotlib.pyplot as plt
    from mpl_toolkits.axes_grid1 import make_axes_locatable

    partial_corrcoef = check_symmetric(
        check_array(partial_corrcoef), raise_exception=True
    )

    if ax is None:
        ax = plt.subplots(figsize=figsize)[1]

    if title is not None:
        ax.set_title(title)

    # Defaults for pcolormesh; anything the caller passed wins
    mesh_options = {
        'cmap': 'RdBu',
        'edgecolors': 'white',
        'vmin': -1.,
        'vmax': 1.,
    }
    mesh_options.update(kwargs)

    # Draw the heatmap, hiding exact zeros
    nonzero_only = np.ma.masked_equal(partial_corrcoef, 0.)
    mesh = ax.pcolormesh(nonzero_only, **mesh_options)

    ax.set_aspect('equal')
    ax.set_facecolor('grey')

    # Flip the y axis so that the first row ends up at the top
    ax.invert_yaxis()

    if cbar:
        # Reserve a slim axes on the right side of ax for the colorbar
        cax = make_axes_locatable(ax).append_axes('right', '5%', pad=0.1)

        ax.get_figure().colorbar(mesh, cax=cax)

    if filename is not None:
        ax.get_figure().savefig(filename)

    return ax