Source code for kenchi.outlier_detection.ensemble

from sklearn.ensemble import IsolationForest

from .base import BaseOutlierDetector

__all__ = ['IForest']


class IForest(BaseOutlierDetector):
    """Isolation forest (iForest).

    Thin wrapper that delegates fitting and scoring to
    ``sklearn.ensemble.IsolationForest``.

    Parameters
    ----------
    bootstrap : bool, default False
        If True, individual trees are fit on random subsets of the training
        data sampled with replacement. If False, sampling without replacement
        is performed.

    contamination : float, default 0.1
        Proportion of outliers in the data set. Used to define the threshold.

    max_features : int or float, default 1.0
        Number of features to draw from X to train each base estimator.

    max_samples : int, float or str, default 'auto'
        Number of samples to draw from X to train each base estimator.

    n_estimators : int, default 100
        Number of base estimators in the ensemble.

    n_jobs : int, default 1
        Number of jobs to run in parallel. If -1, then the number of jobs is
        set to the number of CPU cores.

    random_state : int or RandomState instance, default None
        Seed of the pseudo random number generator.

    Attributes
    ----------
    anomaly_score_ : array-like of shape (n_samples,)
        Anomaly score for each training data.

    threshold_ : float
        Threshold.

    estimators_ : list
        Collection of fitted sub-estimators.

    estimators_samples_ : list of arrays
        Subset of drawn samples for each base estimator.

    max_samples_ : int
        Actual number of samples.

    References
    ----------
    .. [#liu08] Liu, F. T., Ting K. M., and Zhou, Z.-H.,
        "Isolation forest,"
        In Proceedings of ICDM'08, pp. 413-422, 2008.
    """

    def __init__(
        self, bootstrap=False, contamination=0.1, max_features=1.0,
        max_samples='auto', n_estimators=100, n_jobs=1, random_state=None
    ):
        # The contamination rate is handled by the base detector; it is
        # deliberately not forwarded to the wrapped IsolationForest.
        super().__init__(contamination=contamination)

        self.bootstrap = bootstrap
        self.max_features = max_features
        self.max_samples = max_samples
        self.n_estimators = n_estimators
        self.n_jobs = n_jobs
        self.random_state = random_state

    # The fitted attributes below are read straight from the wrapped
    # estimator, so they only exist once ``_fit`` has been called.

    @property
    def estimators_(self):
        """Fitted sub-estimators of the wrapped isolation forest."""
        return self._estimator.estimators_

    @property
    def estimators_samples_(self):
        """Drawn samples for each base estimator of the wrapped forest."""
        return self._estimator.estimators_samples_

    @property
    def max_samples_(self):
        """Actual number of samples used by the wrapped forest."""
        return self._estimator.max_samples_

    def _fit(self, X):
        """Fit the wrapped ``IsolationForest`` on X and return self."""
        forest_params = {
            'bootstrap': self.bootstrap,
            'max_features': self.max_features,
            'max_samples': self.max_samples,
            'n_estimators': self.n_estimators,
            'n_jobs': self.n_jobs,
            'random_state': self.random_state,
        }
        forest = IsolationForest(**forest_params)

        # Keep whatever ``fit`` returns, exactly as the chained call would.
        self._estimator = forest.fit(X)

        return self

    def _anomaly_score(self, X):
        """Return 0.5 minus the wrapped forest's decision function on X."""
        decision = self._estimator.decision_function(X)

        return 0.5 - decision