.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "auto_examples/anomaly_detection.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        :ref:`Go to the end <sphx_glr_download_auto_examples_anomaly_detection.py>`
        to download the full example code.

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_auto_examples_anomaly_detection.py:


Anomaly Detection comparison
============================

Simple demonstration of Anomaly Detection methods implemented in emlearn

.. GENERATED FROM PYTHON SOURCE LINES 8-34

.. code-block:: Python


    # Example adapted from
    # "Comparing anomaly detection algorithms for outlier detection on toy datasets"
    # Author: Alexandre Gramfort
    #         Albert Thomas
    # License: BSD 3 clause
    # https://scikit-learn.org/stable/auto_examples/miscellaneous/plot_anomaly_comparison.html#sphx-glr-auto-examples-miscellaneous-plot-anomaly-comparison-py

    import os.path

    import emlearn
    import numpy
    import pandas
    import seaborn
    import matplotlib.pyplot as plt

    #matplotlib.rcParams["contour.negative_linestyle"] = "solid"

    try:
        # When executed as regular .py script
        here = os.path.dirname(__file__)
    except NameError:
        # When executed as Jupyter notebook / Sphinx Gallery
        here = os.getcwd()


.. GENERATED FROM PYTHON SOURCE LINES 35-39

Create dataset
------------------------

Generating four synthetic 2D datasets with scikit-learn (Gaussian blobs and two moons),
each with uniformly distributed outliers added

.. GENERATED FROM PYTHON SOURCE LINES 39-70

..
code-block:: Python


    def make_datasets(n_samples=300, outliers=0.15, seed=42):
        """
        Build the toy 2D datasets used in the comparison.

        Four sets of inliers are generated: three Gaussian blob layouts and
        one two-moons shape. Uniformly distributed points in the range
        -6 to 6 (both dimensions) are then appended to each set as outliers.

        :param n_samples: nominal dataset size. The blob datasets are generated
            with ``n_samples - n_outliers`` inliers, the moons dataset with
            ``n_samples`` inliers.
        :param outliers: fraction of ``n_samples`` to add as outliers,
            ``n_outliers = int(outliers * n_samples)``
        :param seed: seed of the random generator that draws the outliers.
            The inlier generators use a fixed ``random_state=0``.
        :return: list with one array of 2D points per dataset,
            inliers first and outliers last
        """
        from sklearn.datasets import make_moons, make_blobs

        outlier_rng = numpy.random.RandomState(seed)
        n_outliers = int(outliers * n_samples)
        n_inliers = n_samples - n_outliers

        # Settings shared by all the blob datasets
        blob_kwargs = {'random_state': 0, 'n_samples': n_inliers, 'n_features': 2}

        # Inliers only. [0] keeps the points and drops the labels
        single_blob = make_blobs(centers=[[0, 0], [0, 0]], cluster_std=0.5, **blob_kwargs)[0]
        even_blobs = make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[0.5, 0.5], **blob_kwargs)[0]
        uneven_blobs = make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[1.5, 0.3], **blob_kwargs)[0]
        moons = make_moons(n_samples=n_samples, noise=0.05, random_state=0)[0]
        moons = 4.0 * (moons - numpy.array([0.5, 0.25]))
        # Disabled dataset, uniform noise only:
        #14.0 * (numpy.random.RandomState(42).rand(n_samples, 2) - 0.5),
        inlier_sets = [single_blob, even_blobs, uneven_blobs, moons]

        # One draw of outliers per dataset, in list order, all from the same generator
        combined = []
        for inliers in inlier_sets:
            extra = outlier_rng.uniform(low=-6, high=6, size=(n_outliers, 2))
            combined.append(numpy.concatenate([inliers, extra], axis=0))

        return combined

    outliers_fraction = 0.15
    datasets = make_datasets(outliers=outliers_fraction)


.. GENERATED FROM PYTHON SOURCE LINES 71-75

Models to compare
------------------------



.. GENERATED FROM PYTHON SOURCE LINES 75-95

..
code-block:: Python


    from sklearn.covariance import EllipticEnvelope
    from sklearn.ensemble import IsolationForest
    from sklearn.mixture import GaussianMixture, BayesianGaussianMixture

    # Models to compare, as (name, estimator) pairs.
    # In the comparison loop the name is printed while training and used as column title,
    # and the estimator is fitted on each dataset before being passed to emlearn.convert()
    anomaly_algorithms = [
        ("Elliptic Envelope", EllipticEnvelope(contamination=outliers_fraction)),
        # NOTE(review): no random_state is passed to these two, unlike the Bayesian GMM below
        ("GMM (2, full)", GaussianMixture(n_components=2, covariance_type='full')),
        ("GMM (4, full)", GaussianMixture(n_components=4, covariance_type='full')),
        #("Gaussian Mixture model (32, full)", GaussianMixture(n_components=4, covariance_type='diag', random_state=1)),
        # NOTE(review): the label is misspelled ("Bayesian") and has a trailing space.
        # It is left as-is because the recorded output and image alt text use the same string.
        # NOTE(review): the recorded output shows a ConvergenceWarning after each
        # "Trying Baysian GMM" line - max_iter=20 may be too low, TODO confirm
        ("Baysian GMM ", BayesianGaussianMixture(n_components=12, covariance_type='diag', random_state=1, n_init=4, degrees_of_freedom_prior=1.1, max_iter=20) ),

        # Not yet supported
        #( "Isolation Forest", IsolationForest(contamination=outliers_fraction, random_state=42)),
        #("One-Class SVM", svm.OneClassSVM(nu=outliers_fraction, kernel="rbf", gamma=0.1)),
    ]


.. GENERATED FROM PYTHON SOURCE LINES 96-100

Plotting tools
------------------------

Plots the anomaly score landscape

.. GENERATED FROM PYTHON SOURCE LINES 100-137

..
code-block:: Python


    def plot_results(ax, model, X):
        """
        Draw the anomaly score landscape of a model together with the data points.

        ``model.score_samples`` is evaluated on ``X`` and on a 20 x 20 grid
        covering -7 to 7 in both dimensions. Both sets of scores are negated
        and min-max scaled to 0.0 -> 1.0, each with its own scaling.
        The grid scores are drawn as filled contours with black contour lines on top,
        and the points of ``X`` as a scatter plot colored by their scaled score.

        If scoring raises FloatingPointError, the error is printed
        and nothing is drawn.

        :param ax: matplotlib axes to draw on
        :param model: object with a ``score_samples`` method
        :param X: points to plot, column 0 is used as x and column 1 as y
        """

        res = 20
        xx, yy = numpy.meshgrid(numpy.linspace(-7, 7, res), numpy.linspace(-7, 7, res))

        try:
            y_pred = model.score_samples(X)
            Z = model.score_samples(numpy.c_[xx.ravel(), yy.ravel()])
        except FloatingPointError as e:
            print(e)
            return

        # Normalize the anomaly scores to 0.0 -> 1.0
        from sklearn.preprocessing import MinMaxScaler
        scaler = MinMaxScaler()
        def normalize_score(s, fit=False):
            # Flip the sign and reshape to a single column before scaling
            # (presumably so that larger values mean more anomalous - TODO confirm)
            s = -1.0 * s.reshape(-1, 1)
            if fit:
                scaler.fit(s)
            return scaler.transform(s)[:,0]

        # The scaler is re-fitted in both calls, so grid and points are scaled independently
        Z = normalize_score(Z, fit=True)
        y_pred = normalize_score(y_pred, fit=True)

        cmap = seaborn.color_palette("rocket", as_cmap=True)

        # Plot score landscape
        Z = Z.reshape(xx.shape)
        ax.contour(xx, yy, Z, levels=numpy.linspace(0.0, 1.0, 5), linewidths=0.6, colors="black")
        ax.contourf(xx, yy, Z, cmap=cmap)

        # Plot datapoints
        seaborn.scatterplot(ax=ax, x=X[:, 0], y=X[:, 1], s=10, hue=y_pred, palette=cmap, legend=False)

        ax.set_xlim(-7, 7)
        ax.set_ylim(-7, 7)
        ax.set_xticks(())
        ax.set_yticks(())


.. GENERATED FROM PYTHON SOURCE LINES 138-142

Run comparison
------------------------



.. GENERATED FROM PYTHON SOURCE LINES 142-174

.. code-block:: Python


    # One row per dataset, one column per algorithm
    fig, axs = plt.subplots(
        ncols=len(anomaly_algorithms),
        nrows=len(datasets),
        figsize=(len(anomaly_algorithms) * 2 + 4, 12.5),
        sharex=True, sharey=True,
        # Always return a 2D array of axes, so that axs[row, column]
        # also works with a single dataset or a single algorithm
        squeeze=False,
    )

    plt.subplots_adjust(left=0.02, right=0.98, bottom=0.001, top=0.96, wspace=0.05, hspace=0.01)

    for i_dataset, X in enumerate(datasets):
        for i_algorithm, (name, model) in enumerate(anomaly_algorithms):
            ax = axs[i_dataset, i_algorithm]

            # Column titles go on the first row. Set before training,
            # so that the column keeps its title also when the model fails
            if i_dataset == 0:
                ax.set_title(name, size=18)

            # Train model
            print(f"Trying {name}")
            try:
                model.fit(X)
            except FloatingPointError as e:
                # Leave this subplot empty and go on with the next model
                print(e)
                continue

            # Convert to C
            cmodel = emlearn.convert(model, method='inline')

            # Visualize output
            plot_results(ax, cmodel, X)

    plt.show()



..
image-sg:: /auto_examples/images/sphx_glr_anomaly_detection_001.png :alt: Elliptic Envelope, GMM (2, full), GMM (4, full), Baysian GMM :srcset: /auto_examples/images/sphx_glr_anomaly_detection_001.png :class: sphx-glr-single-img .. rst-class:: sphx-glr-script-out .. code-block:: none Trying Elliptic Envelope Trying GMM (2, full) Trying GMM (4, full) Trying Baysian GMM /home/docs/checkouts/readthedocs.org/user_builds/emlearn/envs/stable/lib/python3.12/site-packages/sklearn/mixture/_base.py:293: ConvergenceWarning: Best performing initialization did not converge. Try different init parameters, or increase max_iter, tol, or check for degenerate data. warnings.warn( Trying Elliptic Envelope Trying GMM (2, full) Trying GMM (4, full) Trying Baysian GMM /home/docs/checkouts/readthedocs.org/user_builds/emlearn/envs/stable/lib/python3.12/site-packages/sklearn/mixture/_base.py:293: ConvergenceWarning: Best performing initialization did not converge. Try different init parameters, or increase max_iter, tol, or check for degenerate data. warnings.warn( Trying Elliptic Envelope Trying GMM (2, full) Trying GMM (4, full) Trying Baysian GMM /home/docs/checkouts/readthedocs.org/user_builds/emlearn/envs/stable/lib/python3.12/site-packages/sklearn/mixture/_base.py:293: ConvergenceWarning: Best performing initialization did not converge. Try different init parameters, or increase max_iter, tol, or check for degenerate data. warnings.warn( Trying Elliptic Envelope Trying GMM (2, full) Trying GMM (4, full) underflow encountered in exp Trying Baysian GMM /home/docs/checkouts/readthedocs.org/user_builds/emlearn/envs/stable/lib/python3.12/site-packages/sklearn/mixture/_base.py:293: ConvergenceWarning: Best performing initialization did not converge. Try different init parameters, or increase max_iter, tol, or check for degenerate data. warnings.warn( .. rst-class:: sphx-glr-timing **Total running time of the script:** (0 minutes 7.202 seconds) .. 
_sphx_glr_download_auto_examples_anomaly_detection.py:

.. only:: html

  .. container:: sphx-glr-footer sphx-glr-footer-example

    .. container:: sphx-glr-download sphx-glr-download-jupyter

      :download:`Download Jupyter notebook: anomaly_detection.ipynb <anomaly_detection.ipynb>`

    .. container:: sphx-glr-download sphx-glr-download-python

      :download:`Download Python source code: anomaly_detection.py <anomaly_detection.py>`

    .. container:: sphx-glr-download sphx-glr-download-zip

      :download:`Download zipped: anomaly_detection.zip <anomaly_detection.zip>`


.. only:: html

 .. rst-class:: sphx-glr-signature

    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_