Source code for emlearn.evaluate.pareto


"""
Pareto-optimal evaluation
=========================
"""

import numpy

[docs]def is_pareto_efficient_simple(costs): """ Find the pareto-efficient points (smaller is better) :param costs: An (n_points, n_costs) array :return: A (n_points, ) boolean array, indicating whether each point is Pareto efficient From https://stackoverflow.com/a/40239615/1967571 Fairly fast for many datapoints, less fast for many costs, somewhat readable """ is_efficient = numpy.ones(costs.shape[0], dtype = bool) for i, c in enumerate(costs): if is_efficient[i]: is_efficient[is_efficient] = numpy.any(costs[is_efficient] < c, axis=1) # Keep any point with a lower cost is_efficient[i] = True # And keep self return is_efficient
[docs]def find_pareto_front(df, cost_metric : str = 'mean_test_compute', performance_metric : str = 'mean_test_accuracy', higher_is_better : bool = True, min_performance=None): """ Find the Pareto front :param cost_metric: Column with model compute cost. Lower cost always better :param performance_metric: Column with model predictive performance. :param higher_is_better: Whether higher or lower is better for @performance_metric :param min_performance: Cut datapoints with worse performance than this :returns: The rows that make up the Pareto front """ # prevnt mutating input df = df.copy() # flip to negative-is-better if higher_is_better: df['mean_test_score'] = -df[performance_metric] else: df['mean_test_score'] = df[performance_metric] pp = is_pareto_efficient_simple(df[['mean_test_score', cost_metric]].values) if min_performance is not None: if higher_is_better: pp = (pp & (df[performance_metric] >= min_performance)) else: pp = (pp & (df[performance_metric] <= min_performance)) return df[pp]
[docs]def plot_pareto_front(results, pareto_cut=None, plot_other=True, plot_pareto=True, hue=None, pareto_alpha=0.8, other_alpha=0.3, pareto_global=False, s=100, pareto_s=5, height=8, aspect=1, cost_metric='mean_test_compute', performance_metric='mean_test_accuracy', size_metric='mean_test_size'): """ Utility for plotting performance vs compute cost and size of a model. Can also compute and plot the pareto front. """ import seaborn pf = find_pareto_front(results, min_performance=pareto_cut) pf = pf.sort_values(cost_metric) g = seaborn.FacetGrid(results, hue=hue, height=height, aspect=aspect) # plot all data if plot_other: g.map_dataframe(seaborn.scatterplot, x=cost_metric, y=performance_metric, size=size_metric, legend=False, s=pareto_s*s, alpha=other_alpha, #hue=hue, ) def _plot_front(color, label): if pareto_global: sub = pf.copy() else: sub = pf[pf[hue] == label] seaborn.lineplot( data=sub, x=cost_metric, y=performance_metric, #color=color, label=label, alpha=pareto_alpha, legend=True, ) # plot the data along Pareto front if plot_pareto: g.map(_plot_front) return g