XOR classification

A simple example for getting started with emlearn.

This example trains a RandomForestClassifier model on an XOR dataset, generates C code for the model using the emlearn Python package, and then loads the model in a C program and uses it to make predictions.

import os.path

import emlearn
import numpy
import pandas
import seaborn
import matplotlib.pyplot as plt

# Directory of this example. The generated xor_model.h is written here,
# and xor.c is looked up here when compiling.
try:
    # When executed as regular .py script
    here = os.path.dirname(__file__)
except NameError:
    # When executed as Jupyter notebook / Sphinx Gallery
    # (__file__ is not defined there, so fall back to the working directory)
    here = os.getcwd()

Create dataset

The XOR problem is a very simple example of a dataset which is not linearly separable.

def make_noisy_xor(seed=42):
    """
    Generate a noisy two-dimensional XOR classification dataset

    Draws 300 points from a standard normal distribution and labels each
    point True when exactly one of its two coordinates is positive.
    15 row positions are then drawn at random (with replacement, so some
    may repeat) and the labels at those positions are inverted.

    @seed: seed for the random number generator, making the data reproducible

    Returns a DataFrame with the feature columns 0 and 1
    (coordinates multiplied by 255 and converted to int16)
    and a boolean 'label' column
    """
    rng = numpy.random.RandomState(seed)
    X = rng.randn(300, 2)
    y = numpy.logical_xor(X[:, 0] > 0, X[:, 1] > 0)

    # Add some noise
    flip = rng.randint(300, size=15)
    y[flip] = ~y[flip]

    # Scale up before converting to integers, so the features keep their resolution
    df = pandas.DataFrame((X * 255).astype(numpy.int16))
    df['label'] = y

    return df

def dataset_split_random(data, val_size=0.25, test_size=0.25, random_state=3, column='split'):
    """
    Randomly assign every row of a DataFrame to one of train / val / test

    @val_size and @test_size are fractions of the whole dataset.
    The train set gets whatever remains.

    Returns a copy of @data, where @column holds the name of the split
    that each row was assigned to. The input is not modified.
    """
    from sklearn.model_selection import train_test_split

    # Take out the test rows first
    remainder_idx, test_idx = train_test_split(
        data.index,
        test_size=test_size,
        random_state=random_state,
    )

    # The validation fraction must be re-expressed relative to the rows
    # that are left after the test rows have been removed
    train_fraction = (1.0 - val_size - test_size)
    val_fraction = (val_size / (val_size+train_fraction))
    train_idx, val_idx = train_test_split(
        remainder_idx,
        test_size=val_fraction,
        random_state=random_state,
    )

    marked = data.copy()
    assignments = (
        ('train', train_idx),
        ('val', val_idx),
        ('test', test_idx),
    )
    for split_name, split_idx in assignments:
        marked.loc[split_idx, column] = split_name

    return marked

# Create the data, and mark each row as train/val/test.
# With the default val_size of 0.25, this gives 65% train, 25% val, 10% test.
# Using the split as index allows selecting a subset with dataset.loc['train']
dataset = make_noisy_xor()
dataset = dataset_split_random(dataset, test_size=0.10).set_index('split')

# Plot the dataset
ax = seaborn.scatterplot(data=dataset, x=0, y=1, hue='label')
ax.axvline(0.0, ls='--', alpha=0.5, color='black')
ax.axhline(0.0, ls='--', alpha=0.5, color='black')
# The features are standard-normal values multiplied by 255 (see make_noisy_xor).
# The axis limits must use the same scale, otherwise nearly all points fall outside the plot
axis_limit = 4.0 * 255
ax.set_xlim(-axis_limit, +axis_limit)
ax.set_ylim(-axis_limit, +axis_limit)

# Show columns of the data
print(dataset.head(5))
xor
         0    1  label
split
val    126  -35   True
val    165  388  False
val    -59  -59  False
train  402  195  False
train -119  138   True

Train ML model

Using the standard process with scikit-learn

def train_model(dataset, seed=42):
    """
    Fit a small random forest classifier on the training rows of the dataset

    @dataset: DataFrame indexed by split name, with feature columns 0 and 1 and a 'label' column
    @seed: random_state passed to the classifier

    Returns the fitted RandomForestClassifier
    """
    from sklearn.ensemble import RandomForestClassifier

    # Only rows marked as 'train' are used for fitting
    feature_columns = [0, 1]
    features = dataset.loc['train', feature_columns]
    labels = dataset.loc['train', 'label']

    classifier = RandomForestClassifier(
        n_estimators=10,
        max_depth=5,
        random_state=seed,
    )
    classifier.fit(features, labels)

    return classifier


# Fit on the 'train' rows. The fitted model is used further down,
# both for conversion to C and as the reference when evaluating
model = train_model(dataset)

Convert model to C using emlearn

def convert_model(model):
    """
    Convert a trained model to C code using emlearn, and save it as a header file

    The code is written to xor_model.h in the directory of this example (`here`).
    It is saved under the name 'xor', which xor.c relies on when calling xor_predict().

    @model: the trained model to convert
    """
    model_filename = os.path.join(here, 'xor_model.h')
    cmodel = emlearn.convert(model)
    # The return value of save() is not needed, the file on disk is the output
    cmodel.save(file=model_filename, name='xor')

    # Sanity check that the file was actually written
    assert os.path.exists(model_filename)
    print(f"Generated {model_filename}")

# Generate xor_model.h, the header that xor.c includes
convert_model(model)
Generated /home/docs/checkouts/readthedocs.org/user_builds/emlearn/checkouts/latest/examples/xor_model.h

Use generated C model to make predictions

xor.c : Executable that takes features as commandline arguments, and prints the predicted class to stdout

#include "xor_model.h" // emlearn generated model

#include <stdint.h> // int16_t, INT16_MIN, INT16_MAX
#include <stdio.h> // printf, fprintf
#include <stdlib.h> // strtod

// Parse two feature values from the command line, run the emlearn
// generated classifier on them, and print the predicted class to stdout.
//
// Usage: xor FEATURE0 FEATURE1
//
// Returns 0 on success, -1 if the arguments are missing or invalid,
// or the negative error code from xor_predict() if prediction fails.
int
main(int argc, const char *argv[])
{
    if (argc != 3) {
        fprintf(stderr, "Usage: %s FEATURE0 FEATURE1\n", (argc > 0) ? argv[0] : "xor");
        return -1;
    }

    // The model takes int16_t features. Inputs are parsed as floating point,
    // so that values like "12.0" are accepted, and then converted to integers
    int16_t features[2];
    for (int i = 0; i < 2; i++) {
        const char *text = argv[i + 1];
        char *end = NULL;
        const double value = strtod(text, &end);

        // Reject empty/non-numeric input and trailing garbage
        if (end == text || *end != '\0') {
            fprintf(stderr, "Invalid number: '%s'\n", text);
            return -1;
        }
        // Converting an out-of-range value to int16_t is undefined behavior.
        // The negated form also rejects NaN
        if (!(value >= INT16_MIN && value <= INT16_MAX)) {
            fprintf(stderr, "Value out of range for int16: '%s'\n", text);
            return -1;
        }
        features[i] = (int16_t)value;
    }

    const int out = xor_predict(features, 2);
    if (out < 0) {
        return out; // error
    }

    printf("%d\n", out);
    return 0;
}
# Python wrapper for the C executable
# calls the C program as a subprocess to run the model
def predict(bin_path, X, verbose=1):
    """
    Classify each row of X by running the executable at @bin_path

    The program is started once per row, with the first two values of the row
    as command-line arguments. Its standard output is parsed as an integer class.

    @bin_path: path to the executable
    @X: rows of feature values, anything that numpy.array() accepts
    @verbose: if above 0, print the inputs and output of each run

    Returns a numpy array with one predicted class per row
    """
    import subprocess

    predictions = []
    for row in numpy.array(X):
        first, second = row[0], row[1]
        command = [ bin_path, str(first), str(second) ]
        stdout = subprocess.check_output(command)
        predicted = int(stdout)
        if verbose > 0:
            print(f"run xor in1={first:+.2f} in2={second:+.2f} out={predicted} ")
        predictions.append(predicted)

    return numpy.array(predictions)

def evaluate_model(dataset):
    """
    Compare the converted C model against the original Python model

    Compiles xor.c from the directory of this example (`here`) into ./examples,
    runs the resulting executable on the rows marked 'test',
    and prints the F1-score of the C predictions and of the Python predictions.

    Uses the module-level `model` as the Python reference.

    @dataset: DataFrame indexed by split name, with feature columns 0 and 1 and a 'label' column
    """

    # Compile the xor.c example program
    source_file = os.path.join(here, 'xor.c')
    executable = emlearn.common.compile_executable(
        source_file,
        './examples',
        include_dirs=[ emlearn.includedir ],
    )

    print('Compiled C excutable', executable)

    # Make predictions on dataset
    features = dataset.loc['test', [0, 1]]
    labels = dataset.loc['test', 'label']
    predicted_c = predict(executable, features)
    predicted_py = model.predict(features)

    # Compute scores using converted C model, and original Python model
    from sklearn.metrics import f1_score
    score_c = f1_score(labels, predicted_c)
    score_py = f1_score(labels, predicted_py)

    print(f'\nF1-score python={score_py} c={score_c}')

# Compile xor.c, run it on the 'test' rows, and compare against the Python model
evaluate_model(dataset)
Compiled C excutable ./examples/main
run xor in1=-118.00 in2=-118.00 out=0
run xor in1=-3.00 in2=-269.00 out=0
run xor in1=+82.00 in2=-98.00 out=1
run xor in1=-282.00 in2=-305.00 out=0
run xor in1=+357.00 in2=-357.00 out=1
run xor in1=-395.00 in2=+17.00 out=1
run xor in1=-234.00 in2=+395.00 out=1
run xor in1=+475.00 in2=+120.00 out=0
run xor in1=+209.00 in2=+483.00 out=0
run xor in1=-62.00 in2=-192.00 out=0
run xor in1=+131.00 in2=+131.00 out=0
run xor in1=-123.00 in2=+20.00 out=1
run xor in1=-186.00 in2=+55.00 out=1
run xor in1=-516.00 in2=+47.00 out=1
run xor in1=-112.00 in2=+33.00 out=1
run xor in1=+139.00 in2=-51.00 out=1
run xor in1=+295.00 in2=+201.00 out=0
run xor in1=-4.00 in2=-255.00 out=0
run xor in1=-336.00 in2=+467.00 out=1
run xor in1=+1.00 in2=+11.00 out=1
run xor in1=-63.00 in2=+247.00 out=1
run xor in1=-246.00 in2=+174.00 out=1
run xor in1=-301.00 in2=-520.00 out=0
run xor in1=+383.00 in2=+18.00 out=0
run xor in1=-135.00 in2=-202.00 out=0
run xor in1=-141.00 in2=-305.00 out=0
run xor in1=-264.00 in2=-48.00 out=0
run xor in1=-356.00 in2=+143.00 out=1
run xor in1=+12.00 in2=-211.00 out=1
run xor in1=+83.00 in2=-55.00 out=1

F1-score python=0.967741935483871 c=0.967741935483871

Total running time of the script: (0 minutes 0.525 seconds)

Gallery generated by Sphinx-Gallery