Examples

Some of these examples make use of the COMPAS dataset, which can be found here. Its data is stored as a CSV file with one comma-separated row per sample; each row holds 26 binary features followed by 1 classification label (the 27th column). The first row specifies the feature names.

Toy Dataset

from corels import CorelsClassifier

# ["loud", "samples"] is the most verbose setting possible.
# c is the regularization penalty charged for every rule in the list. On a
# dataset this small (4 samples, balanced labels) the default-only rulelist
# already has an error of 0.5, so any penalty of 0.5 or more makes the empty
# rulelist optimal and nothing is learned. Use no penalty for this toy example.
C = CorelsClassifier(max_card=2, c=0.0, verbosity=["loud", "samples"])

# 4 samples, 3 binary features each
X = [[1, 0, 1], [0, 0, 0], [1, 1, 0], [0, 1, 0]]
# One binary label per sample
y = [1, 0, 0, 1]
# Feature names, in the same order as the columns of X
features = ["Mac User", "Likes Pie", "Age < 20"]

# Fit the model
C.fit(X, y, features=features, prediction_name="Has a dirty computer")

# Print the resulting rulelist
print(C.rl())

# Predict on the training set
print("Prediction: " + str(C.predict(X)))

COMPAS Dataset

# Import the two names this example uses explicitly instead of a wildcard,
# so it is clear where each one comes from
from corels import CorelsClassifier, load_from_csv
import numpy as np

# Fraction of the samples used for training; the rest is held out for testing
train_proportion = 0.8

# Unpack the samples, the labels, the feature names and the prediction name
X, y, features, prediction = load_from_csv("data/compas.csv")

# A maximum cardinality of 3 makes CORELS search all rule antecedents
# with up to three features combined together
clf = CorelsClassifier(max_card=3, n_iter=1000000, verbosity=["progress", "rulelist"])

# Generate train and test sets. The rows are split in file order (no
# shuffling): the first `train_split` rows train, the remaining rows test.
train_split = int(train_proportion * X.shape[0])

X_train, X_test = X[:train_split], X[train_split:]
y_train, y_test = y[:train_split], y[train_split:]

# Fit the model. Features is a list of the feature names
clf.fit(X_train, y_train, features=features, prediction_name=prediction)

# Score the model on the test set
accuracy = clf.score(X_test, y_test)

print("Test Accuracy: " + str(accuracy))

# Print the rulelist
print(clf.rl())

Scikit-learn

# Example integration of corels with pandas and scikit-learn

import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
import numpy as np
from corels import CorelsClassifier


# Fetch the iris dataset and keep handles on its samples, labels and names
iris = datasets.load_iris()
data, targets = iris.data, iris.target
feature_names = list(iris.feature_names)

# Binarize every feature column: 1 where the value reaches the column's
# (rounded) mean, 0 otherwise, and record the threshold in the feature name
for col in range(data.shape[1]):
    column = data[:, col]  # a view, so the assignment below writes into `data`
    threshold = round(np.mean(column), 3)
    column[:] = column >= threshold
    feature_names[col] = f"{feature_names[col]} >= {threshold!s}"

X = pd.DataFrame(data, columns=feature_names)
y = targets

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)


def fit_and_score(class_index, species):
    """Fit a classifier for "is this sample `species`?" and score it on the test split."""
    model = CorelsClassifier(verbosity=[])
    # Comparing the labels with class_index yields the binary target for this class
    model.fit(X_train, y_train == class_index, features=feature_names, prediction_name=species)
    return model, model.score(X_test, y_test == class_index)


C_Setosa, s_Setosa = fit_and_score(0, "Setosa")
C_Versicolour, s_Versicolour = fit_and_score(1, "Versicolour")

# Report the rulelist and test score of each classifier, one item per line
print("SETOSA:", C_Setosa.rl(), f"Setosa score = {s_Setosa!s}", sep="\n")

print("\n\nVERSICOLOUR:", C_Versicolour.rl(), f"Versicolour score = {s_Versicolour!s}", sep="\n")