Examples
Some of these examples make use of the COMPAS dataset, which can be found here. Its data is stored as one comma-separated row per sample, each with 26 binary features followed by one classification label (the 27th column). The first row specifies the feature names.
Toy Dataset
from corels import CorelsClassifier
# ["loud", "samples"] is the most verbose setting possible.
# c is the regularization penalty charged for every rule in the rulelist.
# It is set to 0.0 here because, on a 4-sample toy set, a large value
# (e.g. 0.8) outweighs any accuracy a rule could add and would leave only
# the default prediction.
C = CorelsClassifier(max_card=2, c=0.0, verbosity=["loud", "samples"])

# 4 samples, 3 features
X = [[1, 0, 1], [0, 0, 0], [1, 1, 0], [0, 1, 0]]
# One binary label per sample
y = [1, 0, 0, 1]

# Feature names, one per column of X
features = ["Mac User", "Likes Pie", "Age < 20"]

# Fit the model
C.fit(X, y, features=features, prediction_name="Has a dirty computer")

# Print the resulting rulelist
print(C.rl())

# Predict on the training set
print("Prediction: " + str(C.predict(X)))
COMPAS Dataset
from corels import *
import numpy as np
# Share of the samples that goes into the training set
train_proportion = 0.8

# The CSV yields the samples, the labels, the feature names and the name
# of the predicted outcome
X, y, features, prediction = load_from_csv("data/compas.csv")

# Cut the data at the split index: leading rows train, trailing rows test
n_train = int(train_proportion * X.shape[0])
X_train, X_test = X[:n_train], X[n_train:]
y_train, y_test = y[:n_train], y[n_train:]

# With max_card=3, CORELS searches all rule antecedents that combine
# up to three features
model = CorelsClassifier(
    max_card=3,
    n_iter=1000000,
    verbosity=["progress", "rulelist"],
)

# Train on the training split; `features` is the list of feature names
model.fit(X_train, y_train, features=features, prediction_name=prediction)

# Evaluate on the held-out split
accuracy = model.score(X_test, y_test)
print("Test Accuracy: " + str(accuracy))

# Show the learned rulelist
print(model.rl())
Scikit-learn
# Example integration of corels with pandas and scikit-learn
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
import numpy as np
from corels import CorelsClassifier
# Load the iris dataset
iris = datasets.load_iris()
feature_names = list(iris.feature_names)
data = iris.data
targets = iris.target

# Binarize the features: each column becomes 1 where the value is at or
# above that column's mean (rounded to 3 decimals) and 0 elsewhere
for f in range(iris.data.shape[1]):
    mean = round(np.mean(data[:, f]), 3)
    data[:, f] = (data[:, f] >= mean)
    # Record the threshold in the feature name so the rulelist stays readable
    feature_names[f] += " >= " + str(mean)

X, y = pd.DataFrame(data, columns=feature_names), targets

# Hold out 10% of the samples for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

# One classifier per species, each trained on a one-vs-rest boolean target
# (y == 0 for Setosa, y == 1 for Versicolour)
C_Setosa = CorelsClassifier(verbosity=[])
C_Versicolour = CorelsClassifier(verbosity=[])

C_Setosa.fit(X_train, y_train == 0, features=feature_names, prediction_name="Setosa")
s_Setosa = C_Setosa.score(X_test, y_test == 0)

C_Versicolour.fit(X_train, y_train == 1, features=feature_names, prediction_name="Versicolour")
s_Versicolour = C_Versicolour.score(X_test, y_test == 1)

# Report the learned rulelist and test score for each classifier
print("SETOSA:")
print(C_Setosa.rl())
print("Setosa score = " + str(s_Setosa))

print("\n\nVERSICOLOUR:")
print(C_Versicolour.rl())
print("Versicolour score = " + str(s_Versicolour))