Compare SPPAM with LogisticRegression

A comparison of LogisticRegression and sppamCV including sppam.sppam.SPPAM

Comparison of SPPAM and LogisticRegression

logit [0.91 0.83]
SPPAM [0.86 0.66]
logit [0.96 0.9 ]
SPPAM [0.88 0.82]
logit [0.95 0.87]
SPPAM [0.88 0.77]
logit [0.99 0.95]
SPPAM [0.94 0.86]
logit [0.99 0.96]
SPPAM [0.91 0.85]
logit [0.78 0.71]
SPPAM [0.76 0.72]
logit [0.98 0.92]
SPPAM [0.87 0.77]
logit [0.92 0.88]
SPPAM [0.88 0.78]
logit [0.9  0.84]
SPPAM [0.87 0.81]
logit [0.88 0.79]
SPPAM [0.8  0.71]
logit [0.93 0.83]
SPPAM [0.86 0.76]
logit [0.93 0.86]
SPPAM [0.83 0.72]
logit [0.91 0.85]
SPPAM [0.86 0.78]
logit [0.93 0.85]
SPPAM [0.91 0.85]
logit [0.89 0.8 ]
SPPAM [0.85 0.77]
logit [0.96 0.91]
SPPAM [0.92 0.84]
logit [0.89 0.81]
SPPAM [0.84 0.73]
logit [0.92 0.88]
SPPAM [0.89 0.82]
logit [0.97 0.91]
SPPAM [0.95 0.83]
logit [0.99 0.97]
SPPAM [0.94 0.84]

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.preprocessing import StandardScaler

from sppam import SPPAM

# Per-problem scores, one entry per generated classification problem.
logit_auc = []
logit_acc = []
sppam_auc = []
sppam_acc = []

# A single seeded RandomState is shared across iterations so that every
# iteration draws a different, but reproducible, problem.
rng = np.random.RandomState(11)
for _ in range(20):
    # Make a classification problem
    X, y_d = make_classification(
        n_samples=100,
        n_features=20,
        n_informative=10,
        n_redundant=5,
        n_classes=2,
        hypercube=True,
        random_state=rng,
    )
    X_d = StandardScaler().fit_transform(X)

    for desc, clf in [('logit', LogisticRegression(max_iter=10000)), ('SPPAM', SPPAM())]:
        # Fit once and reuse the fitted estimator for both metrics, so AUC and
        # accuracy describe the same model and no redundant refits are done.
        clf.fit(X_d, y_d)
        # NOTE: both metrics are computed on the same data the model was
        # fitted on, i.e. they are in-sample scores.
        auc = roc_auc_score(y_true=y_d, y_score=clf.predict_proba(X_d)[:, 1])
        acc = accuracy_score(y_true=y_d, y_pred=clf.predict(X_d))
        print(desc, np.round((auc, acc), 2))
        if desc == 'logit':
            logit_auc.append(auc)
            logit_acc.append(acc)
        else:
            sppam_auc.append(auc)
            sppam_acc.append(acc)

# Per-problem AUC gap (LogisticRegression minus SPPAM); positive values mean
# LogisticRegression scored higher on that problem.
diff = np.subtract(logit_auc, sppam_auc)

# plot the results: AUC per problem, accuracy per problem, histogram of gaps
fig, axs = plt.subplots(3, 1, layout='constrained')
xdata = np.arange(len(logit_acc))  # ordinal index of each generated problem
axs[0].plot(xdata, logit_auc, label='LogisticRegression')
axs[0].plot(xdata, sppam_auc, label='SPPAM')
axs[0].set_title('Comparison of SPPAM and LogisticRegression')
axs[0].set_ylabel('AUC')
axs[0].legend()

axs[1].plot(xdata, logit_acc, label='LogisticRegression')
axs[1].plot(xdata, sppam_acc, label='SPPAM')
axs[1].set_ylabel('Accuracy')
axs[1].legend()

# On a histogram the differences lie along x and the bin counts along y,
# so label each axis accordingly.
axs[2].hist(diff)
axs[2].set_xlabel('AUC difference (LogisticRegression - SPPAM)')
axs[2].set_ylabel('Count')
stats = pd.DataFrame(diff).describe().loc[['mean', 'std']].to_string(header=False)
# Anchor the summary in axes coordinates so it stays inside the panel
# regardless of the range of the differences or the bin heights.
axs[2].text(0.02, 0.95, stats, transform=axs[2].transAxes, va='top')
fig.set_size_inches(18.5, 20)
plt.show()

Total running time of the script: ( 0 minutes 8.127 seconds)

Gallery generated by Sphinx-Gallery