# Evaluate Classifier with Stratified 10-fold Cross Validation

This notebook first installs the TeX packages that are required so that the text in the graphs uses the same font as the thesis document (Computer Modern). It then runs a stratified cross validation with 10 splits, using the optimized parameters that were found during hyperparameter optimization. As before, the metrics are continuously uploaded to W&B, where a plot of the performance of the individual folds is also accessible.

In [1]:
!pip install wandb onnx onnxruntime -q
!sudo apt-get install texlive-latex-recommended 
!sudo apt-get install dvipng texlive-latex-extra texlive-fonts-recommended  
!wget http://mirrors.ctan.org/macros/latex/contrib/type1cm.zip 
!unzip type1cm.zip -d /tmp/type1cm 
!cd /tmp/type1cm/type1cm/ && sudo latex type1cm.ins
!sudo mkdir /usr/share/texmf/tex/latex/type1cm 
!sudo cp /tmp/type1cm/type1cm/type1cm.sty /usr/share/texmf/tex/latex/type1cm 
!sudo texhash 
!apt install cm-super

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m26.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.5/13.5 MB[0m [31m52.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.0/5.0 MB[0m [31m48.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m201.7/201.7 kB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.3/184.3 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.8/86.8 kB[0m [31m10.1 MB/s[0m 

In [5]:
import wandb

# Authenticate this kernel with Weights & Biases before any run or sweep is created.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33me1527193[0m ([33mflower-classification[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [6]:
import torch
import torch.optim as optim
import torch.nn.functional as F
import torch.nn as nn
from torchvision import datasets, transforms
from torchvision.models import resnet50, ResNet50_Weights
from torch.utils.data import Dataset, DataLoader, random_split, SubsetRandomSampler
from sklearn.model_selection import KFold, StratifiedKFold
import numpy as np
import os
import time
import copy
import random
import onnx
from sklearn import metrics
from sklearn.metrics import roc_curve, RocCurveDisplay, auc
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm.auto import tqdm

# Seed every random number generator used in this notebook with the same value
# and ask cuDNN for deterministic kernels, so that repeated runs are comparable.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.deterministic = True

# Run on the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [4]:
# Mount Google Drive (Colab only); build_dataset() reads the images from
# /content/drive/MyDrive/plantsdata, so this must run before training starts.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
def set_size(width, fraction=1, subplots=(1, 1)):
    """Return figure dimensions in inches for a document width given in points.

    Args:
        width: Available width in TeX points (72.27 pt per inch).
        fraction: Share of ``width`` that the figure should occupy.
        subplots: ``(rows, columns)`` of the subplot grid; the height is scaled
            by ``rows / columns``.

    Returns:
        Tuple ``(width_in, height_in)``. The height follows the golden ratio
        relative to the width (see https://disq.us/p/2940ij3).
    """
    n_rows, n_cols = subplots[0], subplots[1]

    pt_to_inch = 1 / 72.27
    golden = (5**.5 - 1) / 2

    width_in = (width * fraction) * pt_to_inch
    height_in = width_in * golden * (n_rows / n_cols)

    return (width_in, height_in)

In [8]:
# Default fonts for plots: Computer Modern for math text and STIX for regular text.
# Note: train() later calls sns.set_theme(..., rc={'text.usetex': True,
# 'font.family': 'serif', ...}), which sets font.family again for its figure.
plt.rcParams['mathtext.fontset'] = 'cm'
plt.rcParams['font.family'] = 'STIXGeneral'

In [7]:
def build_dataset(batch_size, data_dir='/content/drive/MyDrive/plantsdata'):
    """Load the complete image-folder dataset used for cross validation.

    The previous implementation additionally created a 90/10 random split and
    two ``DataLoader`` objects that were never returned, and it assigned a
    "train" transform that was immediately overwritten by the "test" transform
    (both subsets share the same underlying dataset object). The effective
    result was always the full dataset with the deterministic evaluation
    transform, which is what this function now builds directly. As a side
    effect, the global torch RNG is no longer advanced by the unused split.

    NOTE(review): because one dataset object is shared by the training and the
    validation folds, no augmentation (random crop/flip) is applied during
    training. Adding it would require a per-fold dataset wrapper.

    Args:
        batch_size: Unused; kept so existing callers keep working.
        data_dir: Root directory in ``ImageFolder`` layout (one sub-directory
            per class).

    Returns:
        ``torchvision.datasets.ImageFolder`` over ``data_dir`` whose samples
        are resized to 256, center-cropped to 224, converted to tensors and
        normalized with the ImageNet mean / standard deviation.
    """
    eval_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    return datasets.ImageFolder(data_dir, transform=eval_transform)

def build_network(num_classes=2):
    """Create a pretrained ResNet-50 with a fresh classification head.

    Args:
        num_classes: Number of output units of the new final linear layer.
            Defaults to 2, the value that was previously hard-coded.

    Returns:
        The network, moved to the module-level ``device``.
    """
    network = resnet50(weights=ResNet50_Weights.DEFAULT)

    # Replace the final fully connected layer with one sized for our classes
    network.fc = nn.Linear(network.fc.in_features, num_classes)

    return network.to(device)

def build_optimizer(network, optimizer, learning_rate):
    """Create the optimizer selected by name for the given network.

    Previously the ``optimizer`` argument was ignored and SGD was always
    returned, so a sweep over optimizers would silently have had no effect.

    Args:
        network: Module whose parameters are optimized.
        optimizer: Optimizer name, case-insensitive. ``'sgd'`` (SGD with
            momentum 0.9) and ``'adam'`` are supported; ``None`` selects SGD.
        learning_rate: Learning rate passed to the optimizer.

    Returns:
        The constructed ``torch.optim`` optimizer.

    Raises:
        ValueError: If ``optimizer`` names an unsupported optimizer.
    """
    name = 'sgd' if optimizer is None else str(optimizer).lower()

    if name == 'sgd':
        return optim.SGD(network.parameters(),
                         lr=learning_rate, momentum=0.9)
    if name == 'adam':
        return optim.Adam(network.parameters(), lr=learning_rate)

    raise ValueError("Unsupported optimizer: {!r}".format(optimizer))

def train_epoch(network, loader, optimizer, criterion, scheduler, dataset_sizes):
    """Train the network for one epoch and compute binary classification metrics.

    Class index 1 is treated as the positive class. Every batch loss is logged
    to W&B under ``train/batch_loss``; the scheduler is stepped once at the
    end of the epoch.

    Args:
        network: Model to train (switched to training mode here).
        loader: Iterable yielding ``(data, target)`` batches.
        optimizer: Optimizer updating the parameters of ``network``.
        criterion: Loss called as ``criterion(outputs, target)``.
        scheduler: Learning-rate scheduler, stepped once per epoch.
        dataset_sizes: Mapping whose ``'train'`` entry is the number of
            training samples; used to normalize loss and accuracy.

    Returns:
        Tuple ``(epoch_loss, epoch_acc, precision, recall, f, tp, fp, tn, fn)``.
        ``epoch_acc`` is a 0-dim double tensor; precision, recall and F1 are
        ``0.0`` when their denominator is zero (previously this raised
        ``ZeroDivisionError``).
    """
    network.train()
    running_loss = 0.0
    # Kept as tensors on the device so counting does not force a host sync per batch
    running_corrects = torch.zeros((), dtype=torch.long, device=device)
    # Confusion-matrix counts in the order [tp, fp, tn, fn]
    counts = torch.zeros(4, dtype=torch.long, device=device)

    for data, target in loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()

        # ➡ Forward pass
        with torch.set_grad_enabled(True):
            outputs = network(data)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, target)

        batch_loss = loss.item()
        running_loss += batch_loss * data.size(0)
        running_corrects += torch.sum(preds == target)

        # Count the outcomes explicitly instead of encoding them through
        # inf/nan results of an element-wise division
        pred_pos = preds == 1
        true_pos = target == 1
        counts += torch.stack([
            (pred_pos & true_pos).sum(),
            (pred_pos & ~true_pos).sum(),
            (~pred_pos & ~true_pos).sum(),
            (~pred_pos & true_pos).sum(),
        ])

        # ⬅ Backward pass + weight update
        loss.backward()
        optimizer.step()

        wandb.log({'train/batch_loss': batch_loss})

    scheduler.step()

    tp, fp, tn, fn = counts.tolist()

    # Guard the ratios: early epochs may predict a single class only
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    if (precision + recall) > 0:
        f = 2 * ((precision * recall) / (precision + recall))
    else:
        f = 0.0

    epoch_loss = running_loss / dataset_sizes['train']
    epoch_acc = running_corrects.double() / dataset_sizes['train']

    return (epoch_loss, epoch_acc, precision, recall, f, tp, fp, tn, fn)

def test(network, loader, optimizer, criterion, dataset_sizes):
    """Evaluate the network on a validation fold.

    Class index 1 is treated as the positive class; its softmax probability is
    used as the score for the ROC AUC.

    Args:
        network: Model to evaluate (switched to evaluation mode here).
        loader: Iterable yielding ``(data, target)`` batches.
        optimizer: Unused; kept so existing callers keep working. No gradients
            are computed during evaluation, so there is nothing to reset.
        criterion: Loss called as ``criterion(outputs, target)``.
        dataset_sizes: Mapping whose ``'test'`` entry is the number of
            evaluation samples; used to normalize loss and accuracy.

    Returns:
        Tuple ``(epoch_loss, epoch_acc, precision, recall, f, tp, fp, tn, fn,
        y_true, y_score, auc)``. ``epoch_acc`` is a 0-dim double tensor,
        ``y_true`` / ``y_score`` are NumPy arrays with the targets and the
        positive-class probabilities. Precision, recall and F1 are ``0.0``
        when their denominator is zero (previously this raised
        ``ZeroDivisionError``).
    """
    network.eval()
    running_loss = 0.0
    test_corrects = torch.zeros((), dtype=torch.long, device=device)
    # Confusion-matrix counts in the order [tp, fp, tn, fn]
    counts = torch.zeros(4, dtype=torch.long, device=device)
    target_batches = []
    score_batches = []

    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)

            # ➡ Forward pass
            outputs = network(data)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, target)

            # Convert logits to the probability of the positive class
            target_batches.append(target)
            score_batches.append(F.softmax(outputs, dim=1)[:, 1])

            running_loss += loss.item() * data.size(0)
            test_corrects += torch.sum(preds == target)

            # Count the outcomes explicitly instead of encoding them through
            # inf/nan results of an element-wise division
            pred_pos = preds == 1
            true_pos = target == 1
            counts += torch.stack([
                (pred_pos & true_pos).sum(),
                (pred_pos & ~true_pos).sum(),
                (~pred_pos & ~true_pos).sum(),
                (~pred_pos & true_pos).sum(),
            ])

    tp, fp, tn, fn = counts.tolist()

    # Guard the ratios: a fold may receive predictions of a single class only
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    if (precision + recall) > 0:
        f = 2 * ((precision * recall) / (precision + recall))
    else:
        f = 0.0

    epoch_loss = running_loss / dataset_sizes['test']
    epoch_acc = test_corrects.double() / dataset_sizes['test']

    # Concatenate once at the end; targets are cast to float as before
    if target_batches:
        targets = torch.cat(target_batches).float()
        probabilities = torch.cat(score_batches)
    else:
        targets = torch.empty([0])
        probabilities = torch.empty([0])

    y_true = targets.detach().cpu().numpy()
    y_score = probabilities.detach().cpu().numpy()

    # Named auc_score so the imported sklearn `auc` function is not shadowed
    auc_score = metrics.roc_auc_score(y_true, y_score)

    return (epoch_loss, epoch_acc, precision, recall, f, tp, fp, tn, fn, y_true, y_score, auc_score)

In [1]:
def train(config=None):
    """Run stratified k-fold cross validation inside one W&B run.

    For every fold a fresh network is trained for ``config.epochs`` epochs.
    The epoch with the highest validation AUC determines the ROC curve of the
    fold and the weights that are exported (PyTorch state dict and ONNX) as
    W&B artifacts. After all folds, the per-fold ROC curves, their mean and a
    one-standard-deviation band are plotted, saved as
    ``classifier-hyp-folds.pdf`` and uploaded as an artifact.

    Fixes compared to the previous version:
      * ``ax.plot`` no longer receives ``legend=False``, which is not a valid
        line property and made the call fail.
      * The exported weights are those of the best epoch, matching the
        artifact descriptions (previously the last epoch was exported).
      * The mean-ROC legend label has its closing parenthesis.

    Args:
        config: Optional configuration passed to ``wandb.init``. When called
            by ``wandb.agent`` the values are provided by the sweep
            controller. Fields read: ``batch_size``, ``k_splits``,
            ``optimizer``, ``learning_rate``, ``step_size``, ``epochs``.
    """
    # Style the plots (with grid this time)
    sns.set_theme(style='whitegrid',
                  rc={'text.usetex': True, 'font.family': 'serif', 'axes.labelsize': 16,
                      'font.size': 16, 'legend.fontsize': 11,
                      'xtick.labelsize': 12, 'ytick.labelsize': 12})

    # Initialize a new wandb run
    with wandb.init(config=config):
        # If called by wandb.agent, as below,
        # this config will be set by Sweep Controller
        config = wandb.config

        dataset = build_dataset(config.batch_size)

        print("Dataset targets: ", dataset.targets)

        splits = StratifiedKFold(n_splits=config.k_splits, shuffle=True, random_state=42)

        # Aggregate metrics from best epochs for ROC plot
        tprs = []
        aucs = []
        mean_fpr = np.linspace(0, 1, 100)

        sns.set_palette('ch:light=0.8,gamma=1.2', n_colors=12)
        fig, ax = plt.subplots(1, 1, figsize=(6, 6))

        # Iterate over the folds
        for fold, (train_idx, val_idx) in enumerate(splits.split(np.zeros(len(dataset)), dataset.targets)):

            print('Fold {}'.format(fold + 1))

            train_sampler = SubsetRandomSampler(train_idx)
            test_sampler = SubsetRandomSampler(val_idx)
            train_loader = DataLoader(dataset, batch_size=config.batch_size, sampler=train_sampler)
            test_loader = DataLoader(dataset, batch_size=config.batch_size, sampler=test_sampler)

            dataset_sizes = {'train': len(train_loader.sampler), 'test': len(test_loader.sampler)}

            print("Dataset sizes: ", dataset_sizes)

            network = build_network()
            optimizer = build_optimizer(network, config.optimizer, config.learning_rate)
            criterion = nn.CrossEntropyLoss()
            # Decay the LR by StepLR's default factor every `config.step_size` epochs
            exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer, config.step_size)

            best_test_auc = 0.0
            best_y_true = []
            best_y_score = []
            best_state = None

            for epoch in tqdm(range(config.epochs)):
                (train_loss, train_acc, train_precision, train_recall, train_f,
                 train_tp, train_fp, train_tn, train_fn) = train_epoch(network, train_loader, optimizer,
                                                                       criterion, exp_lr_scheduler,
                                                                       dataset_sizes)
                wandb.log({'epoch': epoch, 'train/epoch_loss': train_loss, 'train/epoch_acc': train_acc,
                           'train/precision': train_precision, 'train/recall': train_recall, 'train/f1-score': train_f,
                           'train/tp': train_tp, 'train/fp': train_fp, 'train/tn': train_tn, 'train/fn': train_fn})

                (test_loss, test_acc, test_precision, test_recall, test_f,
                 test_tp, test_fp, test_tn, test_fn, y_true, y_score, test_auc) = test(network, test_loader,
                                                            optimizer, criterion,
                                                            dataset_sizes)
                wandb.log({'test/epoch_loss': test_loss, 'test/epoch_acc': test_acc,
                       'test/precision': test_precision, 'test/recall': test_recall, 'test/f1-score': test_f,
                       'test/tp': test_tp, 'test/fp': test_fp, 'test/tn': test_tn, 'test/fn': test_fn,
                       'test/y_true': y_true, 'test/y_score': y_score, 'test/auc': test_auc})
                # Always keep the first epoch, afterwards only strict improvements
                if best_state is None or test_auc > best_test_auc:
                    best_y_true = y_true
                    best_y_score = y_score
                    best_test_auc = test_auc
                    # Snapshot on the CPU so later epochs cannot modify it
                    best_state = {name: tensor.detach().cpu().clone()
                                  for name, tensor in network.state_dict().items()}

            # Get tpr and fpr
            fpr, tpr, thresh = metrics.roc_curve(best_y_true, best_y_score)
            ax.plot(fpr,
                    tpr,
                    #label=r"Fold %d (AUC = %0.2f)" % (fold, best_test_auc),
                    lw=1,
                    alpha=0.5)
            interp_tpr = np.interp(mean_fpr, fpr, tpr)
            interp_tpr[0] = 0.0
            tprs.append(interp_tpr)
            aucs.append(best_test_auc)

            # Export the weights of the best epoch, not those of the last one
            if best_state is not None:
                network.load_state_dict(best_state)
            network = network.to(torch.device('cpu'))
            network.eval()
            # Save model as artifact
            model_pt = wandb.Artifact(
                "trained-model-pt", type="model",
                description="Best Epoch per Fold Pytorch"
            )
            torch.save(network.state_dict(), "resnet-fold-{}.pt".format(fold))
            model_pt.add_file("resnet-fold-{}.pt".format(fold))
            wandb.log_artifact(model_pt)

            # Export model to onnx
            model_onnx = wandb.Artifact(
                "trained-model-onnx", type="model",
                description="Best Epoch per Fold ONNX"
            )
            # Let's create a dummy input tensor
            dummy_input = torch.randn(1, 3, 224, 224, requires_grad=True)

            # Export the model
            torch.onnx.export(network,
                              dummy_input,
                              "resnet-fold-{}.onnx".format(fold),
                              export_params=True,
                              opset_version=11,
                              do_constant_folding=True,
                              input_names = ['input'],
                              output_names = ['output']
            )
            model_onnx.add_file("resnet-fold-{}.onnx".format(fold))
            wandb.log_artifact(model_onnx)

        ax.plot([0, 1], [0, 1], "k--", label="chance level (AUC = 0.5)")
        mean_tpr = np.mean(tprs, axis=0)
        mean_tpr[-1] = 1.0
        mean_auc = auc(mean_fpr, mean_tpr)
        std_auc = np.std(aucs)
        ax.plot(
            mean_fpr,
            mean_tpr,
            label=r"Mean ROC (AUC = %0.2f $\pm$ %0.2f)" % (mean_auc, std_auc),
            lw=1,
            alpha=1,
        )

        # Shade one standard deviation around the mean curve, clipped to [0, 1]
        std_tpr = np.std(tprs, axis=0)
        tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
        tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
        ax.fill_between(
            mean_fpr,
            tprs_lower,
            tprs_upper,
            color="grey",
            alpha=0.4,
            label=r"$\pm$ 1 std. dev.",
        )

        ax.set(
            xlim=[-0.05, 1.05],
            ylim=[-0.05, 1.05],
            xlabel="False Positive Rate",
            ylabel="True Positive Rate",
            title=f"Mean ROC curve with variability\n(Positive label `{dataset.classes[1]}')",
        )
        ax.axis("square")
        ax.legend(loc="lower right")
        fig.tight_layout()

        fig.savefig('classifier-hyp-folds.pdf', format='pdf', bbox_inches='tight')
        artifact_plot = wandb.Artifact(name="ROC", type="metric")
        artifact_plot.add_file(
            local_path='classifier-hyp-folds.pdf'
        )
        wandb.log_artifact(artifact_plot)

In [2]:
# Objective that the sweep controller tracks for each run.
metric = {'name': 'test/f1-score', 'goal': 'maximize'}

# All hyperparameters are pinned to a single value, so the sweep only serves
# as the launcher for the cross-validation run.
parameters_dict = {
    'optimizer': {'values': ['sgd']},
    'epochs': {'value': 25},
    'batch_size': {'value': 64},
    'learning_rate': {'value': 0.01},
    'step_size': {'value': 5},
    'k_splits': {'value': 10},
}

# Complete sweep definition handed to wandb.sweep().
sweep_config = {
    'method': 'random',
    'metric': metric,
    'parameters': parameters_dict,
}

In [9]:
# Register the sweep definition in the W&B project "classifier-optimized";
# the returned id is what the agent below attaches to.
sweep_id = wandb.sweep(sweep_config, project="classifier-optimized")

Create sweep with ID: fp9p6hei
Sweep URL: https://wandb.ai/flower-classification/classifier-optimized/sweeps/fp9p6hei


In [10]:
# Start an agent that calls train() exactly once (count=1) for this sweep.
# All cells defining build_dataset, build_network, build_optimizer, train_epoch
# and test must have been executed in the current kernel first; the recorded
# output of this cell shows a NameError for build_dataset when they were not.
wandb.agent(sweep_id, train, count=1)

[34m[1mwandb[0m: Agent Starting Run: puf6qvta with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	k_splits: 10
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	step_size: 5


Traceback (most recent call last):
  File "/run/user/1000/ipykernel_27841/4074982736.py", line 16, in train
    dataset = build_dataset(config.batch_size)
              ^^^^^^^^^^^^^
NameError: name 'build_dataset' is not defined


VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run puf6qvta errored: NameError("name 'build_dataset' is not defined")
[34m[1mwandb[0m: [32m[41mERROR[0m Run puf6qvta errored: NameError("name 'build_dataset' is not defined")
