Hi, during validation I wish to calculate Cohen's kappa, which cannot be calculated batch-wise but needs the entire confusion matrix. Hence, simply calculating performance on validation batches does not work for me. For comparison, I also calculate kappa on the training set, which is a different size from the validation set. So I don't see how I can get validation_step to work on both data sets (if they were the same size, I guess I could include everything in the validation loader and then calculate kappa in validation_step_end?)
Should I be calculating kappa in both validation_step_end and training_step_end?
I hope the question makes sense.
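To illustrate what I mean by "cannot be calculated batch-wise": averaging per-batch kappas generally does not equal the kappa computed from the full confusion matrix, since chance agreement depends on the marginals of the whole set. A minimal numpy sketch (the `cohen_kappa` helper here is my own stand-in for sklearn's unweighted `cohen_kappa_score`, and the data is synthetic):

```python
import numpy as np

def cohen_kappa(y_true, y_pred, n_classes):
    """Unweighted Cohen's kappa from the full confusion matrix."""
    cm = np.zeros((n_classes, n_classes))
    np.add.at(cm, (y_true, y_pred), 1)          # accumulate confusion matrix
    n = cm.sum()
    p_o = np.trace(cm) / n                      # observed agreement
    p_e = (cm.sum(0) * cm.sum(1)).sum() / n**2  # chance agreement from marginals
    return (p_o - p_e) / (1 - p_e)

rng = np.random.default_rng(0)
y_true = rng.integers(0, 5, 1000)
# predictions that agree with the labels ~70% of the time
y_pred = np.where(rng.random(1000) < 0.7, y_true, rng.integers(0, 5, 1000))

# kappa on the full set
full = cohen_kappa(y_true, y_pred, 5)
# mean of per-batch kappas (batch size 100) -- generally a different number
batched = np.mean([cohen_kappa(y_true[i:i + 100], y_pred[i:i + 100], 5)
                   for i in range(0, 1000, 100)])
print(full, batched)
```

So whatever the Lightning hooks end up being, the predictions for the whole epoch have to be accumulated before kappa is computed.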
Below is my current (admittedly not pretty) implementation, without Lightning:
At the end of each training epoch:
```python
# validate after every epoch:
with torch.no_grad():
    net.eval()
    totTrainLoss, trainKappa, trainAcc = performanceMetrics(
        net, trainLabels_tensor.numpy().reshape(-1, 1), trainSampler)
    totValLoss, valKappa, valAcc = performanceMetrics(
        net, valLabels_tensor.numpy().reshape(-1, 1), valSampler)

# save and print performances
performanceMeasures.append(np.array([totTrainLoss, totValLoss, trainKappa,
                                     valKappa, trainAcc, valAcc]))
print(np.round([iEpoch, totTrainLoss, totValLoss, trainKappa, valKappa,
                trainAcc, valAcc], decimals=3))
```
The function looks like this:
```python
def performanceMetrics(net, labels, dataLoader):
    with torch.no_grad():
        net.eval()
        totLoss = 0
        epochPreds = labels.reshape(-1, 1) * 0
        for xtemp, ytemp, dataIdx in dataLoader:
            xbatch = xtemp.to(cuda)
            ybatch = ytemp.to(cuda)
            y_pred = net(xbatch)  # one-hot encoded
            totLoss += loss_fn_val(y_pred, ybatch).cpu().numpy()
            a, b = torch.max(y_pred.cpu(), 1)  # b is 0-4
            epochPreds[dataIdx] = torch.unsqueeze(b, 1)
        totLoss /= labels.size
        kappa = cohen_kappa_score(epochPreds, labels)
        acc = np.mean(epochPreds == labels)
        return totLoss, kappa, acc
```