Source code for inFairness.auditor.auditor

import torch
import numpy as np

from abc import ABCMeta
from scipy.stats import norm
from inFairness.utils.datautils import convert_tensor_to_numpy
from inFairness.auditor.datainterface import AuditorResponse


class Auditor(metaclass=ABCMeta):
    """Abstract class for model auditors, e.g. SenSeI or SenSR"""

    def __init__(self):
        super(Auditor, self).__init__()
    def generate_worst_case_examples(self, *args, **kwargs):
        """Generates worst-case examples for the input batch of data samples"""
        raise NotImplementedError(
            "Method `generate_worst_case_examples` not implemented."
        )
    def compute_loss_ratio(self, X_audit, X_worst, Y_audit, network, loss_fn):
        """Compute the ratio of the loss on worst-case examples (obtained by
        solving the gradient flow attack) to the loss on the original audit samples

        Parameters
        -------------
            X_audit: torch.Tensor
                Auditing samples. Shape: (n_samples, n_features)
            X_worst: torch.Tensor
                Worst-case counterparts of the auditing samples.
                Shape: (n_samples, n_features)
            Y_audit: torch.Tensor
                Labels of the auditing samples. Shape: (n_samples)
            network: torch.nn.Module
                Model under audit
            loss_fn: callable
                Loss function; must accept a ``reduction="none"`` keyword argument

        Returns
        ---------
            loss_ratios: numpy.ndarray
                Ratio of the loss on worst-case examples to the loss on the
                original audit samples
        """

        with torch.no_grad():
            Y_pred_worst = network(X_worst)
            Y_pred_original = network(X_audit)

            loss_vals_adversarial = loss_fn(Y_pred_worst, Y_audit, reduction="none")
            loss_vals_original = loss_fn(Y_pred_original, Y_audit, reduction="none")

        loss_vals_adversarial = convert_tensor_to_numpy(loss_vals_adversarial)
        loss_vals_original = convert_tensor_to_numpy(loss_vals_original)

        loss_ratio = np.divide(loss_vals_adversarial, loss_vals_original)
        return loss_ratio
    def compute_audit_result(self, loss_ratios, threshold=None, confidence=0.95):
        """Computes auditing statistics given loss ratios and a user-specified
        acceptance threshold

        Parameters
        -------------
            loss_ratios: numpy.ndarray
                Loss ratios between worst-case and original data samples
            threshold: float, optional
                User-specified acceptance threshold value.
                If no value is specified, the procedure simply returns the mean
                and lower bound of the loss ratio, leaving the determination of
                the model's fairness to the user. If a value is specified, the
                procedure also determines whether the model is individually fair.
            confidence: float, optional
                Confidence value. Default = 0.95

        Returns
        ----------
            audit_result: AuditorResponse
                Data interface with auditing results and statistics
        """

        loss_ratios = loss_ratios[np.isfinite(loss_ratios)]

        lossratio_mean = np.mean(loss_ratios)
        lossratio_std = np.std(loss_ratios)

        N = loss_ratios.shape[0]
        z = norm.ppf(confidence)
        lower_bound = lossratio_mean - z * lossratio_std / np.sqrt(N)

        if threshold is None:
            response = AuditorResponse(
                lossratio_mean=lossratio_mean,
                lossratio_std=lossratio_std,
                lower_bound=lower_bound,
            )
        else:
            tval = (lossratio_mean - threshold) / lossratio_std
            tval *= np.sqrt(N)
            pval = 1 - norm.cdf(tval)

            is_model_fair = False if pval < (1 - confidence) else True

            response = AuditorResponse(
                lossratio_mean=lossratio_mean,
                lossratio_std=lossratio_std,
                lower_bound=lower_bound,
                threshold=threshold,
                pval=pval,
                confidence=confidence,
                is_model_fair=is_model_fair,
            )

        return response
    def audit(self, *args, **kwargs):
        """Audit model for individual fairness"""
        raise NotImplementedError("Method not implemented")
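
A minimal sketch of how `compute_loss_ratio` can be called. It assumes a toy
classification model and synthetic audit data, and uses a noisy copy of
`X_audit` as a stand-in for the worst-case examples that a concrete auditor
(e.g. `SenSeIAuditor`) would produce via `generate_worst_case_examples`:

    import torch
    import torch.nn.functional as F

    from inFairness.auditor.auditor import Auditor

    # Toy classifier and synthetic audit data (illustrative only)
    network = torch.nn.Linear(5, 2)
    X_audit = torch.randn(100, 5)
    Y_audit = torch.randint(0, 2, (100,))

    # Stand-in for worst-case examples; in practice a concrete auditor's
    # `generate_worst_case_examples` would produce these
    X_worst = X_audit + 0.1 * torch.randn_like(X_audit)

    auditor = Auditor()
    loss_ratios = auditor.compute_loss_ratio(
        X_audit, X_worst, Y_audit, network, F.cross_entropy
    )
    print(loss_ratios.shape)  # (100,) -- one loss ratio per audit sample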
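
A similar sketch for `compute_audit_result`. The loss ratios are randomly
generated here only to keep the example self-contained, the threshold of 1.25
is an arbitrary illustrative choice, and the attribute access on the returned
`AuditorResponse` assumes its fields mirror the keyword arguments used in the
constructor calls above:

    import numpy as np

    from inFairness.auditor.auditor import Auditor

    auditor = Auditor()

    # Loss ratios would normally come from `compute_loss_ratio`
    loss_ratios = np.random.lognormal(mean=0.1, sigma=0.2, size=500)

    # No threshold: report the mean loss ratio and its lower confidence bound
    summary = auditor.compute_audit_result(loss_ratios)
    print(summary.lossratio_mean, summary.lower_bound)

    # With a threshold: one-sided z-test of whether the mean loss ratio exceeds
    # the threshold; the model is flagged unfair when pval < 1 - confidence
    result = auditor.compute_audit_result(loss_ratios, threshold=1.25, confidence=0.95)
    print(result.pval, result.is_model_fair)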