Source code for inFairness.auditor.auditor

import torch
import numpy as np

from abc import ABCMeta
from scipy.stats import norm
from inFairness.utils.datautils import convert_tensor_to_numpy
from inFairness.auditor.datainterface import AuditorResponse


class Auditor(metaclass=ABCMeta):
    """Abstract class for model auditors, e.g. SenSeI or SenSR"""

    def __init__(self):
        super(Auditor, self).__init__()
    def generate_worst_case_examples(self, *args, **kwargs):
        """Generates worst-case examples for the input batch of data samples"""
        raise NotImplementedError(
            "Method `generate_worst_case_examples` not implemented."
        )
    def compute_loss_ratio(self, X_audit, X_worst, Y_audit, network, loss_fn):
        """Compute the ratio of the loss on worst-case examples (obtained by
        solving the gradient flow attack) to the loss on the original audit samples

        Parameters
        -------------
            X_audit: torch.Tensor
                Auditing samples. Shape: (n_samples, n_features)
            X_worst: torch.Tensor
                Worst-case counterparts of the auditing samples.
                Shape: (n_samples, n_features)
            Y_audit: torch.Tensor
                Labels of the auditing samples. Shape: (n_samples)
            network: torch.nn.Module
                Model under audit
            loss_fn: callable
                Loss function; must accept a ``reduction="none"`` keyword argument

        Returns
        ---------
            loss_ratios: numpy.ndarray
                Ratio of the loss on worst-case examples to the loss on the
                original audit samples
        """

        with torch.no_grad():
            Y_pred_worst = network(X_worst)
            Y_pred_original = network(X_audit)

            loss_vals_adversarial = loss_fn(Y_pred_worst, Y_audit, reduction="none")
            loss_vals_original = loss_fn(Y_pred_original, Y_audit, reduction="none")

        loss_vals_adversarial = convert_tensor_to_numpy(loss_vals_adversarial)
        loss_vals_original = convert_tensor_to_numpy(loss_vals_original)

        loss_ratio = np.divide(loss_vals_adversarial, loss_vals_original)
        return loss_ratio
    def compute_audit_result(self, loss_ratios, threshold=None, confidence=0.95):
        """Computes auditing statistics given loss ratios and a user-specified
        acceptance threshold

        Parameters
        -------------
            loss_ratios: numpy.ndarray
                Loss ratios between worst-case and original data samples
            threshold: float, optional
                User-specified acceptance threshold value.
                If no value is specified, the procedure simply returns the mean
                and lower bound of the loss ratio, leaving the determination of
                the model's fairness to the user. If a value is specified, the
                procedure also determines whether the model is individually fair.
            confidence: float, optional
                Confidence value. Default = 0.95

        Returns
        ----------
            audit_result: AuditorResponse
                Data interface with auditing results and statistics
        """

        loss_ratios = loss_ratios[np.isfinite(loss_ratios)]

        lossratio_mean = np.mean(loss_ratios)
        lossratio_std = np.std(loss_ratios)

        N = loss_ratios.shape[0]
        z = norm.ppf(confidence)
        lower_bound = lossratio_mean - z * lossratio_std / np.sqrt(N)

        if threshold is None:
            response = AuditorResponse(
                lossratio_mean=lossratio_mean,
                lossratio_std=lossratio_std,
                lower_bound=lower_bound,
            )
        else:
            tval = (lossratio_mean - threshold) / lossratio_std
            tval *= np.sqrt(N)
            pval = 1 - norm.cdf(tval)

            is_model_fair = False if pval < (1 - confidence) else True

            response = AuditorResponse(
                lossratio_mean=lossratio_mean,
                lossratio_std=lossratio_std,
                lower_bound=lower_bound,
                threshold=threshold,
                pval=pval,
                confidence=confidence,
                is_model_fair=is_model_fair,
            )

        return response
    def audit(self, *args, **kwargs):
        """Audit model for individual fairness"""
        raise NotImplementedError("Method not implemented")
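
A minimal sketch of how `compute_loss_ratio` can be called. It assumes a toy
classification model and synthetic audit data, and uses a noisy copy of
`X_audit` as a stand-in for the worst-case examples that a concrete auditor
(e.g. `SenSeIAuditor`) would produce via `generate_worst_case_examples`:

    import torch
    import torch.nn.functional as F

    from inFairness.auditor.auditor import Auditor

    # Toy classifier and synthetic audit data (illustrative only)
    network = torch.nn.Linear(5, 2)
    X_audit = torch.randn(100, 5)
    Y_audit = torch.randint(0, 2, (100,))

    # Stand-in for worst-case examples; in practice a concrete auditor's
    # `generate_worst_case_examples` would produce these
    X_worst = X_audit + 0.1 * torch.randn_like(X_audit)

    auditor = Auditor()
    loss_ratios = auditor.compute_loss_ratio(
        X_audit, X_worst, Y_audit, network, F.cross_entropy
    )
    print(loss_ratios.shape)  # (100,) -- one loss ratio per audit sample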
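
A similar sketch for `compute_audit_result`. The loss ratios are randomly
generated here only to keep the example self-contained, the threshold of 1.25
is an arbitrary illustrative choice, and the attribute access on the returned
`AuditorResponse` assumes its fields mirror the keyword arguments used in the
constructor calls above:

    import numpy as np

    from inFairness.auditor.auditor import Auditor

    auditor = Auditor()

    # Loss ratios would normally come from `compute_loss_ratio`
    loss_ratios = np.random.lognormal(mean=0.1, sigma=0.2, size=500)

    # No threshold: report the mean loss ratio and its lower confidence bound
    summary = auditor.compute_audit_result(loss_ratios)
    print(summary.lossratio_mean, summary.lower_bound)

    # With a threshold: one-sided z-test of whether the mean loss ratio exceeds
    # the threshold; the model is flagged unfair when pval < 1 - confidence
    result = auditor.compute_audit_result(loss_ratios, threshold=1.25, confidence=0.95)
    print(result.pval, result.is_model_fair)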