Source code for inFairness.distances.sensitive_subspace_dist

import numpy as np
import torch
from sklearn.decomposition import TruncatedSVD
from typing import List

from inFairness.distances.mahalanobis_distance import MahalanobisDistances
from inFairness.utils import datautils


class SensitiveSubspaceDistance(MahalanobisDistances):
    """Implements Sensitive Subspace metric base class that accepts the
    basis vectors of a sensitive subspace, and computes a projection
    that ignores the sensitive subspace.

    The projection from the sensitive subspace basis vectors (A) is computed as:

    .. math:: P^{'} = I - (A (A^{T} A)^{-1} A^{T})
    """

    def __init__(self):
        super().__init__()

    def fit(self, basis_vectors):
        """Fit Sensitive Subspace Distance metric

        Parameters
        --------------
            basis_vectors: torch.Tensor
                Basis vectors of the sensitive subspace
        """

        # The projection complement is handed to the parent class' `fit`.
        sigma = self.compute_projection_complement(basis_vectors)
        super().fit(sigma)

    def compute_projection_complement(self, basis_vectors):
        """Compute the projection complement of the space
        defined by the basis_vectors:

        projection complement given basis vectors (A) is computed as:

        .. math:: P^{'} = I - (A (A^{T} A)^{-1} A^{T})

        Parameters
        -------------
            basis_vectors: torch.Tensor
                Basis vectors of the sensitive subspace
                Dimension (d, k) where d is the data features dimension
                and k is the number of protected dimensions

        Returns
        ----------
            projection complement: torch.Tensor
                Projection complement computed as described above.
                Shape (d, d) where d is the data feature dimension.
                The result is detached from the autograd graph and has the
                same dtype and device as the computed projection.
        """

        # Computing the orthogonal projection
        # V (V^T V)^{-1} V^T
        projection = torch.linalg.inv(torch.matmul(basis_vectors.T, basis_vectors))
        projection = torch.matmul(basis_vectors, projection)
        # Shape: (n_features, n_features)
        projection = torch.matmul(projection, basis_vectors.T)

        # Complement the projection as: (I - Proj)
        # The identity is created with the projection's dtype and device so
        # the subtraction does not fail when the basis vectors are not on CPU.
        identity = torch.eye(
            projection.shape[0],
            dtype=projection.dtype,
            device=projection.device,
        )
        projection_complement_ = identity - projection
        projection_complement_ = projection_complement_.detach()

        return projection_complement_
class SVDSensitiveSubspaceDistance(SensitiveSubspaceDistance):
    """Sensitive Subspace metric whose sensitive-subspace basis vectors are
    obtained through a truncated SVD.

    The subspace is learnt from a set of user-curated comparable data
    samples. Proposed in Section B.2 of Training individually fair ML models
    with sensitive subspace robustness

    References
    -------------
        `Yurochkin, Mikhail, Amanda Bower, and Yuekai Sun. "Training individually fair
        ML models with sensitive subspace robustness." arXiv preprint arXiv:1907.00020 (2019).`
    """

    def __init__(self):
        super().__init__()
        # Number of SVD components requested in the most recent `fit` call.
        self.n_components_ = None

    def fit(self, X_train, n_components, autoinfer_device=True):
        """Fit SVD Sensitive Subspace distance metric parameters

        Parameters
        -------------
            X_train: torch.Tensor | List[torch.Tensor]
                Training data containing comparable data samples.
                A single set of comparable samples is given as a
                torch.Tensor of shape :math:`(N, D)`. Several sets can be
                given as a list of shape
                :math:`[ (N_1, D), \\cdots, (N_x, D)]`.
            n_components: int
                Desired number of latent variable dimensions
            autoinfer_device: bool
                When True, the metric is moved to the device reported by
                `datautils.get_device(X_train)` after fitting. When False,
                no device move is performed by this method.
        """

        self.n_components_ = n_components
        super().fit(self.compute_basis_vectors(X_train, n_components))

        if not autoinfer_device:
            return

        target_device = datautils.get_device(X_train)
        super().to(target_device)

    def __process_input_data__(self, X_train):
        """Convert the metric training data to numpy.

        A single tensor / array is converted and returned as-is (no mean is
        subtracted on this path). For a list, every element is converted,
        its per-column mean is subtracted, and the centred sets are stacked
        vertically into one array.

        Raises
        ----------
            TypeError
                If `X_train` is neither a tensor, an ndarray nor a list.
        """

        if isinstance(X_train, (torch.Tensor, np.ndarray)):
            return datautils.convert_tensor_to_numpy(X_train)

        if isinstance(X_train, list):
            converted = [datautils.convert_tensor_to_numpy(X) for X in X_train]
            # Centre each set on its own mean before concatenating them
            centred = [group - np.mean(group, axis=0) for group in converted]
            return np.vstack(centred)

        raise TypeError(
            "Provided data `X_train` should either be Tensor, np.ndarray or a list of these."
        )

    def compute_basis_vectors(self, X_train, n_components):
        """Compute basis vectors using SVD

        Returns a tensor built from the transposed `components_` of a
        fitted TruncatedSVD, i.e. of shape (n_features, n_components).
        """

        data = self.__process_input_data__(X_train)
        svd = TruncatedSVD(n_components=n_components).fit(data)

        # components_ is (n_components, n_features); transpose for column basis
        return torch.Tensor(svd.components_.T)