import numpy as np
import os
# ====== Embedding functions imports ======
from sklearn.decomposition import PCA
from sklearn.decomposition import NMF
from sklearn.manifold import (
Isomap,
LocallyLinearEmbedding,
SpectralEmbedding,
)
from umap import UMAP
from functools import reduce
# ====== Qiskit imports ======
from qiskit import QuantumCircuit
import qbiocode.utils.qutils as qutils
from qiskit.quantum_info import Pauli
def pqk(X_train, X_test, args, store=False, data_key='',
        encoding='Z', data_map=True, primitive='estimator', entanglement='linear', reps=2):
"""
    This function generates a quantum feature-map circuit and computes projections of the data onto it.
    It uses the feature map to encode the data into quantum states and then measures the expectation values
    of single-qubit Pauli X, Y and Z operators to obtain the projected features.
    A quantum backend (simulator or real quantum hardware) is required for execution.
    Various configurations are supported, such as encoding methods, entanglement strategies, and the number
    of repetitions of the feature map. Optionally, the training and test projections are saved to files.
Args:
X_train (np.ndarray): Training data features.
X_test (np.ndarray): Test data features.
        args (dict): Run configuration; must contain a 'backend' entry and any other settings required for the backend/session setup.
        store (bool): If True, projections are stored to disk, using data_key as identifier. Default is False.
data_key (str): Key for the dataset, default is ''.
encoding (str): Encoding method for the quantum circuit, default is 'Z'.
        data_map (bool): If True, ensures that all multiplicative factors of data features inside single-qubit gates are 1.0. Not applicable to Heisenberg feature maps. Default is True.
        primitive (str): Primitive type to use, default is 'estimator'. Note that the current implementation always requests the estimator primitive.
entanglement (str): Entanglement strategy, default is 'linear'.
reps (int): Number of repetitions for the feature map, default is 2.
    Returns:
        tuple: (X_train_prj, X_test_prj), the projected training and test features as np.ndarray,
        each of shape (n_samples, 3 * n_features) holding the <X>, <Y> and <Z> expectation values per qubit.
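    Example:
        A minimal, illustrative sketch (hypothetical data; assumes that for the local
        simulator the args dict only needs the 'backend' key used in the body below):

            import numpy as np
            X_train = np.random.rand(20, 4)
            X_test = np.random.rand(5, 4)
            args = {'backend': 'simulator'}
            X_train_prj, X_test_prj = pqk(X_train, X_test, args, encoding='Z', reps=2)
            # X_train_prj.shape == (20, 12): <X>, <Y>, <Z> for each of the 4 qubits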
"""
feat_dimension = X_train.shape[1]
if data_map:
# This function ensures that all multiplicative factors of data features inside single qubit gates are 1.0
def data_map_func(x: np.ndarray) -> float:
"""
Define a function map from R^n to R.
Args:
x: data
Returns:
float: the mapped value
"""
coeff = x[0] / 2 if len(x) == 1 else reduce(lambda m, n: (m * n) / 2, x)
return coeff
else:
data_map_func = None
# choose a method for mapping your features onto the circuit
    feature_map, _ = qutils.get_feature_map(feature_map=encoding,
                                            feat_dimension=feat_dimension,
                                            reps=reps,
                                            entanglement=entanglement,
                                            data_map_func=data_map_func)
# Build quantum circuit
circuit = QuantumCircuit(feature_map.num_qubits)
circuit.compose(feature_map, inplace=True)
num_qubits = circuit.num_qubits
# Generate the backend, session and primitive
backend, session, prim = qutils.get_backend_session(args,
'estimator',
num_qubits=num_qubits)
# Transpile
if args['backend'] != 'simulator':
circuit = qutils.transpile_circuit( circuit, opt_level=3, backend = backend,
PT = True, initial_layout = None)
for f_tr in ['train', 'test']:
if 'train' in f_tr:
dat = X_train.copy()
else:
dat = X_test.copy()
        # Identity operator on all qubits
        identity = 'I' * feat_dimension
        # We group all commuting observables
        # These groups are the Pauli X, Y and Z operators on individual qubits
        # Apply the circuit layout to the observables if mapped to a device
        if args['backend'] != 'simulator':
            observables_x = []
            observables_y = []
            observables_z = []
            for i in range(feat_dimension):
                observables_x.append(Pauli(identity[:i] + 'X' + identity[(i + 1):]).apply_layout(circuit.layout, num_qubits=backend.num_qubits))
                observables_y.append(Pauli(identity[:i] + 'Y' + identity[(i + 1):]).apply_layout(circuit.layout, num_qubits=backend.num_qubits))
                observables_z.append(Pauli(identity[:i] + 'Z' + identity[(i + 1):]).apply_layout(circuit.layout, num_qubits=backend.num_qubits))
        else:
            observables_x = [Pauli(identity[:i] + 'X' + identity[(i + 1):]) for i in range(feat_dimension)]
            observables_y = [Pauli(identity[:i] + 'Y' + identity[(i + 1):]) for i in range(feat_dimension)]
            observables_z = [Pauli(identity[:i] + 'Z' + identity[(i + 1):]) for i in range(feat_dimension)]
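        # e.g. with feat_dimension = 4 and i = 1, the Pauli strings above are 'IXII', 'IYII' and 'IZII'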
# projections[i][j][k] will be the expectation value of the j-th Pauli operator (0: X, 1: Y, 2: Z)
# of datapoint i on qubit k
projections = []
for i in range(len(dat)):
            # Get the current data sample
parameters = dat[i]
# We define the primitive unified blocs (PUBs) consisting of the embedding circuit,
# set of observables and the circuit parameters
pub_x = (circuit, observables_x, parameters)
pub_y = (circuit, observables_y, parameters)
pub_z = (circuit, observables_z, parameters)
            job = prim.run([pub_x, pub_y, pub_z])
            result = job.result()
            job_result_x = result[0].data.evs
            job_result_y = result[1].data.evs
            job_result_z = result[2].data.evs
# Record <X>, <Y> and <Z> on all qubits for the current datapoint
projections.append([job_result_x, job_result_y, job_result_z])
        if store:
            os.makedirs('pqk_projections', exist_ok=True)
            file_projection = os.path.join('pqk_projections', 'pqk_projection_' + data_key + '_' + f_tr + '.npy')
            np.save(file_projection, projections)
        if 'train' in f_tr:
            X_train_prj = np.array(projections).reshape(len(projections), -1)
        else:
            X_test_prj = np.array(projections).reshape(len(projections), -1)
    if session is not None:
session.close()
return X_train_prj, X_test_prj
def get_embeddings(embedding: str, X_train, X_test, n_neighbors=30, n_components=None, method=None):
"""This function applies the specified embedding technique to the training and test datasets.
Args:
embedding (str): The embedding technique to use. Options are 'none', 'pca', 'nmf', 'lle', 'isomap', 'spectral', or 'umap'.
X_train (array-like): The training dataset.
X_test (array-like): The test dataset.
n_neighbors (int, optional): Number of neighbors for certain embeddings. Defaults to 30.
n_components (int, optional): Number of components for the embedding. If None, it defaults to the number of features in X_train.
        method (str, optional): Method for Locally Linear Embedding; if None, 'standard' LLE is used, otherwise 'modified'. Defaults to None.
Returns:
tuple: Transformed training and test datasets.
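    Example:
        A minimal, illustrative sketch with random data (hypothetical values):

            import numpy as np
            X_train = np.random.rand(100, 10)
            X_test = np.random.rand(20, 10)
            X_train_emb, X_test_emb = get_embeddings('pca', X_train, X_test, n_components=3)
            # X_train_emb.shape == (100, 3), X_test_emb.shape == (20, 3)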
"""
embedding = embedding.lower()
valid_modes = ['none', 'pca', 'lle', 'isomap', 'spectral', 'umap', 'nmf']
if embedding not in valid_modes:
raise ValueError(f"Invalid mode: {embedding}. Mode must be one of {valid_modes}")
    if n_components is None:
        n_components = X_train.shape[1]
    assert n_components <= X_train.shape[1], "number of components greater than number of features in the dataset"
if 'none' == embedding:
return X_train, X_test
else:
embedding_model = None
if 'pca' == embedding:
embedding_model = PCA(
n_components=n_components)
elif 'nmf' == embedding:
embedding_model = NMF(
n_components=n_components)
elif 'lle' == embedding:
            if method is None:
embedding_model = LocallyLinearEmbedding(
n_neighbors=n_neighbors,
n_components=n_components,
method='standard')
else:
embedding_model = LocallyLinearEmbedding(
n_neighbors=n_neighbors,
n_components=n_components,
method='modified')
elif 'isomap' == embedding:
embedding_model = Isomap(
n_neighbors=n_neighbors,
n_components=n_components,
)
elif 'spectral' == embedding:
embedding_model = SpectralEmbedding(
n_components=n_components,
eigen_solver="arpack")
elif 'umap' == embedding:
embedding_model = UMAP(
n_neighbors=n_neighbors,
n_components=n_components,
)
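        # Note: sklearn's SpectralEmbedding does not implement transform(), so the
        # 'spectral' option can only embed the training data; the transform call on
        # X_test below will raise an AttributeError for that embedding.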
X_train = embedding_model.fit_transform(X_train)
X_test = embedding_model.transform(X_test)
return X_train, X_test