# Source code for qbiocode.learning.compute_pqk

# ====== Base class imports ======
import time
import numpy as np
import os
import pandas as pd
import warnings
from sklearn.metrics import confusion_matrix, classification_report, f1_score, accuracy_score, auc
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
try:
    from xgboost import XGBClassifier
    XGBOOST_AVAILABLE = True
except Exception:
    XGBOOST_AVAILABLE = False
    XGBClassifier = None  # type: ignore

# ====== Additional local imports ======
from qbiocode.evaluation.model_evaluation import modeleval
import qbiocode.utils.qutils as qutils
from sklearn.model_selection import GridSearchCV

# ====== Qiskit imports ======
from qiskit import QuantumCircuit

#from qiskit.primitives import Sampler
from functools import reduce
from qiskit.quantum_info import Pauli
from sklearn import svm

[docs] def compute_pqk(X_train, X_test, y_train, y_test, args, model='PQK', data_key = '', verbose=False, encoding = 'Z', primitive = 'estimator', entanglement = 'linear', reps= 2, classical_models=None): """ This function generates quantum circuits, computes projections of the data onto these circuits, and evaluates the performance of classical machine learning models on the projected data. It uses a feature map to encode the data into quantum states and then measures the expectation values of Pauli operators to obtain the features. The classical models are trained on the projected training data and evaluated on the projected test data. The function returns evaluation metrics and model parameters. This function requires a quantum backend (simulator or real quantum hardware) for execution. It supports various configurations such as encoding methods, entanglement strategies, and repetitions of the feature map. The results are saved to files for training and test projections, which are reused if they already exist to avoid redundant computations. This function is part of the main quantum machine learning pipeline (QProfiler.py) and is intended for use in supervised learning tasks. It leverages quantum computing to enhance feature extraction and classification performance on complex datasets. The function returns the performance results, including accuracy, F1-score, AUC, runtime, as well as model parameters, and other relevant metrics. Args: X_train (np.ndarray): Training data features. X_test (np.ndarray): Test data features. y_train (np.ndarray): Training data labels. y_test (np.ndarray): Test data labels. args (dict): Arguments containing backend and other configurations. model (str): Model type, default is 'PQK'. data_key (str): Key for the dataset, default is ''. verbose (bool): If True, print additional information, default is False. encoding (str): Encoding method for the quantum circuit, default is 'Z'. 
primitive (str): Primitive type to use, default is 'estimator'. entanglement (str): Entanglement strategy, default is 'linear'. reps (int): Number of repetitions for the feature map, default is 2. classical_models (list): List of classical models to train on quantum projections. Options: 'rf', 'mlp', 'svc', 'lr', 'xgb'. Default is ['rf', 'mlp', 'svc', 'lr', 'xgb']. Returns: modeleval (pd.DataFrame): A DataFrame containing evaluation metrics and model parameters for all models. """ # Set default classical models if not provided if classical_models is None: classical_models = ['rf', 'mlp', 'svc', 'lr', 'xgb'] beg_time = time.time() feat_dimension = X_train.shape[1] if not os.path.exists( 'pqk_projections'): os.makedirs('pqk_projections') file_projection_train = os.path.join( 'pqk_projections', 'pqk_projection_' + data_key + '_train.npy') file_projection_test = os.path.join( 'pqk_projections', 'pqk_projection_' + data_key + '_test.npy') # This function ensures that all multiplicative factors of data features inside single qubit gates are 1.0 def data_map_func(x: np.ndarray) -> float: """ Define a function map from R^n to R. 
Args: x: data Returns: float: the mapped value """ coeff = x[0] / 2 if len(x) == 1 else reduce(lambda m, n: (m * n) / 2, x) return coeff # choose a method for mapping your features onto the circuit feature_map, _ = qutils.get_feature_map(feature_map=encoding, feat_dimension=X_train.shape[1], reps = reps, entanglement=entanglement, data_map_func = data_map_func) # Build quantum circuit circuit = QuantumCircuit(feature_map.num_qubits) circuit.compose(feature_map, inplace=True) num_qubits = circuit.num_qubits if (not os.path.exists( file_projection_train ) ) | (not os.path.exists( file_projection_test ) ): # Generate the backend, session and primitive backend, session, prim = qutils.get_backend_session(args, 'estimator', num_qubits=num_qubits) # Transpile if args['backend'] != 'simulator': circuit = qutils.transpile_circuit( circuit, opt_level=3, backend = backend, PT = True, initial_layout = None) for f_tr in [file_projection_train, file_projection_test]: if not os.path.exists( f_tr ): projections = [] if 'train' in f_tr: dat = X_train.copy() else: dat = X_test.copy() # Identity operator on all qubits id = 'I' * feat_dimension # We group all commuting observables # These groups are the Pauli X, Y and Z operators on individual qubits # Apply the circuit layout to the observable if mapped to device if args['backend'] != 'simulator': observables_x =[] observables_y =[] observables_z =[] for i in range(feat_dimension): observables_x.append( Pauli(id[:i] + 'X' + id[(i + 1):]).apply_layout(circuit.layout, num_qubits=backend.num_qubits) ) observables_y.append( Pauli(id[:i] + 'Y' + id[(i + 1):]).apply_layout(circuit.layout, num_qubits=backend.num_qubits) ) observables_z.append( Pauli(id[:i] + 'Z' + id[(i + 1):]).apply_layout(circuit.layout, num_qubits=backend.num_qubits) ) else: observables_x = [Pauli(id[:i] + 'X' + id[(i + 1):]) for i in range(feat_dimension)] observables_y = [Pauli(id[:i] + 'Y' + id[(i + 1):]) for i in range(feat_dimension)] observables_z = [Pauli(id[:i] + 
'Z' + id[(i + 1):]) for i in range(feat_dimension)] # projections[i][j][k] will be the expectation value of the j-th Pauli operator (0: X, 1: Y, 2: Z) # of datapoint i on qubit k projections = [] for i in range(len(dat)): if i % 100 == 0: print('at datapoint {}'.format(i)) # Get training sample parameters = dat[i] # We define the primitive unified blocs (PUBs) consisting of the embedding circuit, # set of observables and the circuit parameters pub_x = (circuit, observables_x, parameters) pub_y = (circuit, observables_y, parameters) pub_z = (circuit, observables_z, parameters) job = prim.run([pub_x, pub_y, pub_z]) job_result_x = job.result()[0].data.evs job_result_y = job.result()[1].data.evs job_result_z = job.result()[2].data.evs # Record <X>, <Y> and <Z> on all qubits for the current datapoint projections.append([job_result_x, job_result_y, job_result_z]) np.save( f_tr, projections ) if not isinstance(session, type(None)): session.close() # Load computed projections projections_train = np.load( file_projection_train ) projections_train = np.array(projections_train).reshape(len(projections_train), -1) projections_test = np.load( file_projection_test ) projections_test = np.array(projections_test).reshape(len(projections_test), -1) # Check if XGBoost is requested but not available if 'xgb' in classical_models and not XGBOOST_AVAILABLE: warnings.warn( "XGBoost is not properly installed or configured and will be skipped.\n" "On macOS, you may need to install OpenMP:\n" " brew install libomp\n" "Then reinstall XGBoost:\n" " pip install --force-reinstall xgboost\n" "See installation documentation for more details.\n" f"Continuing with other models: {[m for m in classical_models if m != 'xgb']}", UserWarning ) # Remove xgb from the list classical_models = [m for m in classical_models if m != 'xgb'] # If no models remain after filtering, raise an error if not classical_models: raise ValueError("No valid classical models specified. 
Please provide at least one model from: 'rf', 'mlp', 'svc', 'lr', 'xgb'") model_res = [] for method in classical_models: if method == 'rf': model = create_rf_model(args['seed']) elif method == 'svc': model = create_svc_model(args['seed']) elif method == 'mlp': model = create_mlp_model(args['seed']) elif method == 'lr': model = create_lr_model(args['seed']) elif method == 'xgb': model = create_xgb_model(args['seed']) else: warnings.warn(f"Unknown model type '{method}' skipped. Valid options: 'rf', 'mlp', 'svc', 'lr', 'xgb'", UserWarning) continue method_pqk = 'pqk_' + method print(method_pqk) model.fit(projections_train, y_train) y_predicted = model.predict(projections_test) hyperparameters = { 'feature_map': feature_map.__class__.__name__, 'feature_map_reps': reps, 'entanglement' : entanglement, 'best_params': model.best_params_, # Add other hyperparameters as needed } model_params = hyperparameters model_res.append(modeleval(y_test, y_predicted, beg_time, model_params, args, model=method_pqk, verbose=verbose)) model_res = pd.concat(model_res) return(model_res)
def create_xgb_model(seed):
    """Build a randomized hyperparameter search over an XGBoost classifier.

    Args:
        seed (int): Random seed for the search.

    Returns:
        RandomizedSearchCV: Unfitted search object wrapping an XGBClassifier.

    Raises:
        ImportError: If XGBoost is not installed or not usable.
    """
    # Fail early with installation guidance when xgboost could not be imported.
    if not XGBOOST_AVAILABLE:
        raise ImportError(
            "XGBoost is not properly installed or configured.\n"
            "On macOS, you may need to install OpenMP:\n"
            " brew install libomp\n\n"
            "Then reinstall XGBoost:\n"
            " pip install --force-reinstall xgboost\n\n"
            "See installation documentation for more details."
        )

    # Search space for the booster's main regularization/capacity knobs.
    search_space = {
        'n_estimators': [100, 200, 300],
        'learning_rate': [0.01, 0.1, 0.2],
        'max_depth': [3, 5, 7],
        'subsample': [0.7, 0.8, 1.0],
        'colsample_bytree': [0.7, 0.8, 1.0],
        'min_child_weight': [1, 3, 5],
    }

    booster = XGBClassifier(objective='binary:logistic', eval_metric='logloss')  # type: ignore

    return RandomizedSearchCV(
        estimator=booster,
        param_distributions=search_space,
        n_iter=40,
        cv=5,
        random_state=seed,
        n_jobs=-1,
    )
def create_lr_model(seed):
    """Build a randomized hyperparameter search over a Logistic Regression.

    Args:
        seed (int): Random seed for the estimator and the search.

    Returns:
        RandomizedSearchCV: Unfitted search object wrapping a
        LogisticRegression classifier.
    """
    # Initialize the Logistic Regression classifier.
    lr = LogisticRegression(random_state=seed, max_iter=1000)

    # Only solvers that support both 'l1' and 'l2' penalties are listed.
    lr_param_distributions = {
        'C': [0.001, 0.01, 0.1, 1, 10, 100],
        'penalty': ['l1', 'l2'],
        'solver': ['liblinear', 'saga'],
    }

    # The grid has only 6 * 2 * 2 = 24 combinations; n_iter=40 would make
    # sklearn's ParameterSampler emit a UserWarning and fall back to the full
    # grid anyway. Capping n_iter at the grid size keeps the same exhaustive
    # search without the warning (fixed).
    lr_model = RandomizedSearchCV(estimator=lr,
                                  param_distributions=lr_param_distributions,
                                  n_iter=24,
                                  cv=5,
                                  random_state=seed,
                                  n_jobs=-1)
    return lr_model
def create_rf_model(seed):
    """Build a randomized hyperparameter search over a Random Forest.

    Args:
        seed (int): Random seed for the forest and the search.

    Returns:
        RandomizedSearchCV: Unfitted search object wrapping a
        RandomForestClassifier.
    """
    # Search space covering ensemble size, tree depth and leaf constraints.
    search_space = {
        'n_estimators': np.arange(100, 1000, 100),
        'max_depth': np.arange(5, 20),
        'min_samples_split': np.arange(2, 10),
        'min_samples_leaf': np.arange(1, 5),
        'bootstrap': [True, False],
    }

    forest = RandomForestClassifier(random_state=seed)

    return RandomizedSearchCV(
        estimator=forest,
        param_distributions=search_space,
        n_iter=40,
        cv=5,
        random_state=seed,
        n_jobs=-1,
    )
def create_mlp_model(seed):
    """Build a randomized hyperparameter search over an MLP classifier.

    Args:
        seed (int): Random seed for the network and the search.

    Returns:
        RandomizedSearchCV: Unfitted search object wrapping an MLPClassifier.
    """
    # Candidate architectures, activations, solvers and L2 penalties.
    search_space = {
        "hidden_layer_sizes": [(128, 64, 32, 10), (64, 32, 10), (128, 64, 32)],
        "activation": ["identity", "logistic", "tanh", "relu"],
        "solver": ["lbfgs", "sgd", "adam"],
        "alpha": [0.00005, 0.0005],
    }

    network = MLPClassifier(random_state=seed)

    return RandomizedSearchCV(
        estimator=network,
        param_distributions=search_space,
        n_iter=40,
        cv=5,
        random_state=seed,
        n_jobs=-1,
    )
def create_svc_model(seed):
    """Build a randomized hyperparameter search over a support vector classifier.

    Args:
        seed (int): Random seed for the SVC and the search.

    Returns:
        RandomizedSearchCV: Unfitted search object wrapping an SVC.
    """
    # Regularization strength, kernel coefficient and kernel family.
    search_space = {
        'C': [0.1, 1, 10, 100],
        'gamma': [0.001, 0.01, 0.1, 1],
        'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
    }

    classifier = SVC(random_state=seed)

    return RandomizedSearchCV(
        estimator=classifier,
        param_distributions=search_space,
        n_iter=40,
        cv=5,
        random_state=seed,
        n_jobs=-1,
    )