Source code for qbiocode.utils.data_encoding

"""
Quantum Data Encoding Utilities
================================

This module provides utility functions for encoding classical data into
quantum states, including normalization, label encoding, and training
set preparation for quantum machine learning algorithms.

These functions are generic and can be used across different quantum
algorithms, not just ensemble learning.
"""

import numpy as np
from typing import List, Tuple



[docs]
def normalize_data(x: np.ndarray, C: float = 1.0) -> List[complex]:
    """
    Normalize data vector for quantum state encoding.

    Divides a classical data vector by ``sqrt(C * sum(x**2))`` and returns
    the entries as complex amplitudes (zero imaginary part) suitable for
    quantum state initialization. The result has L2 norm ``1 / sqrt(C)``,
    i.e. unit norm for the default ``C = 1.0``.

    Parameters
    ----------
    x : np.ndarray
        One-dimensional classical data vector to normalize. Any real-valued
        array-like (e.g. a list) is accepted.
    C : float, optional
        Positive scaling constant applied to the squared norm
        (default: 1.0)

    Returns
    -------
    List[complex]
        Normalized vector as list of complex numbers. An empty input
        yields an empty list.

    Raises
    ------
    ValueError
        If ``x`` is not one-dimensional, if ``C`` is not a positive finite
        number, or if ``x`` has zero or non-finite norm (such a vector
        cannot be turned into valid amplitudes).

    Examples
    --------
    >>> x = np.array([3.0, 4.0])
    >>> x_norm = normalize_data(x)
    >>> print([abs(xi) for xi in x_norm])
    [0.6, 0.8]
    >>> print(sum([abs(xi)**2 for xi in x_norm]))
    1.0
    """
    # Work in floating point so integer inputs cannot overflow when squared
    # and plain Python sequences are accepted as well.
    values = np.asarray(x, dtype=float)
    if values.ndim != 1:
        raise ValueError(
            f"x must be a one-dimensional vector, got shape {values.shape}"
        )
    if not (np.isfinite(C) and C > 0):
        raise ValueError(f"C must be a positive finite number, got {C!r}")

    # Nothing to scale; mirrors the empty result of an empty vector.
    if values.size == 0:
        return []

    scale = np.sqrt(np.sum(values ** 2) * C)
    # A zero (or NaN/inf) scale would silently produce NaN amplitudes.
    if not np.isfinite(scale) or scale == 0:
        raise ValueError(
            "cannot normalize a vector with zero or non-finite norm"
        )

    # tolist() converts NumPy scalars to built-in Python complex numbers.
    return (values / scale).astype(complex).tolist()




[docs]
def label_to_array(y: np.ndarray) -> np.ndarray:
    """
    Convert binary labels to one-hot encoded arrays.

    Transforms binary classification labels (0 or 1) into one-hot encoded
    format required by quantum circuits. Label 0 becomes [1, 0] and label
    1 becomes [0, 1]. Any label that does not compare equal to 0 is
    treated as class 1.

    Parameters
    ----------
    y : np.ndarray
        Binary labels (0 or 1). Must hold one label per sample; a column
        vector of shape (n_samples, 1) or a plain sequence is accepted.

    Returns
    -------
    np.ndarray
        One-hot encoded integer labels, shape (n_samples, 2). Empty input
        yields an array of shape (0, 2).

    Raises
    ------
    ValueError
        If ``y`` carries more than one value per sample.

    Examples
    --------
    >>> y = np.array([0, 1, 0])
    >>> label_to_array(y)
    array([[1, 0],
           [0, 1],
           [1, 0]])
    """
    labels = np.atleast_1d(np.asarray(y))
    # Reject genuinely multi-column input rather than silently flattening it.
    if labels.size != labels.shape[0]:
        raise ValueError(
            f"y must contain one label per sample, got shape {labels.shape}"
        )
    labels = labels.reshape(-1)

    is_class_one = labels != 0
    # Pre-sizing the output keeps the (n_samples, 2) shape for empty input.
    one_hot = np.zeros((labels.shape[0], 2), dtype=int)
    one_hot[:, 0] = ~is_class_one
    one_hot[:, 1] = is_class_one
    return one_hot




[docs]
def prepare_training_set(X: np.ndarray, y: np.ndarray,
                         n: int = 4, seed: int = 123) -> Tuple[np.ndarray, np.ndarray]:
    """
    Select and prepare balanced training subset for quantum ensemble.

    Creates a balanced training set by drawing ``n // 2`` samples without
    replacement from each class and normalizing every selected sample
    with :func:`normalize_data`. Class-1 samples come first in the output,
    followed by class-0 samples.

    Parameters
    ----------
    X : np.ndarray, shape (n_samples, n_features)
        Training feature data
    y : np.ndarray, shape (n_samples,)
        Training labels (binary: 0 or 1)
    n : int, optional
        Total number of training samples to select (default: 4). Should be
        even; for an odd value only ``n - 1`` samples are returned because
        each class contributes ``n // 2`` samples.
    seed : int, optional
        Random seed for reproducibility (default: 123)

    Returns
    -------
    Tuple[np.ndarray, np.ndarray]
        X_data : Normalized training samples as complex amplitudes,
        shape (n, n_features)
        Y_data : One-hot encoded labels, shape (n, 2)

    Raises
    ------
    ValueError
        If ``X`` and ``y`` differ in length, if ``n`` is negative, or if
        either class has fewer than ``n // 2`` samples.

    Notes
    -----
    Sampling uses a private ``np.random.RandomState(seed)``. This draws
    the same indices as seeding the global NumPy generator with ``seed``
    would, but leaves the global random state untouched.

    Examples
    --------
    >>> X = np.random.rand(20, 4)
    >>> y = np.array([0]*10 + [1]*10)
    >>> X_data, Y_data = prepare_training_set(X, y, n=4, seed=42)
    >>> print(X_data.shape, Y_data.shape)
    (4, 4) (4, 2)
    """
    # Accept plain sequences; a list would make `y == 1` a scalar comparison.
    X = np.asarray(X)
    y = np.asarray(y)
    if len(X) != len(y):
        raise ValueError(
            f"X and y must have the same number of samples, "
            f"got {len(X)} and {len(y)}"
        )
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n!r}")

    per_class = int(n) // 2
    candidates_y1 = np.where(y == 1)[0]
    candidates_y0 = np.where(y == 0)[0]
    for label, candidates in ((1, candidates_y1), (0, candidates_y0)):
        if candidates.size < per_class:
            raise ValueError(
                f"class {label} has only {candidates.size} sample(s), "
                f"but {per_class} are required for n={n}"
            )

    # Local generator: reproducible without reseeding the global RNG.
    rng = np.random.RandomState(seed)

    # Select balanced samples from each class (class 1 is drawn first so the
    # random stream is consumed in a fixed order).
    ix_y1 = rng.choice(candidates_y1, per_class, replace=False)
    ix_y0 = rng.choice(candidates_y0, per_class, replace=False)

    X_selected = np.concatenate([X[ix_y1], X[ix_y0]])
    Y_data = label_to_array(np.concatenate([y[ix_y1], y[ix_y0]]))

    # Normalize each sample
    X_data = np.array([normalize_data(sample) for sample in X_selected])

    return X_data, Y_data