24 #include <logger/Log.hpp>
33 data_filename(
"data_filename",
"images-idx3-ubyte"),
34 labels_filename(
"labels_filename",
"labels-idx1-ubyte"),
35 patch_size(
"patch_size", 5),
36 samples_limit(
"samples_limit",-1)
51 int label_offset_bytes = 8;
52 int data_offset_bytes = 16;
62 LOG(LSTATUS) <<
"Opening file containing MNIST labels: " <<
labels_filename;
63 std::ifstream labels_file(
labels_filename, std::ios::in | std::ios::binary);
64 if (!labels_file.is_open()) {
70 LOG(LSTATUS) <<
"Opening file containing MNIST images: " <<
data_filename;
71 std::ifstream data_file(
data_filename, std::ios::in | std::ios::binary);
72 if (!data_file.is_open()) {
78 LOG(LSTATUS) <<
"Importing MNIST patches of size " <<
patch_size <<
" by " <<
patch_size <<
". This might take a while...";
81 labels_file.seekg (label_offset_bytes, std::ios::beg);
83 data_file.seekg (data_offset_bytes , std::ios::beg);
88 labels_file.read(buffer, 1);
90 if (labels_file.eof())
93 unsigned int temp_label = (
unsigned int)buffer[0];
106 for (
size_t i = 0; i < (size_t)(
image_width*image_height); i++) {
109 image(row, col) = (float)((uint8_t)buffer[i])/255.0f;
113 LOG(LDEBUG) <<
"Loading MNIST sample: " << sample;
116 for (
size_t yi=0; (yi+
patch_size) <= image_height; yi++)
125 (*patch)(yp, xp) = image(yi+yp, xi+xp);
129 sample_labels.push_back(std::make_shared <unsigned int> (temp_label) );
138 LOG(LINFO) <<
"Imported " <<
sample_labels.size() <<
" patches";
153 LOG(LINFO) <<
"Data import finished";
mic::configuration::Property< std::string > labels_filename
mic::configuration::Property< int > samples_limit
MNISTPatchImporter(std::string node_name_="mnist_patch_importer")
std::shared_ptr< mic::types::MatrixXf > MatrixXfPtr
Shared pointer to matrix with single precision floats (of dynamic size).
std::vector< size_t > sample_indices
Stores sample indices (sample "positions" in original dataset).
mic::configuration::Property< std::string > data_filename
mic::configuration::Property< size_t > patch_size
Parent class for all data importers.
std::vector< std::shared_ptr< mic::types::MatrixXf > > sample_data
Stores the data.
std::vector< std::shared_ptr< unsigned int > > sample_labels
Stores labels.
Template-typed matrix of dynamic size. Uses OpenBLAS if found by CMake: overloaded specializations of the * operator for the types float and double.