MachineIntelligenceCore:Algorithms
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
MNISTPatchImporter.cpp
Go to the documentation of this file.
1 
24 #include <logger/Log.hpp>
25 
26 
27 #include <fstream>
28 
29 namespace mic {
30 namespace importers {
31 
32 MNISTPatchImporter::MNISTPatchImporter(std::string node_name_) : Importer (node_name_),
33  data_filename("data_filename","images-idx3-ubyte"),
34  labels_filename("labels_filename", "labels-idx1-ubyte"),
35  patch_size("patch_size", 5),
36  samples_limit("samples_limit",-1)
37 {
38  // Register properties - so their values can be overridden (read from the configuration file).
39  registerProperty(data_filename);
40  registerProperty(labels_filename);
41  registerProperty(patch_size);
42  registerProperty(samples_limit);
43  // Set image properties.
44  image_width = 28;
45  image_height = 28;
46 }
47 
49 
50  char buffer[28*28];
51  int label_offset_bytes = 8;
52  int data_offset_bytes = 16;
53  size_t sample = 0;
54 
55  // Limit patch size.
56  if (patch_size < 1)
57  patch_size = 1;
58  else if (patch_size >= 28)
59  patch_size = 28;
60 
61  // Try to open file with labels.
62  LOG(LSTATUS) << "Opening file containing MNIST labels: " << labels_filename;
63  std::ifstream labels_file(labels_filename, std::ios::in | std::ios::binary);
64  if (!labels_file.is_open()) {
65  LOG(LFATAL) << "Oops! Couldn't find file: " << labels_filename;
66  return false;
67  }//: else
68 
69  // Read file containing images (binary format).
70  LOG(LSTATUS) << "Opening file containing MNIST images: " << data_filename;
71  std::ifstream data_file(data_filename, std::ios::in | std::ios::binary);
72  if (!data_file.is_open()) {
73  LOG(LFATAL) << "Oops! Couldn't find file: " << data_filename;
74  return false;
75  }
76 
77  // Label and image files ok - import patches.
78  LOG(LSTATUS) << "Importing MNIST patches of size " << patch_size << " by " << patch_size << ". This might take a while...";
79 
80  // Skip label header.
81  labels_file.seekg (label_offset_bytes, std::ios::beg);
82  // Skip data header.
83  data_file.seekg (data_offset_bytes , std::ios::beg);
84 
85  // Import loop.
86  while(true) {
87  // Try to read the label.
88  labels_file.read(buffer, 1);
89  // If reached the EOF.
90  if (labels_file.eof())
91  break;
92  // Else: get the label.
93  unsigned int temp_label = (unsigned int)buffer[0];
94 
95  // Try to read the image into buffer.
96  data_file.read(buffer, image_width*image_height);
97  // If reached the EOF.
98  if (data_file.eof())
99  break;
100  // Else: get image.
101 
102  // Create new matrix of MNIST image size.
103  mic::types::MatrixXf image(image_height, image_width);
104 
105  // Parse and set image data.
106  for (size_t i = 0; i < (size_t)(image_width*image_height); i++) {
107  unsigned row = i / image_width;
108  unsigned col = i % image_height;
109  image(row, col) = (float)((uint8_t)buffer[i])/255.0f;
110  }//: for
111 
112  // Got the image and label - now add the patches...
113  LOG(LDEBUG) << "Loading MNIST sample: " << sample;
114 
115  // Iterate through the image.
116  for (size_t yi=0; (yi+patch_size) <= image_height; yi++)
117  for (size_t xi=0; (xi +patch_size) <= image_width; xi++) {
118  // Create a new matrix.
119  mic::types::MatrixXfPtr patch (new mic::types::MatrixXf(patch_size, patch_size));
120 
121  // Iterate through the patch.
122  for (size_t yp=0; yp < patch_size; yp++)
123  for (size_t xp=0; xp < patch_size; xp++) {
124  // Parse and set image data.
125  (*patch)(yp, xp) = image(yi+yp, xi+xp);
126  }//: for patch
127 
128  sample_data.push_back(patch);
129  sample_labels.push_back(std::make_shared <unsigned int> (temp_label) );
130  }//: for image
131 
132  sample++;
133  // Check limit.
134  if ((samples_limit > 0) && (sample >= (size_t)samples_limit))
135  break;
136  }//: while !eof
137 
138  LOG(LINFO) << "Imported " << sample_labels.size() << " patches";
139 
140  // Close files
141  labels_file.close();
142  data_file.close();
143 
144  // Fill the indices table(!)
145  for (size_t i=0; i < sample_data.size(); i++ )
146  sample_indices.push_back(i);
147 
148  // Count the classes.
149  //countClasses();
150  number_of_classes = 10;
151 
152 
153  LOG(LINFO) << "Data import finished";
154  return true;
155 }
156 
157 } /* namespace importers */
158 } /* namespace mic */
mic::configuration::Property< std::string > labels_filename
mic::configuration::Property< int > samples_limit
MNISTPatchImporter(std::string node_name_="mnist_patch_importer")
std::shared_ptr< mic::types::MatrixXf > MatrixXfPtr
Shared pointer to matrix with single precision floats (of dynamic size).
std::vector< size_t > sample_indices
Stores sample indices (sample "positions" in original dataset).
Definition: Batch.hpp:460
mic::configuration::Property< std::string > data_filename
mic::configuration::Property< size_t > patch_size
Parent class for all data importers.
Definition: Importer.hpp:51
std::vector< std::shared_ptr< mic::types::MatrixXf > > sample_data
Stores the data.
Definition: Batch.hpp:454
std::vector< std::shared_ptr< unsigned int > > sample_labels
Stores labels.
Definition: Batch.hpp:457
Template-typed matrix of dynamic size. Uses OpenBLAS if found by CMake, through overloaded specializations of the * operator for the types float and double.
Definition: Matrix.hpp:64