MachineIntelligenceCore:Algorithms
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
CIFARImporter.hpp
Go to the documentation of this file.
1 
25 #ifndef CIFARIMPORTER_HPP_
26 #define CIFARIMPORTER_HPP_
27 
28 
29 #include <importers/Importer.hpp>
30 #include <types/TensorTypes.hpp>
31 
32 namespace mic {
33 namespace importers {
34 
40 template <typename eT>
41 class CIFARImporter: public mic::importers::Importer< mic::types::Tensor<eT>, unsigned int > {
42 public:
50  CIFARImporter(std::string node_name_ = "cifar_importer", std::string data_filename_ = "", int min_sample_ = -1, int max_sample_ = -1)
51  : Importer< mic::types::Tensor<eT>, unsigned int >::Importer (node_name_),
52  data_filename("data_filename",data_filename_),
53  min_sample("min_sample", min_sample_),
54  max_sample("max_sample", max_sample_)
55  {
56  // Register properties - so their values can be overridden (read from the configuration file).
57  registerProperty(data_filename);
58  registerProperty(min_sample);
59  registerProperty(max_sample);
60 
61  // Set image properties.
62  image_height = 32;
63  image_width = 32;
64  image_depth = 3;
65  }
66 
67 
68 
72  virtual ~CIFARImporter() { };
73 
78  void setDataFilename(std::string data_filename_) {
79  data_filename = data_filename_;
80  }
81 
86  bool importData() {
87  // Split filename using a semicolon (;) separator.
88  std::vector<std::string> names_array;
89  std::size_t pos = 0, found;
90  while((found = std::string(data_filename).find_first_of(';', pos)) != std::string::npos) {
91  // Get filename from pos to found.
92  names_array.push_back(std::string(data_filename).substr(pos, found - pos));
93  // Move marker.
94  pos = found+1;
95  }//: white
96  // Add last name.
97  names_array.push_back(std::string(data_filename).substr(pos));
98 
99  // Buffer.
100  char buffer[image_height*image_width*image_depth];
101 
102  // Read data from files.
103  for (size_t fi = 0; fi < names_array.size(); ++fi) {
104 
105  // Try to open file with labels.
106  LOG(LSTATUS) << "Opening file containing CIFAR file: " << names_array[fi];
107  std::ifstream cifar_file(names_array[fi], std::ios::in | std::ios::binary);
108  if (!cifar_file.is_open()) {
109  LOG(LFATAL) << "Oops! Couldn't find file: " << names_array[fi];
110  return false;
111  }//: else
112 
113  size_t sample = 0;
114 
115  // Import data from file.
116  while(true) {
117 
118  // <1 x label><3072 x pixel>
119  // * the first byte is the label of the first image, which is a number in the range 0-9.
120  // The next 3072 bytes are the values of the pixels of the image.
121  // The first 1024 bytes are the red channel values, the next 1024 the green, and the final 1024 the blue.
122  // The values are stored in row-major order, so the first 32 bytes are the red channel values of the first row of the image.
123 
124  // Try to read the label.
125  cifar_file.read(buffer, 1);
126  // If reached the EOF.
127  if (cifar_file.eof())
128  break;
129  // Else: get the label.
130  unsigned int temp_label = (unsigned int)buffer[0];
131 
132  // Got the image and label.
133  LOG(LDEBUG) << "Loading sample: " << sample;
134 
135  // Try to read the image into buffer.
136  cifar_file.read(buffer, image_width*image_height*image_depth);
137  // If reached the EOF.
138  if (cifar_file.eof())
139  break;
140  // Else: get image.
141 
142  // Ok, the sample was loaded. Now check whether we should add it to image.
143  sample++;
144  if ((min_sample > 0) && (sample < (size_t)min_sample))
145  continue;
146 
147  // Create new tensor of CIFAR image size.
149  eT* data = ptr->data();
150 
151  // Copy image.
152  for (size_t id = 0; id < image_depth; ++id) {
153  for (size_t ih = 0; ih < image_height; ++ih) {
154  for (size_t iw = 0; iw < image_width; ++iw) {
155  // Calculate indices.
156  // BI - row-major.
157  size_t bi = id*image_height*image_width + ih*image_width + iw;
158  // DI - col-major.
159  //size_t di = id*image_height*image_width + ih + iw*image_height;
160 
161  // Copy value.
162  data[bi] = (eT)((uint8_t)buffer[bi])/255.0f;
163  }
164  }//: width
165  }//: depth
166  /*for (size_t i = 0; i < (size_t)(image_width*image_height*image_depth); i++) {
167  data[i] = (eT)((uint8_t)buffer[i])/255.0f;
168  }//: for*/
169 
170  sample_data.push_back(ptr);
171  sample_labels.push_back(std::make_shared <unsigned int> (temp_label) );
172 
173  // Check limit.
174  if ((max_sample > 0) && (sample >= (size_t)max_sample))
175  break;
176  }//: while !eof
177 
178  LOG(LINFO) << "Imported " << sample_data.size() << " samples";
179 
180  // Close files
181  cifar_file.close();
182 
183  }//: for files.
184 
185  // Fill the indices table(!)
186  for (size_t i=0; i < sample_data.size(); i++ )
187  sample_indices.push_back(i);
188 
189  // Count (and set) number of classes.
190  countClasses();
191 
192  LOG(LINFO) << "Data import finished";
193  return true;
194  }
195 
200 
201 protected:
202  // Unhide the members inherited from the template class Importer via "using" declarations.
203  using Importer< mic::types::Tensor<eT>, unsigned int >::registerProperty;
204  using Importer< mic::types::Tensor<eT>, unsigned int >::sample_data;
209 
210 private:
214  size_t image_height;
215 
219  size_t image_width;
220 
224  size_t image_depth;
225 
226 
230  mic::configuration::Property<std::string> data_filename;
231 
235  mic::configuration::Property<int> min_sample;
236 
240  mic::configuration::Property<int> max_sample;
241 
242 };
243 
244 
245 } /* namespace importers */
246 } /* namespace mic */
247 
248 
249 
250 #endif /* CIFARIMPORTER_HPP_ */
CIFARImporter(std::string node_name_="cifar_importer", std::string data_filename_="", int min_sample_=-1, int max_sample_=-1)
std::vector< std::shared_ptr< mic::types::Tensor< eT > > > & data()
Returns sample data.
Definition: Batch.hpp:98
mic::configuration::Property< int > max_sample
std::vector< size_t > sample_indices
Stores sample indices (sample "positions" in original dataset).
Definition: Batch.hpp:460
mic::configuration::Property< std::string > data_filename
Class responsible for importing CIFAR images. Returns a batch of Tensors.
virtual void initializePropertyDependentVariables()
Parent class for all data importers.
Definition: Importer.hpp:51
std::vector< std::shared_ptr< mic::types::Tensor< eT > > > sample_data
Stores the data.
Definition: Batch.hpp:454
#define MAKE_TENSOR_PTR(eT,...)
Macro for initialization of tensor pointer.
Definition: TensorTypes.hpp:45
mic::configuration::Property< int > min_sample
Contains declaration (and definition) of base template class of all data importers.
typename std::shared_ptr< mic::types::Tensor< eT > > TensorPtr
Typedef for a shared pointer to template-typed dynamic matrices.
Definition: TensorTypes.hpp:39
std::vector< std::shared_ptr< unsigned int > > sample_labels
Stores labels.
Definition: Batch.hpp:457
void setDataFilename(std::string data_filename_)
Contains declaration of tensor types.