MachineIntelligenceCore:Algorithms
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
RawTextImporter.cpp
Go to the documentation of this file.
1 
24 
25 #include <fstream>
26 
27 namespace mic {
28 namespace importers {
29 
30 RawTextImporter::RawTextImporter(std::string node_name_) : Importer (node_name_),
31  data_filename("data_filename","data_filename")
32 {
33  // Register properties - so their values can be overridden (read from the configuration file).
34  registerProperty(data_filename);
35 }
36 
37 void RawTextImporter::setDataFilename(std::string data_filename_) {
38  data_filename = data_filename_;
39 }
40 
41 
43  char character;
44  // Open file.
45  std::ifstream data_file(data_filename, std::ios::in | std::ios::binary);
46 
47  LOG(LSTATUS) << "Importing raw data from file: " << data_filename;
48 
49  // Check if file is open.
50  if (data_file.is_open()) {
51  // Iterate through the characters.
52  while(!data_file.eof()) {
53  // Read the character.
54  data_file.get(character);
55  LOG(LDEBUG) << character;
56 
57  // Add character to both data and labels.
58  sample_data.push_back(std::make_shared <char> (character) );
59  sample_labels.push_back(std::make_shared <char> (character) );
60 
61  }//: while ! eof
62 
63  // Close the file.
64  data_file.close();
65 
66  } else {
67  LOG(LFATAL) << "Oops! Couldn't find file: " << data_filename;
68  return false;
69  }//: else
70 
71  LOG(LINFO) << "Imported " << sample_data.size() << " characters";
72 
73  // Fill the indices table(!)
74  for (size_t i=0; i < sample_data.size(); i++ )
75  sample_indices.push_back(i);
76 
77  // Count the classes.
78  countClasses();
79 
80  LOG(LINFO) << "Data import finished";
81 
82  return true;
83 }
84 
85 } /* namespace importers */
86 } /* namespace mic */
std::vector< size_t > sample_indices
Stores sample indices (sample "positions" in original dataset).
Definition: Batch.hpp:460
void setDataFilename(std::string data_filename_)
Parent class for all data importers.
Definition: Importer.hpp:51
std::vector< std::shared_ptr< char > > sample_data
Stores the data.
Definition: Batch.hpp:454
mic::configuration::Property< std::string > data_filename
RawTextImporter(std::string node_name_="raw_text_importer")
std::vector< std::shared_ptr< char > > sample_labels
Stores labels.
Definition: Batch.hpp:457