16 #include "instances.h"
43 set(new_instances_number);
55 set(instances_document);
90 if(
this != &other_instances)
132 if(splitting_method ==
"Sequential")
136 else if(splitting_method ==
"Random")
142 std::ostringstream buffer;
144 buffer <<
"OpenNN Exception: Instances class.\n"
145 <<
"static SplittingMethod get_splitting_method(const std::string&).\n"
146 <<
"Unknown splitting method: " << splitting_method <<
".\n";
148 throw std::logic_error(buffer.str());
180 for(
size_t i = 0; i < instances_number; i++)
182 uses[i] =
items[i].use;
201 for(
size_t i = 0; i < instances_number; i++)
203 if(uses[i] == Training)
205 uses_string[i] =
"Training";
207 else if(uses[i] == Generalization)
209 uses_string[i] =
"Generalization";
211 else if(uses[i] == Testing)
213 uses_string[i] =
"Testing";
215 else if(uses[i] == Unused)
217 uses_string[i] =
"Unused";
221 std::ostringstream buffer;
223 buffer <<
"OpenNN Exception Instances class.\n"
224 <<
"Vector<std::string> write_uses(void) const method.\n"
227 throw std::logic_error(buffer.str());
247 for(
size_t i = 0; i < instances_number; i++)
249 if(uses[i] == Training)
251 uses_string[i] =
"Train.";
253 else if(uses[i] == Generalization)
255 uses_string[i] =
"Gen.";
257 else if(uses[i] == Testing)
259 uses_string[i] =
"Test.";
261 else if(uses[i] == Unused)
263 uses_string[i] =
"Unused";
267 std::ostringstream buffer;
269 buffer <<
"OpenNN Exception Instances class.\n"
270 <<
"Vector<std::string> write_abbreviated_uses(void) const method.\n"
273 throw std::logic_error(buffer.str());
288 return(
items[i].use);
299 if(
items[i].use == Training)
303 else if(
items[i].use == Generalization)
305 return(
"Generalization");
307 else if(
items[i].use == Testing)
311 else if(
items[i].use == Unused)
317 std::ostringstream buffer;
319 buffer <<
"OpenNN Exception Instances class.\n"
320 <<
"std::string write_use(const size_t&) const method.\n"
323 throw std::logic_error(buffer.str());
335 if(
items[i].use == Unused)
355 size_t unused_instances_number = 0;
357 for(
size_t i = 0; i < instances_number; i++)
359 if(
items[i].use == Unused)
361 unused_instances_number++;
365 return(unused_instances_number);
379 return(instances_number - unused_instances_number);
391 size_t training_instances_number = 0;
393 for(
size_t i = 0; i < instances_number; i++)
395 if(
items[i].use == Training)
397 training_instances_number++;
401 return(training_instances_number);
413 size_t generalization_instances_number = 0;
415 for(
size_t i = 0; i < instances_number; i++)
417 if(
items[i].use == Generalization)
419 generalization_instances_number++;
423 return(generalization_instances_number);
435 size_t testing_instances_number = 0;
437 for(
size_t i = 0; i < instances_number; i++)
439 if(
items[i].use == Testing)
441 testing_instances_number++;
445 return(testing_instances_number);
461 for(
size_t i = 0; i < instances_number; i++)
463 if(
items[i].use == Training)
467 else if(
items[i].use == Generalization)
471 else if(
items[i].use == Testing)
499 for(
size_t i = 0; i < instances_number; i++)
501 if(
items[i].use == Training)
503 training_indices[count] = (size_t)i;
508 return(training_indices);
522 Vector<size_t> generalization_indices(generalization_instances_number);
526 for(
size_t i = 0; i < instances_number; i++)
528 if(
items[i].use == Generalization)
530 generalization_indices[count] = i;
535 return(generalization_indices);
553 for(
size_t i = 0; i < instances_number; i++)
555 if(
items[i].use == Testing)
557 testing_indices[count] = i;
562 return(testing_indices);
643 const size_t new_uses_size = new_uses.size();
645 if(new_uses_size != instances_number)
647 std::ostringstream buffer;
649 buffer <<
"OpenNN Exception: Instances class.\n"
650 <<
"void set_uses(const Vector<Use>&) method.\n"
651 <<
"Size of uses (" << new_uses_size <<
") must be equal to number of instances (" << instances_number <<
").\n";
653 throw std::logic_error(buffer.str());
658 for(
size_t i = 0; i < instances_number; i++)
660 items[i].use = new_uses[i];
676 std::ostringstream buffer;
682 const size_t new_uses_size = new_uses.size();
684 if(new_uses_size != instances_number)
686 buffer <<
"OpenNN Exception: Instances class.\n"
687 <<
"void set_uses(const Vector<std::string>&) method.\n"
688 <<
"Size of uses (" << new_uses_size <<
") must be equal to number of instances (" << instances_number <<
").\n";
690 throw std::logic_error(buffer.str());
695 for(
size_t i = 0; i < instances_number; i++)
697 if(new_uses[i] ==
"Unused")
699 items[i].use = Unused;
701 else if(new_uses[i] ==
"Training")
703 items[i].use = Training;
705 else if(new_uses[i] ==
"Generalization")
707 items[i].use = Generalization;
709 else if(new_uses[i] ==
"Testing")
711 items[i].use = Testing;
715 buffer <<
"OpenNN Exception Instances class.\n"
716 <<
"void set_uses(const Vector<std::string>&) method.\n"
719 throw std::logic_error(buffer.str());
733 items[i].use = new_use;
745 if(new_use ==
"Training")
747 items[i].use = Training;
749 else if(new_use ==
"Generalization")
751 items[i].use = Generalization;
753 else if(new_use ==
"Testing")
755 items[i].use = Testing;
757 else if(new_use ==
"Unused")
759 items[i].use = Unused;
763 std::ostringstream buffer;
765 buffer <<
"OpenNN Exception Instances class.\n"
766 <<
"void set_use(const std::string&) method.\n"
767 <<
"Unknown use: " << new_use <<
"\n";
769 throw std::logic_error(buffer.str());
782 for(
size_t i = 0; i < instances_number; i++)
784 items[i].use = Training;
797 for(
size_t i = 0; i < instances_number; i++)
799 items[i].use = Generalization;
812 for(
size_t i = 0; i < instances_number; i++)
814 items[i].use = Testing;
840 items.set(new_instances_number);
853 tinyxml2::XMLDocument* document =
new tinyxml2::XMLDocument;
855 std::ostringstream buffer;
859 tinyxml2::XMLElement* instances_element = document->NewElement(
"Instances");
861 document->InsertFirstChild(instances_element);
863 tinyxml2::XMLElement* element = NULL;
864 tinyxml2::XMLText* text = NULL;
870 element = document->NewElement(
"InstancesNumber");
871 instances_element->LinkEndChild(element);
874 buffer << instances_number;
876 text = document->NewText(buffer.str().c_str());
877 element->LinkEndChild(text);
880 for(
size_t i = 0; i < instances_number; i++)
882 element = document->NewElement(
"Item");
883 element->SetAttribute(
"Index", (
unsigned)i+1);
884 instances_element->LinkEndChild(element);
888 tinyxml2::XMLElement* use_element = document->NewElement(
"Use");
889 element->LinkEndChild(use_element);
891 tinyxml2::XMLText* use_text = document->NewText(
write_use(i).c_str());
892 use_element->LinkEndChild(use_text);
897 element = document->NewElement(
"Display");
898 instances_element->LinkEndChild(element);
903 text = document->NewText(buffer.str().c_str());
904 element->LinkEndChild(text);
918 std::ostringstream buffer;
922 const tinyxml2::XMLElement* instances_element = instances_document.FirstChildElement(
"Instances");
924 if(!instances_element)
926 buffer <<
"OpenNN Exception: Instances class.\n"
927 <<
"void from_XML(const tinyxml2::XMLDocument&) method.\n"
928 <<
"Pointer to instances information element is NULL.\n";
930 throw std::logic_error(buffer.str());
935 const tinyxml2::XMLElement* instances_number_element = instances_element->FirstChildElement(
"InstancesNumber");
937 if(!instances_number_element)
939 buffer <<
"OpenNN Exception: Instances class.\n"
940 <<
"void from_XML(const tinyxml2::XMLDocument&) method.\n"
941 <<
"Pointer to instances number is NULL.\n";
943 throw std::logic_error(buffer.str());
946 const size_t instances_number = atoi(instances_number_element->GetText());
950 if(instances_number <= 0)
960 const tinyxml2::XMLElement* start_element = instances_number_element;
962 for(
size_t i = 0; i < instances_number; i++)
964 const tinyxml2::XMLElement* item_element = start_element->NextSiblingElement(
"Item");
965 start_element = item_element;
969 buffer <<
"OpenNN Exception: Instances class.\n"
970 <<
"void from_XML(const tinyxml2::XMLElement*) method.\n"
971 <<
"Item " << i+1 <<
" is NULL.\n";
973 throw std::logic_error(buffer.str());
976 item_element->QueryUnsignedAttribute(
"Index", &index);
980 buffer <<
"OpenNN Exception: Instances class.\n"
981 <<
"void from_XML(const tinyxml2::XMLElement*) method.\n"
982 <<
"Index " << index <<
" is not correct.\n";
984 throw std::logic_error(buffer.str());
989 const tinyxml2::XMLElement* use_element = item_element->FirstChildElement(
"Use");
993 buffer <<
"OpenNN Exception: Instances class.\n"
994 <<
"void from_XML(const tinyxml2::XMLElement*) method.\n"
995 <<
"Pointer to use element is NULL.\n";
997 throw std::logic_error(buffer.str());
1000 if(use_element->GetText())
1002 set_use(index-1, use_element->GetText());
1016 (
const double& training_instances_ratio,
const double& generalization_instances_ratio,
const double& testing_instances_ratio)
1018 const size_t used_instances_number = count_used_instances_number();
1020 if(used_instances_number == 0)
1025 const double total_ratio = training_instances_ratio + generalization_instances_ratio + testing_instances_ratio;
1029 const size_t generalization_instances_number = (size_t)(generalization_instances_ratio*used_instances_number/total_ratio);
1030 const size_t testing_instances_number = (size_t)(testing_instances_ratio*used_instances_number/total_ratio);
1031 const size_t training_instances_number = used_instances_number - generalization_instances_number - testing_instances_number;
1033 const size_t sum_instances_number = training_instances_number + generalization_instances_number + testing_instances_number;
1035 if(sum_instances_number != used_instances_number)
1037 std::ostringstream buffer;
1039 buffer <<
"OpenNN Warning: Instances class.\n"
1040 <<
"void split_random_indices(const double&, const double&, const double&) method.\n"
1041 <<
"Sum of numbers of training, generalization and testing instances is not equal to number of used instances.\n";
1043 throw std::logic_error(buffer.str());
1046 const size_t instances_number = get_instances_number();
1049 std::random_shuffle(indices.begin(), indices.end());
1056 size_t count_training = 0;
1058 while(count_training != training_instances_number)
1062 if(items[index].use != Unused)
1064 items[index].use = Training;
1073 size_t count_generalization = 0;
1075 while(count_generalization != generalization_instances_number)
1079 if(items[index].use != Unused)
1081 items[index].use = Generalization;
1082 count_generalization++;
1090 size_t count_testing = 0;
1092 while(count_testing != testing_instances_number)
1096 if(items[index].use != Unused)
1098 items[index].use = Testing;
1118 if(used_instances_number == 0)
1123 const double total_ratio = training_instances_ratio + generalization_instances_ratio + testing_instances_ratio;
1127 const size_t generalization_instances_number = (size_t)(generalization_instances_ratio*used_instances_number/total_ratio);
1128 const size_t testing_instances_number = (size_t)(testing_instances_ratio*used_instances_number/total_ratio);
1129 const size_t training_instances_number = used_instances_number - generalization_instances_number - testing_instances_number;
1131 const size_t sum_instances_number = training_instances_number + generalization_instances_number + testing_instances_number;
1133 if(sum_instances_number != used_instances_number)
1135 std::ostringstream buffer;
1137 buffer <<
"OpenNN Warning: Instances class.\n"
1138 <<
"void split_random_indices(const double&, const double&, const double&) method.\n"
1139 <<
"Sum of numbers of training, generalization and testing instances is not equal to number of used instances.\n";
1141 throw std::logic_error(buffer.str());
1148 size_t count_training = 0;
1150 while(count_training != training_instances_number)
1152 if(
items[i].use != Unused)
1154 items[i].use = Training;
1163 size_t count_generalization = 0;
1165 while(count_generalization != generalization_instances_number)
1167 if(
items[i].use != Unused)
1169 items[i].use = Generalization;
1170 count_generalization++;
1178 size_t count_testing = 0;
1180 while(count_testing != testing_instances_number)
1182 if(
items[i].use != Unused)
1184 items[i].use = Testing;
1206 std::ostringstream buffer;
1208 if(training_ratio < 0)
1210 buffer <<
"OpenNN Exception: Instances class.\n"
1211 <<
"void split_instances(const SplittingMethod&, const double&, const double&, const double&) method.\n"
1212 <<
"Training ratio is lower than zero.\n";
1214 throw std::logic_error(buffer.str());
1217 if(generalization_ratio < 0)
1219 buffer <<
"OpenNN Exception: Instances class.\n"
1220 <<
"void split_instances(const SplittingMethod&, const double&, const double&, const double&) method.\n"
1221 <<
"Generalization ratio is lower than zero.\n";
1223 throw std::logic_error(buffer.str());
1226 if(testing_ratio < 0)
1228 buffer <<
"OpenNN Exception: Instances class.\n"
1229 <<
"void split_instances(const SplittingMethod&, const double&, const double&, const double&) method.\n"
1230 <<
"Testing ratio is lower than zero.\n";
1232 throw std::logic_error(buffer.str());
1235 if(training_ratio == 0.0 && generalization_ratio == 0.0 && testing_ratio == 0.0)
1237 buffer <<
"OpenNN Exception: Instances class.\n"
1238 <<
"void split_instances(const SplittingMethod&, const double&, const double&, const double&) method.\n"
1239 <<
"All training, generalization and testing ratios are zero.\n";
1241 throw std::logic_error(buffer.str());
1246 switch(splitting_method)
1248 case Instances::Sequential:
1254 case Instances::Random:
1262 std::ostringstream buffer;
1264 buffer <<
"Neural Engine Exception: Instances class.\n"
1265 <<
"void split_instances(const double&, const double&, const double&) method.\n"
1266 <<
"Unknown splitting method.\n";
1268 throw std::logic_error(buffer.str());
1289 for(
size_t i = 0; i < 4; i++)
1291 uses_percentage[i] = 100.0*uses_count[i]/(double)instances_number;
1294 return(uses_percentage);
1308 if (instances_number < lags_number)
1310 std::ostringstream buffer;
1312 buffer <<
"OpenNN Exception: Instances class.\n"
1313 <<
"void convert_time_series(const size_t&).\n"
1314 <<
"Number of instances (" << instances_number <<
") must be equal or greater than number of lags (" << lags_number <<
").\n";
1316 throw std::logic_error(buffer.str());
1319 const size_t new_instances_number = instances_number - lags_number;
1321 set(new_instances_number);
1331 std::ostringstream buffer;
1333 buffer <<
"Instances object\n"
1339 <<
"Display: " <<
display <<
"\n";
1341 return(buffer.str());
const Use & get_use(const size_t &) const
bool empty(void) const
Returns true if the number of instances is zero, and false otherwise.
tinyxml2::XMLDocument * to_XML(void) const
void set_display(const bool &)
Vector< std::string > write_uses(void) const
Returns the use of every instance ("Training", "Generalization", "Testing" or "Unused") in a string vector.
std::string to_string(void) const
Returns a string representation of the current instances object.
void set(void)
Sets an instances object with zero instances.
void set_instances_number(const size_t &)
void set_testing(void)
Sets all the instances in the data set for testing.
size_t count_training_instances_number(void) const
Returns the number of instances in the data set which will be used for training.
void print(void) const
Prints to the screen information about the instances object.
std::string write_use(const size_t &) const
virtual ~Instances(void)
Destructor.
static SplittingMethod get_splitting_method(const std::string &)
const bool & get_display(void) const
void set_training(void)
Sets all the instances in the data set for training.
Vector< size_t > arrange_testing_indices(void) const
Returns the indices of the instances which will be used for testing.
size_t count_testing_instances_number(void) const
Returns the number of instances in the data set which will be used for testing.
Vector< size_t > arrange_training_indices(void) const
Returns the indices of the instances which will be used for training.
size_t count_generalization_instances_number(void) const
Returns the number of instances in the data set which will be used for generalization.
void set_generalization(void)
Sets all the instances in the data set for generalization.
void from_XML(const tinyxml2::XMLDocument &)
void convert_time_series(const size_t &)
bool operator==(const Instances &) const
Vector< Item > items
Uses of instances (none, training, generalization or testing).
Vector< double > calculate_uses_percentage(void) const
void split_instances(const SplittingMethod &splitting_method=Random, const double &training_ratio=0.6, const double &generalization_ratio=0.2, const double &testing_ratio=0.2)
void split_sequential_indices(const double &training_ratio=0.6, const double &generalization_ratio=0.2, const double &testing_ratio=0.2)
size_t count_unused_instances_number(void) const
void set_use(const size_t &, const Use &)
Vector< std::string > write_abbreviated_uses(void) const
Returns the abbreviated use of every instance ("Train.", "Gen.", "Test." or "Unused") in a string vector.
void split_random_indices(const double &training_ratio=0.6, const double &generalization_ratio=0.2, const double &testing_ratio=0.2)
bool is_used(const size_t &) const
size_t count_used_instances_number(void) const
Vector< size_t > count_uses(void) const
void set_uses(const Vector< Use > &)
size_t get_instances_number(void) const
Returns the number of instances in the data set.
Vector< size_t > arrange_generalization_indices(void) const
Returns the indices of the instances which will be used for generalization.
bool display
Display messages to screen.
Vector< Use > arrange_uses(void) const
Returns the use of every instance (training, generalization, testing) in a vector.
Instances & operator=(const Instances &)