OpenNN  2.2
Open Neural Networks Library
instances.cpp
1 /****************************************************************************************************************/
2 /* */
3 /* OpenNN: Open Neural Networks Library */
4 /* www.artelnics.com/opennn */
5 /* */
6 /* I N S T A N C E S C L A S S */
7 /* */
8 /* Roberto Lopez */
9 /* Artelnics - Making intelligent use of data */
11 /* */
12 /****************************************************************************************************************/
13 
14 // OpenNN includes
15 
16 #include "instances.h"
17 
18 namespace OpenNN
19 {
20 
21 
22 // DEFAULT CONSTRUCTOR
23 
27 
29 {
30  set();
31 }
32 
33 
34 // INSTANCES NUMBER CONSTRUCTOR
35 
40 
41 Instances::Instances(const size_t& new_instances_number)
42 {
43  set(new_instances_number);
44 }
45 
46 
47 // XML CONSTRUCTOR
48 
52 
53 Instances::Instances(const tinyxml2::XMLDocument& instances_document)
54 {
55  set(instances_document);
56 }
57 
58 
59 // COPY CONSTRUCTOR
60 
64 
65 Instances::Instances(const Instances& other_instances)
66 {
67  items = other_instances.items;
68 
69  display = other_instances.display;
70 }
71 
72 
73 // DESTRUCTOR
74 
76 
78 {
79 }
80 
81 
82 // ASSIGNMENT OPERATOR
83 
87 
88 Instances& Instances::operator=(const Instances& other_instances)
89 {
90  if(this != &other_instances)
91  {
92  items = other_instances.items;
93  display = other_instances.display;
94  }
95 
96  return(*this);
97 }
98 
99 
100 // EQUAL TO OPERATOR
101 
102 // bool operator == (const Instances&) const method
103 
108 
109 bool Instances::operator == (const Instances& other_instances) const
110 {
111  if(/*items == other_instances.items
112  &&*/ display == other_instances.display)
113  {
114  return(true);
115  }
116  else
117  {
118  return(false);
119  }
120 }
121 
122 
123 // METHODS
124 
125 // static SplittingMethod get_splitting_method(const std::string&) method
126 
129 
131 {
132  if(splitting_method == "Sequential")
133  {
134  return(Sequential);
135  }
136  else if(splitting_method == "Random")
137  {
138  return(Random);
139  }
140  else
141  {
142  std::ostringstream buffer;
143 
144  buffer << "OpenNN Exception: Instances class.\n"
145  << "static SplittingMethod get_splitting_method(const std::string&).\n"
146  << "Unknown splitting method: " << splitting_method << ".\n";
147 
148  throw std::logic_error(buffer.str());
149  }
150 }
151 
152 
153 // bool empty(void) const method
154 
156 
157 bool Instances::empty(void) const
158 {
159  if(items.empty())
160  {
161  return(true);
162  }
163  else
164  {
165  return(false);
166  }
167 }
168 
169 
170 // Vector<Use> arrange_uses(void) const method
171 
173 
175 {
176  const size_t instances_number = get_instances_number();
177 
178  Vector<Use> uses(instances_number);
179 
180  for(size_t i = 0; i < instances_number; i++)
181  {
182  uses[i] = items[i].use;
183  }
184 
185  return(uses);
186 }
187 
188 
189 // Vector<std::string> write_uses(void) const method
190 
192 
194 {
195  const size_t instances_number = get_instances_number();
196 
197  const Vector<Use> uses = arrange_uses();
198 
199  Vector<std::string> uses_string(instances_number);
200 
201  for(size_t i = 0; i < instances_number; i++)
202  {
203  if(uses[i] == Training)
204  {
205  uses_string[i] = "Training";
206  }
207  else if(uses[i] == Generalization)
208  {
209  uses_string[i] = "Generalization";
210  }
211  else if(uses[i] == Testing)
212  {
213  uses_string[i] = "Testing";
214  }
215  else if(uses[i] == Unused)
216  {
217  uses_string[i] = "Unused";
218  }
219  else
220  {
221  std::ostringstream buffer;
222 
223  buffer << "OpenNN Exception Instances class.\n"
224  << "Vector<std::string> write_uses(void) const method.\n"
225  << "Unknown use.\n";
226 
227  throw std::logic_error(buffer.str());
228  }
229  }
230 
231  return(uses_string);
232 }
233 
234 
235 // Vector<std::string> write_abbreviated_uses(void) const method
236 
238 
240 {
241  const size_t instances_number = get_instances_number();
242 
243  const Vector<Use> uses = arrange_uses();
244 
245  Vector<std::string> uses_string(instances_number);
246 
247  for(size_t i = 0; i < instances_number; i++)
248  {
249  if(uses[i] == Training)
250  {
251  uses_string[i] = "Train.";
252  }
253  else if(uses[i] == Generalization)
254  {
255  uses_string[i] = "Gen.";
256  }
257  else if(uses[i] == Testing)
258  {
259  uses_string[i] = "Test.";
260  }
261  else if(uses[i] == Unused)
262  {
263  uses_string[i] = "Unused";
264  }
265  else
266  {
267  std::ostringstream buffer;
268 
269  buffer << "OpenNN Exception Instances class.\n"
270  << "Vector<std::string> write_abbreviated_uses(void) const method.\n"
271  << "Unknown use.\n";
272 
273  throw std::logic_error(buffer.str());
274  }
275  }
276 
277  return(uses_string);
278 }
279 
280 
281 // const Use& get_use(const size_t&) const method
282 
285 
286 const Instances::Use& Instances::get_use(const size_t& i) const
287 {
288  return(items[i].use);
289 }
290 
291 
292 // std::string write_use(const size_t&) const method
293 
296 
297 std::string Instances::write_use(const size_t& i) const
298 {
299  if(items[i].use == Training)
300  {
301  return("Training");
302  }
303  else if(items[i].use == Generalization)
304  {
305  return("Generalization");
306  }
307  else if(items[i].use == Testing)
308  {
309  return("Testing");
310  }
311  else if(items[i].use == Unused)
312  {
313  return("Unused");
314  }
315  else
316  {
317  std::ostringstream buffer;
318 
319  buffer << "OpenNN Exception Instances class.\n"
320  << "std::string write_use(const size_t&) const method.\n"
321  << "Unknown use.\n";
322 
323  throw std::logic_error(buffer.str());
324  }
325 }
326 
327 
328 // bool is_used(const size_t& i) const method
329 
332 
333 bool Instances::is_used(const size_t& i) const
334 {
335  if(items[i].use == Unused)
336  {
337  return(false);
338  }
339  else
340  {
341  return(true);
342  }
343 }
344 
345 
346 // size_t count_unused_instances_number(void) const method
347 
350 
352 {
353  const size_t instances_number = get_instances_number();
354 
355  size_t unused_instances_number = 0;
356 
357  for(size_t i = 0; i < instances_number; i++)
358  {
359  if(items[i].use == Unused)
360  {
361  unused_instances_number++;
362  }
363  }
364 
365  return(unused_instances_number);
366 }
367 
368 
369 // size_t count_used_instances_number(void) const method
370 
373 
375 {
376  const size_t instances_number = get_instances_number();
377  const size_t unused_instances_number = count_unused_instances_number();
378 
379  return(instances_number - unused_instances_number);
380 }
381 
382 
383 // size_t count_training_instances_number(void) const method
384 
386 
388 {
389  const size_t instances_number = get_instances_number();
390 
391  size_t training_instances_number = 0;
392 
393  for(size_t i = 0; i < instances_number; i++)
394  {
395  if(items[i].use == Training)
396  {
397  training_instances_number++;
398  }
399  }
400 
401  return(training_instances_number);
402 }
403 
404 
405 // size_t count_generalization_instances_number(void) const method
406 
408 
410 {
411  const size_t instances_number = get_instances_number();
412 
413  size_t generalization_instances_number = 0;
414 
415  for(size_t i = 0; i < instances_number; i++)
416  {
417  if(items[i].use == Generalization)
418  {
419  generalization_instances_number++;
420  }
421  }
422 
423  return(generalization_instances_number);
424 }
425 
426 
427 // size_t count_testing_instances_number(void) const method
428 
430 
432 {
433  const size_t instances_number = get_instances_number();
434 
435  size_t testing_instances_number = 0;
436 
437  for(size_t i = 0; i < instances_number; i++)
438  {
439  if(items[i].use == Testing)
440  {
441  testing_instances_number++;
442  }
443  }
444 
445  return(testing_instances_number);
446 }
447 
448 
449 // Vector<size_t> count_uses(void) const method
450 
454 
456 {
457  Vector<size_t> count(4, 0);
458 
459  const size_t instances_number = get_instances_number();
460 
461  for(size_t i = 0; i < instances_number; i++)
462  {
463  if(items[i].use == Training)
464  {
465  count[0]++;
466  }
467  else if(items[i].use == Generalization)
468  {
469  count[1]++;
470  }
471  else if(items[i].use == Testing)
472  {
473  count[2]++;
474  }
475  else
476  {
477  count[3]++;
478  }
479  }
480 
481  return(count);
482 }
483 
484 
485 // Vector<size_t> arrange_training_indices(void) const method
486 
488 
490 {
491  const size_t instances_number = get_instances_number();
492 
493  const size_t training_instances_number = count_training_instances_number();
494 
495  Vector<size_t> training_indices(training_instances_number);
496 
497  size_t count = 0;
498 
499  for(size_t i = 0; i < instances_number; i++)
500  {
501  if(items[i].use == Training)
502  {
503  training_indices[count] = (size_t)i;
504  count++;
505  }
506  }
507 
508  return(training_indices);
509 }
510 
511 
512 // Vector<size_t> arrange_generalization_indices(void) const method
513 
515 
517 {
518  const size_t instances_number = get_instances_number();
519 
520  const size_t generalization_instances_number = count_generalization_instances_number();
521 
522  Vector<size_t> generalization_indices(generalization_instances_number);
523 
524  size_t count = 0;
525 
526  for(size_t i = 0; i < instances_number; i++)
527  {
528  if(items[i].use == Generalization)
529  {
530  generalization_indices[count] = i;
531  count++;
532  }
533  }
534 
535  return(generalization_indices);
536 }
537 
538 
539 // Vector<size_t> arrange_testing_indices(void) const method
540 
542 
544 {
545  const size_t instances_number = get_instances_number();
546 
547  const size_t testing_instances_number = count_testing_instances_number();
548 
549  Vector<size_t> testing_indices(testing_instances_number);
550 
551  size_t count = 0;
552 
553  for(size_t i = 0; i < instances_number; i++)
554  {
555  if(items[i].use == Testing)
556  {
557  testing_indices[count] = i;
558  count++;
559  }
560  }
561 
562  return(testing_indices);
563 }
564 
565 
566 // const bool& get_display(void) const method
567 
570 
571 const bool& Instances::get_display(void) const
572 {
573  return(display);
574 }
575 
576 
577 // void set(void) method
578 
580 
581 void Instances::set(void)
582 {
584 
585  set_default();
586 }
587 
588 
589 // void set(const size_t&) method
590 
594 
595 void Instances::set(const size_t& new_instances_number)
596 {
597  set_instances_number(new_instances_number);
598 
599  set_default();
600 }
601 
602 
603 // void set(const tinyxml2::XMLDocument&) method
604 
607 
608 void Instances::set(const tinyxml2::XMLDocument& instances_document)
609 {
610  set_default();
611 
612  from_XML(instances_document);
613 }
614 
615 
616 // void set_default(void) method
617 
622 
624 {
625  display = true;
626 }
627 
628 
629 // void set_uses(const Vector<Use>&) method
630 
634 
636 {
637  const size_t instances_number = get_instances_number();
638 
639  // Control sentence (if debug)
640 
641  #ifndef NDEBUG
642 
643  const size_t new_uses_size = new_uses.size();
644 
645  if(new_uses_size != instances_number)
646  {
647  std::ostringstream buffer;
648 
649  buffer << "OpenNN Exception: Instances class.\n"
650  << "void set_uses(const Vector<Use>&) method.\n"
651  << "Size of uses (" << new_uses_size << ") must be equal to number of instances (" << instances_number << ").\n";
652 
653  throw std::logic_error(buffer.str());
654  }
655 
656  #endif
657 
658  for(size_t i = 0; i < instances_number; i++)
659  {
660  items[i].use = new_uses[i];
661  }
662 }
663 
664 
665 // void set_uses(const Vector<std::string>&) method
666 
671 
673 {
674  const size_t instances_number = get_instances_number();
675 
676  std::ostringstream buffer;
677 
678  // Control sentence (if debug)
679 
680  #ifndef NDEBUG
681 
682  const size_t new_uses_size = new_uses.size();
683 
684  if(new_uses_size != instances_number)
685  {
686  buffer << "OpenNN Exception: Instances class.\n"
687  << "void set_uses(const Vector<std::string>&) method.\n"
688  << "Size of uses (" << new_uses_size << ") must be equal to number of instances (" << instances_number << ").\n";
689 
690  throw std::logic_error(buffer.str());
691  }
692 
693  #endif
694 
695  for(size_t i = 0; i < instances_number; i++)
696  {
697  if(new_uses[i] == "Unused")
698  {
699  items[i].use = Unused;
700  }
701  else if(new_uses[i] == "Training")
702  {
703  items[i].use = Training;
704  }
705  else if(new_uses[i] == "Generalization")
706  {
707  items[i].use = Generalization;
708  }
709  else if(new_uses[i] == "Testing")
710  {
711  items[i].use = Testing;
712  }
713  else
714  {
715  buffer << "OpenNN Exception Instances class.\n"
716  << "void set_uses(const Vector<std::string>&) method.\n"
717  << "Unknown use.\n";
718 
719  throw std::logic_error(buffer.str());
720  }
721  }
722 }
723 
724 
725 // void set_use(const size_t&, const Use&) method
726 
730 
731 void Instances::set_use(const size_t& i, const Use& new_use)
732 {
733  items[i].use = new_use;
734 }
735 
736 
737 // void set_use(const size_t&, const std::string&) method
738 
742 
743 void Instances::set_use(const size_t& i, const std::string& new_use)
744 {
745  if(new_use == "Training")
746  {
747  items[i].use = Training;
748  }
749  else if(new_use == "Generalization")
750  {
751  items[i].use = Generalization;
752  }
753  else if(new_use == "Testing")
754  {
755  items[i].use = Testing;
756  }
757  else if(new_use == "Unused")
758  {
759  items[i].use = Unused;
760  }
761  else
762  {
763  std::ostringstream buffer;
764 
765  buffer << "OpenNN Exception Instances class.\n"
766  << "void set_use(const std::string&) method.\n"
767  << "Unknown use: " << new_use << "\n";
768 
769  throw std::logic_error(buffer.str());
770  }
771 }
772 
773 
774 // void set_training(void) method
775 
777 
779 {
780  const size_t instances_number = get_instances_number();
781 
782  for(size_t i = 0; i < instances_number; i++)
783  {
784  items[i].use = Training;
785  }
786 }
787 
788 
789 // void set_generalization(void) method
790 
792 
794 {
795  const size_t instances_number = get_instances_number();
796 
797  for(size_t i = 0; i < instances_number; i++)
798  {
799  items[i].use = Generalization;
800  }
801 }
802 
803 
804 // void set_testing(void) method
805 
807 
809 {
810  const size_t instances_number = get_instances_number();
811 
812  for(size_t i = 0; i < instances_number; i++)
813  {
814  items[i].use = Testing;
815  }
816 }
817 
818 
819 // void set_display(const bool&) method
820 
825 
826 void Instances::set_display(const bool& new_display)
827 {
828  display = new_display;
829 }
830 
831 
832 // void set_instances_number(const size_t&) method
833 
837 
838 void Instances::set_instances_number(const size_t& new_instances_number)
839 {
840  items.set(new_instances_number);
841 
842  split_instances();
843 }
844 
845 
846 // tinyxml2::XMLDocument* to_XML(void) const method
847 
850 
851 tinyxml2::XMLDocument* Instances::to_XML(void) const
852 {
853  tinyxml2::XMLDocument* document = new tinyxml2::XMLDocument;
854 
855  std::ostringstream buffer;
856 
857  // Instances
858 
859  tinyxml2::XMLElement* instances_element = document->NewElement("Instances");
860 
861  document->InsertFirstChild(instances_element);
862 
863  tinyxml2::XMLElement* element = NULL;
864  tinyxml2::XMLText* text = NULL;
865 
866  const size_t instances_number = get_instances_number();
867 
868  // Instances number
869  {
870  element = document->NewElement("InstancesNumber");
871  instances_element->LinkEndChild(element);
872 
873  buffer.str("");
874  buffer << instances_number;
875 
876  text = document->NewText(buffer.str().c_str());
877  element->LinkEndChild(text);
878  }
879 
880  for(size_t i = 0; i < instances_number; i++)
881  {
882  element = document->NewElement("Item");
883  element->SetAttribute("Index", (unsigned)i+1);
884  instances_element->LinkEndChild(element);
885 
886  // Use
887 
888  tinyxml2::XMLElement* use_element = document->NewElement("Use");
889  element->LinkEndChild(use_element);
890 
891  tinyxml2::XMLText* use_text = document->NewText(write_use(i).c_str());
892  use_element->LinkEndChild(use_text);
893  }
894 
895  // Display
896  {
897  element = document->NewElement("Display");
898  instances_element->LinkEndChild(element);
899 
900  buffer.str("");
901  buffer << display;
902 
903  text = document->NewText(buffer.str().c_str());
904  element->LinkEndChild(text);
905  }
906 
907  return(document);
908 }
909 
910 
911 // void from_XML(const tinyxml2::XMLDocument&) method
912 
915 
916 void Instances::from_XML(const tinyxml2::XMLDocument& instances_document)
917 {
918  std::ostringstream buffer;
919 
920  // Instances element
921 
922  const tinyxml2::XMLElement* instances_element = instances_document.FirstChildElement("Instances");
923 
924  if(!instances_element)
925  {
926  buffer << "OpenNN Exception: Instances class.\n"
927  << "void from_XML(const tinyxml2::XMLDocument&) method.\n"
928  << "Pointer to instances information element is NULL.\n";
929 
930  throw std::logic_error(buffer.str());
931  }
932 
933  // Instances number
934 
935  const tinyxml2::XMLElement* instances_number_element = instances_element->FirstChildElement("InstancesNumber");
936 
937  if(!instances_number_element)
938  {
939  buffer << "OpenNN Exception: Instances class.\n"
940  << "void from_XML(const tinyxml2::XMLDocument&) method.\n"
941  << "Pointer to instances number is NULL.\n";
942 
943  throw std::logic_error(buffer.str());
944  }
945 
946  const size_t instances_number = atoi(instances_number_element->GetText());
947 
948  set_instances_number(instances_number);
949 
950  if(instances_number <= 0)
951  {
952  return;
953  }
954 
955 
956  // Items
957 
958  unsigned index = 0;
959 
960  const tinyxml2::XMLElement* start_element = instances_number_element;
961 
962  for(size_t i = 0; i < instances_number; i++)
963  {
964  const tinyxml2::XMLElement* item_element = start_element->NextSiblingElement("Item");
965  start_element = item_element;
966 
967  if(!item_element)
968  {
969  buffer << "OpenNN Exception: Instances class.\n"
970  << "void from_XML(const tinyxml2::XMLElement*) method.\n"
971  << "Item " << i+1 << " is NULL.\n";
972 
973  throw std::logic_error(buffer.str());
974  }
975 
976  item_element->QueryUnsignedAttribute("Index", &index);
977 
978  if(index != i+1)
979  {
980  buffer << "OpenNN Exception: Instances class.\n"
981  << "void from_XML(const tinyxml2::XMLElement*) method.\n"
982  << "Index " << index << " is not correct.\n";
983 
984  throw std::logic_error(buffer.str());
985  }
986 
987  // Use
988 
989  const tinyxml2::XMLElement* use_element = item_element->FirstChildElement("Use");
990 
991  if(!use_element)
992  {
993  buffer << "OpenNN Exception: Instances class.\n"
994  << "void from_XML(const tinyxml2::XMLElement*) method.\n"
995  << "Pointer to use element is NULL.\n";
996 
997  throw std::logic_error(buffer.str());
998  }
999 
1000  if(use_element->GetText())
1001  {
1002  set_use(index-1, use_element->GetText());
1003  }
1004  }
1005 }
1006 
1007 
1008 // void split_random_indices(const double&, const double&, const double&) method
1009 
1014 
1016 (const double& training_instances_ratio, const double& generalization_instances_ratio, const double& testing_instances_ratio)
1017 {
1018  const size_t used_instances_number = count_used_instances_number();
1019 
1020  if(used_instances_number == 0)
1021  {
1022  return;
1023  }
1024 
1025  const double total_ratio = training_instances_ratio + generalization_instances_ratio + testing_instances_ratio;
1026 
1027  // Get number of instances for training, generalization and testing
1028 
1029  const size_t generalization_instances_number = (size_t)(generalization_instances_ratio*used_instances_number/total_ratio);
1030  const size_t testing_instances_number = (size_t)(testing_instances_ratio*used_instances_number/total_ratio);
1031  const size_t training_instances_number = used_instances_number - generalization_instances_number - testing_instances_number;
1032 
1033  const size_t sum_instances_number = training_instances_number + generalization_instances_number + testing_instances_number;
1034 
1035  if(sum_instances_number != used_instances_number)
1036  {
1037  std::ostringstream buffer;
1038 
1039  buffer << "OpenNN Warning: Instances class.\n"
1040  << "void split_random_indices(const double&, const double&, const double&) method.\n"
1041  << "Sum of numbers of training, generalization and testing instances is not equal to number of used instances.\n";
1042 
1043  throw std::logic_error(buffer.str());
1044  }
1045 
1046  const size_t instances_number = get_instances_number();
1047 
1048  Vector<size_t> indices(0, 1, instances_number-1);
1049  std::random_shuffle(indices.begin(), indices.end());
1050 
1051  size_t i = 0;
1052  size_t index;
1053 
1054  // Training
1055 
1056  size_t count_training = 0;
1057 
1058  while(count_training != training_instances_number)
1059  {
1060  index = indices[i];
1061 
1062  if(items[index].use != Unused)
1063  {
1064  items[index].use = Training;
1065  count_training++;
1066  }
1067 
1068  i++;
1069  }
1070 
1071  // Generalization
1072 
1073  size_t count_generalization = 0;
1074 
1075  while(count_generalization != generalization_instances_number)
1076  {
1077  index = indices[i];
1078 
1079  if(items[index].use != Unused)
1080  {
1081  items[index].use = Generalization;
1082  count_generalization++;
1083  }
1084 
1085  i++;
1086  }
1087 
1088  // Testing
1089 
1090  size_t count_testing = 0;
1091 
1092  while(count_testing != testing_instances_number)
1093  {
1094  index = indices[i];
1095 
1096  if(items[index].use != Unused)
1097  {
1098  items[index].use = Testing;
1099  count_testing++;
1100  }
1101 
1102  i++;
1103  }
1104 }
1105 
1106 
1107 // void split_sequential_indices(const double&, const double&, const double&) method
1108 
1113 
1114 void Instances::split_sequential_indices(const double& training_instances_ratio, const double& generalization_instances_ratio, const double& testing_instances_ratio)
1115 {
1116  const size_t used_instances_number = count_used_instances_number();
1117 
1118  if(used_instances_number == 0)
1119  {
1120  return;
1121  }
1122 
1123  const double total_ratio = training_instances_ratio + generalization_instances_ratio + testing_instances_ratio;
1124 
1125  // Get number of instances for training, generalization and testing
1126 
1127  const size_t generalization_instances_number = (size_t)(generalization_instances_ratio*used_instances_number/total_ratio);
1128  const size_t testing_instances_number = (size_t)(testing_instances_ratio*used_instances_number/total_ratio);
1129  const size_t training_instances_number = used_instances_number - generalization_instances_number - testing_instances_number;
1130 
1131  const size_t sum_instances_number = training_instances_number + generalization_instances_number + testing_instances_number;
1132 
1133  if(sum_instances_number != used_instances_number)
1134  {
1135  std::ostringstream buffer;
1136 
1137  buffer << "OpenNN Warning: Instances class.\n"
1138  << "void split_random_indices(const double&, const double&, const double&) method.\n"
1139  << "Sum of numbers of training, generalization and testing instances is not equal to number of used instances.\n";
1140 
1141  throw std::logic_error(buffer.str());
1142  }
1143 
1144  size_t i = 0;
1145 
1146  // Training
1147 
1148  size_t count_training = 0;
1149 
1150  while(count_training != training_instances_number)
1151  {
1152  if(items[i].use != Unused)
1153  {
1154  items[i].use = Training;
1155  count_training++;
1156  }
1157 
1158  i++;
1159  }
1160 
1161  // Generalization
1162 
1163  size_t count_generalization = 0;
1164 
1165  while(count_generalization != generalization_instances_number)
1166  {
1167  if(items[i].use != Unused)
1168  {
1169  items[i].use = Generalization;
1170  count_generalization++;
1171  }
1172 
1173  i++;
1174  }
1175 
1176  // Testing
1177 
1178  size_t count_testing = 0;
1179 
1180  while(count_testing != testing_instances_number)
1181  {
1182  if(items[i].use != Unused)
1183  {
1184  items[i].use = Testing;
1185  count_testing++;
1186  }
1187 
1188  i++;
1189  }
1190 }
1191 
1192 
1193 // void split_instances(const SplittingMethod& splitting_method = Random, const double& training_ratio = 0.6, const double& generalization_ratio = 0.2, const double& testing_ratio = 0.2) method
1194 
1200 
1201 void Instances::split_instances(const SplittingMethod& splitting_method, const double& training_ratio, const double& generalization_ratio, const double& testing_ratio)
1202 {
1203 
1204 #ifndef NDEBUG
1205 
1206  std::ostringstream buffer;
1207 
1208  if(training_ratio < 0)
1209  {
1210  buffer << "OpenNN Exception: Instances class.\n"
1211  << "void split_instances(const SplittingMethod&, const double&, const double&, const double&) method.\n"
1212  << "Training ratio is lower than zero.\n";
1213 
1214  throw std::logic_error(buffer.str());
1215  }
1216 
1217  if(generalization_ratio < 0)
1218  {
1219  buffer << "OpenNN Exception: Instances class.\n"
1220  << "void split_instances(const SplittingMethod&, const double&, const double&, const double&) method.\n"
1221  << "Generalization ratio is lower than zero.\n";
1222 
1223  throw std::logic_error(buffer.str());
1224  }
1225 
1226  if(testing_ratio < 0)
1227  {
1228  buffer << "OpenNN Exception: Instances class.\n"
1229  << "void split_instances(const SplittingMethod&, const double&, const double&, const double&) method.\n"
1230  << "Testing ratio is lower than zero.\n";
1231 
1232  throw std::logic_error(buffer.str());
1233  }
1234 
1235  if(training_ratio == 0.0 && generalization_ratio == 0.0 && testing_ratio == 0.0)
1236  {
1237  buffer << "OpenNN Exception: Instances class.\n"
1238  << "void split_instances(const SplittingMethod&, const double&, const double&, const double&) method.\n"
1239  << "All training, generalization and testing ratios are zero.\n";
1240 
1241  throw std::logic_error(buffer.str());
1242  }
1243 
1244 #endif
1245 
1246  switch(splitting_method)
1247  {
1248  case Instances::Sequential:
1249  {
1250  split_sequential_indices(training_ratio, generalization_ratio, testing_ratio);
1251  }
1252  break;
1253 
1254  case Instances::Random:
1255  {
1256  split_random_indices(training_ratio, generalization_ratio, testing_ratio);
1257  }
1258  break;
1259 
1260  default:
1261  {
1262  std::ostringstream buffer;
1263 
1264  buffer << "Neural Engine Exception: Instances class.\n"
1265  << "void split_instances(const double&, const double&, const double&) method.\n"
1266  << "Unknown splitting method.\n";
1267 
1268  throw std::logic_error(buffer.str());
1269  }
1270  break;
1271  }
1272 
1273 }
1274 
1275 
1276 // Vector<double> calculate_uses_percentage(void) const method
1277 
1280 
1282 {
1283  const size_t instances_number = get_instances_number();
1284 
1285  Vector<double> uses_percentage(instances_number);
1286 
1287  const Vector<size_t> uses_count = count_uses();
1288 
1289  for(size_t i = 0; i < 4; i++)
1290  {
1291  uses_percentage[i] = 100.0*uses_count[i]/(double)instances_number;
1292  }
1293 
1294  return(uses_percentage);
1295 }
1296 
1297 
1298 // void convert_time_series(const size_t&) method
1299 
1303 
1304 void Instances::convert_time_series(const size_t& lags_number)
1305 {
1306  const size_t instances_number = get_instances_number();
1307 
1308  if (instances_number < lags_number)
1309  {
1310  std::ostringstream buffer;
1311 
1312  buffer << "OpenNN Exception: Instances class.\n"
1313  << "void convert_time_series(const size_t&).\n"
1314  << "Number of instances (" << instances_number << ") must be equal or greater than number of lags (" << lags_number << ").\n";
1315 
1316  throw std::logic_error(buffer.str());
1317  }
1318 
1319  const size_t new_instances_number = instances_number - lags_number;
1320 
1321  set(new_instances_number);
1322 }
1323 
1324 
1325 // std::string to_string(void) const method
1326 
1328 
1329 std::string Instances::to_string(void) const
1330 {
1331  std::ostringstream buffer;
1332 
1333  buffer << "Instances object\n"
1334  << "Instances number: " << get_instances_number() << "\n"
1335  << "Training instances number: " << count_training_instances_number() << "\n"
1336  << "Generalization instances number: " << count_generalization_instances_number() << "\n"
1337  << "Testing instances number: " << count_testing_instances_number() << "\n"
1338  << "Uses: " << write_uses() << "\n"
1339  << "Display: " << display << "\n";
1340 
1341  return(buffer.str());
1342 }
1343 
1344 
1345 // void print(void) const method
1346 
1348 
1349 void Instances::print(void) const
1350 {
1351  std::cout << to_string();
1352 }
1353 
1354 }
1355 
1356 
1357 // OpenNN: Open Neural Networks Library.
1358 // Copyright (c) 2005-2015 Roberto Lopez.
1359 //
1360 // This library is free software; you can redistribute it and/or
1361 // modify it under the terms of the GNU Lesser General Public
1362 // License as published by the Free Software Foundation; either
1363 // version 2.1 of the License, or any later version.
1364 //
1365 // This library is distributed in the hope that it will be useful,
1366 // but WITHOUT ANY WARRANTY; without even the implied warranty of
1367 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1368 // Lesser General Public License for more details.
1369 
1370 // You should have received a copy of the GNU Lesser General Public
1371 // License along with this library; if not, write to the Free Software
1372 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
const Use & get_use(const size_t &) const
Definition: instances.cpp:286
bool empty(void) const
Returns true if the number of instances is zero, and false otherwise.
Definition: instances.cpp:157
tinyxml2::XMLDocument * to_XML(void) const
Definition: instances.cpp:851
void set_display(const bool &)
Definition: instances.cpp:826
Vector< std::string > write_uses(void) const
Returns the use of every instance (training, generalization, testing) in a string vector...
Definition: instances.cpp:193
std::string to_string(void) const
Returns a string representation of the current instances object.
Definition: instances.cpp:1329
void set(void)
Sets an instances object with zero instances.
Definition: instances.cpp:581
void set_instances_number(const size_t &)
Definition: instances.cpp:838
void set_testing(void)
Sets all the instances in the data set for testing.
Definition: instances.cpp:808
size_t count_training_instances_number(void) const
Returns the number of instances in the data set which will be used for training.
Definition: instances.cpp:387
void print(void) const
Prints to the screen information about the instances object.
Definition: instances.cpp:1349
std::string write_use(const size_t &) const
Definition: instances.cpp:297
virtual ~Instances(void)
Destructor.
Definition: instances.cpp:77
static SplittingMethod get_splitting_method(const std::string &)
Definition: instances.cpp:130
const bool & get_display(void) const
Definition: instances.cpp:571
void set_training(void)
Sets all the instances in the data set for training.
Definition: instances.cpp:778
Vector< size_t > arrange_testing_indices(void) const
Returns the indices of the instances which will be used for testing.
Definition: instances.cpp:543
size_t count_testing_instances_number(void) const
Returns the number of instances in the data set which will be used for testing.
Definition: instances.cpp:431
Vector< size_t > arrange_training_indices(void) const
Returns the indices of the instances which will be used for training.
Definition: instances.cpp:489
size_t count_generalization_instances_number(void) const
Returns the number of instances in the data set which will be used for generalization.
Definition: instances.cpp:409
void set_generalization(void)
Sets all the instances in the data set for generalization.
Definition: instances.cpp:793
void from_XML(const tinyxml2::XMLDocument &)
Definition: instances.cpp:916
void convert_time_series(const size_t &)
Definition: instances.cpp:1304
bool operator==(const Instances &) const
Definition: instances.cpp:109
Vector< Item > items
Uses of instances (none, training, generalization or testing).
Definition: instances.h:219
Vector< double > calculate_uses_percentage(void) const
Definition: instances.cpp:1281
void split_instances(const SplittingMethod &splitting_method=Random, const double &training_ratio=0.6, const double &generalization_ratio=0.2, const double &testing_ratio=0.2)
Definition: instances.cpp:1201
void set_default(void)
Definition: instances.cpp:623
void split_sequential_indices(const double &training_ratio=0.6, const double &generalization_ratio=0.2, const double &testing_ratio=0.2)
Definition: instances.cpp:1114
size_t count_unused_instances_number(void) const
Definition: instances.cpp:351
void set_use(const size_t &, const Use &)
Definition: instances.cpp:731
Vector< std::string > write_abbreviated_uses(void) const
Returns the use of every instance (training, generalization, testing) in a string vector...
Definition: instances.cpp:239
void split_random_indices(const double &training_ratio=0.6, const double &generalization_ratio=0.2, const double &testing_ratio=0.2)
Definition: instances.cpp:1016
bool is_used(const size_t &) const
Definition: instances.cpp:333
size_t count_used_instances_number(void) const
Definition: instances.cpp:374
Vector< size_t > count_uses(void) const
Definition: instances.cpp:455
void set_uses(const Vector< Use > &)
Definition: instances.cpp:635
size_t get_instances_number(void) const
Returns the number of instances in the data set.
Definition: instances.h:134
Vector< size_t > arrange_generalization_indices(void) const
Returns the indices of the instances which will be used for generalization.
Definition: instances.cpp:516
bool display
Display messages to screen.
Definition: instances.h:223
Vector< Use > arrange_uses(void) const
Returns the use of every instance (training, generalization, testing) in a vector.
Definition: instances.cpp:174
Instances & operator=(const Instances &)
Definition: instances.cpp:88