OpenNN  2.2
Open Neural Networks Library
quasi_newton_method.cpp
1 /****************************************************************************************************************/
2 /* */
3 /* OpenNN: Open Neural Networks Library */
4 /* www.artelnics.com/opennn */
5 /* */
6 /* Q U A S I - N E W T O N M E T H O D C L A S S */
7 /* */
8 /* Roberto Lopez */
9 /* Artelnics - Making intelligent use of data */
11 /* */
12 /****************************************************************************************************************/
13 
14 // OpenNN includes
15 
16 #include "quasi_newton_method.h"
17 
18 //#include"windows.h"
19 
20 namespace OpenNN
21 {
22 
23 // DEFAULT CONSTRUCTOR
24 
28 
29 QuasiNewtonMethod::QuasiNewtonMethod(void)
30  : TrainingAlgorithm()
31 {
32  set_default();
33 }
34 
35 
36 // PERFORMANCE FUNCTIONAL CONSTRUCTOR
37 
42 
43 QuasiNewtonMethod::QuasiNewtonMethod(PerformanceFunctional* new_performance_functional_pointer)
44 : TrainingAlgorithm(new_performance_functional_pointer)
45 {
46  training_rate_algorithm.set_performance_functional_pointer(new_performance_functional_pointer);
47 
48  set_default();
49 }
50 
51 
52 // XML CONSTRUCTOR
53 
57 
58 QuasiNewtonMethod::QuasiNewtonMethod(const tinyxml2::XMLDocument& document)
59  : TrainingAlgorithm(document)
60 {
61  set_default();
62 }
63 
64 
65 // DESTRUCTOR
66 
69 
70 QuasiNewtonMethod::~QuasiNewtonMethod(void)
71 {
72 }
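// A minimal construction sketch, assuming a PerformanceFunctional object named
// "performance_functional" has been configured elsewhere; the iteration and time limits
// below are illustrative values, and only constructors and setters from this class are used.
//
//   QuasiNewtonMethod quasi_Newton_method(&performance_functional);
//   quasi_Newton_method.set_inverse_Hessian_approximation_method("BFGS");
//   quasi_Newton_method.set_maximum_iterations_number(500);
//   quasi_Newton_method.set_maximum_time(60.0);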
73 
74 
75 // METHODS
76 
77 
78 
79 // const TrainingRateAlgorithm& get_training_rate_algorithm(void) const method
80 
82 
83 const TrainingRateAlgorithm& QuasiNewtonMethod::get_training_rate_algorithm(void) const
84 {
85  return(training_rate_algorithm);
86 }
87 
88 
89 // TrainingRateAlgorithm* get_training_rate_algorithm_pointer(void) method
90 
92 
94 {
95  return(&training_rate_algorithm);
96 }
97 
98 
99 // const InverseHessianApproximationMethod& get_inverse_Hessian_approximation_method(void) const method
100 
102 
103 const QuasiNewtonMethod::InverseHessianApproximationMethod& QuasiNewtonMethod::get_inverse_Hessian_approximation_method(void) const
104 {
105  return(inverse_Hessian_approximation_method);
106 }
107 
108 
109 // std::string write_inverse_Hessian_approximation_method(void) const method
110 
112 
113 std::string QuasiNewtonMethod::write_inverse_Hessian_approximation_method(void) const
114 {
115  switch(inverse_Hessian_approximation_method)
116  {
117  case DFP:
118  {
119  return("DFP");
120  }
121  break;
122 
123  case BFGS:
124  {
125  return("BFGS");
126  }
127  break;
128 
129  default:
130  {
131  std::ostringstream buffer;
132 
133  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
134  << "std::string write_inverse_Hessian_approximation_method(void) const method.\n"
135  << "Unknown inverse Hessian approximation method.\n";
136 
137  throw std::logic_error(buffer.str());
138  }
139  break;
140  }
141 }
142 
143 
144 // const double& get_warning_parameters_norm(void) const method
145 
147 
148 const double& QuasiNewtonMethod::get_warning_parameters_norm(void) const
149 {
150  return(warning_parameters_norm);
151 }
152 
153 
154 // const double& get_warning_gradient_norm(void) const method
155 
157 
158 const double& QuasiNewtonMethod::get_warning_gradient_norm(void) const
159 {
160  return(warning_gradient_norm);
161 }
162 
163 
164 // const double& get_warning_training_rate(void) const method
165 
167 
168 const double& QuasiNewtonMethod::get_warning_training_rate(void) const
169 {
170  return(warning_training_rate);
171 }
172 
173 
174 // const double& get_error_parameters_norm(void) const method
175 
177 
178 const double& QuasiNewtonMethod::get_error_parameters_norm(void) const
179 {
180  return(error_parameters_norm);
181 }
182 
183 
184 // const double& get_error_gradient_norm(void) const method
185 
188 
189 const double& QuasiNewtonMethod::get_error_gradient_norm(void) const
190 {
191  return(error_gradient_norm);
192 }
193 
194 
195 // const double& get_error_training_rate(void) const method
196 
199 
200 const double& QuasiNewtonMethod::get_error_training_rate(void) const
201 {
202  return(error_training_rate);
203 }
204 
205 
206 // const double& get_minimum_parameters_increment_norm(void) const method
207 
209 
210 const double& QuasiNewtonMethod::get_minimum_parameters_increment_norm(void) const
211 {
212  return(minimum_parameters_increment_norm);
213 }
214 
215 
216 // const double& get_minimum_performance_increase(void) const method
217 
219 
220 const double& QuasiNewtonMethod::get_minimum_performance_increase(void) const
221 {
222  return(minimum_performance_increase);
223 }
224 
225 
226 // const double& get_performance_goal(void) const method
227 
230 
231 const double& QuasiNewtonMethod::get_performance_goal(void) const
232 {
233  return(performance_goal);
234 }
235 
236 
237 // const double& get_gradient_norm_goal(void) const method
238 
241 
242 const double& QuasiNewtonMethod::get_gradient_norm_goal(void) const
243 {
244  return(gradient_norm_goal);
245 }
246 
247 
248 // const size_t& get_maximum_generalization_performance_decreases(void) const method
249 
251 
252 const size_t& QuasiNewtonMethod::get_maximum_generalization_performance_decreases(void) const
253 {
254  return(maximum_generalization_performance_decreases);
255 }
256 
257 
258 // const size_t& get_maximum_iterations_number(void) const method
259 
261 
262 const size_t& QuasiNewtonMethod::get_maximum_iterations_number(void) const
263 {
264  return(maximum_iterations_number);
265 }
266 
267 
268 // const double& get_maximum_time(void) const method
269 
271 
272 const double& QuasiNewtonMethod::get_maximum_time(void) const
273 {
274  return(maximum_time);
275 }
276 
277 
278 
279 // const bool& get_reserve_parameters_history(void) const method
280 
282 
283 const bool& QuasiNewtonMethod::get_reserve_parameters_history(void) const
284 {
285  return(reserve_parameters_history);
286 }
287 
288 
289 // const bool& get_reserve_parameters_norm_history(void) const method
290 
292 
293 const bool& QuasiNewtonMethod::get_reserve_parameters_norm_history(void) const
294 {
295  return(reserve_parameters_norm_history);
296 }
297 
298 
299 // const bool& get_reserve_performance_history(void) const method
300 
302 
303 const bool& QuasiNewtonMethod::get_reserve_performance_history(void) const
304 {
305  return(reserve_performance_history);
306 }
307 
308 
309 // const bool& get_reserve_gradient_history(void) const method
310 
312 
313 const bool& QuasiNewtonMethod::get_reserve_gradient_history(void) const
314 {
315  return(reserve_gradient_history);
316 }
317 
318 
319 // const bool& get_reserve_gradient_norm_history(void) const method
320 
322 
323 const bool& QuasiNewtonMethod::get_reserve_gradient_norm_history(void) const
324 {
325  return(reserve_gradient_norm_history);
326 }
327 
328 
329 
330 // const bool& get_reserve_training_direction_history(void) const method
331 
333 
334 const bool& QuasiNewtonMethod::get_reserve_training_direction_history(void) const
335 {
336  return(reserve_training_direction_history);
337 }
338 
339 
340 // const bool& get_reserve_training_rate_history(void) const method
341 
343 
344 const bool& QuasiNewtonMethod::get_reserve_training_rate_history(void) const
345 {
346  return(reserve_training_rate_history);
347 }
348 
349 
350 // const bool& get_reserve_elapsed_time_history(void) const method
351 
353 
354 const bool& QuasiNewtonMethod::get_reserve_elapsed_time_history(void) const
355 {
356  return(reserve_elapsed_time_history);
357 }
358 
359 
360 // const bool& get_reserve_inverse_Hessian_history(void) const method
361 
363 
364 const bool& QuasiNewtonMethod::get_reserve_inverse_Hessian_history(void) const
365 {
366  return(reserve_inverse_Hessian_history);
367 }
368 
369 
370 // const bool& get_reserve_generalization_performance_history(void) const method
371 
373 
374 const bool& QuasiNewtonMethod::get_reserve_generalization_performance_history(void) const
375 {
376  return(reserve_generalization_performance_history);
377 }
378 
379 
380 // void set_performance_functional_pointer(PerformanceFunctional*) method
381 
385 
386 void QuasiNewtonMethod::set_performance_functional_pointer(PerformanceFunctional* new_performance_functional_pointer)
387 {
388  performance_functional_pointer = new_performance_functional_pointer;
389 
390  training_rate_algorithm.set_performance_functional_pointer(new_performance_functional_pointer);
391 }
392 
393 
394 // void set_inverse_Hessian_approximation_method(const InverseHessianApproximationMethod&) method
395 
398 
399 void QuasiNewtonMethod::set_inverse_Hessian_approximation_method(const QuasiNewtonMethod::InverseHessianApproximationMethod&
400 new_inverse_Hessian_approximation_method)
401 {
402  inverse_Hessian_approximation_method = new_inverse_Hessian_approximation_method;
403 }
404 
405 
406 // void set_inverse_Hessian_approximation_method(const std::string&) method
407 
415 
416 void QuasiNewtonMethod::set_inverse_Hessian_approximation_method(const std::string& new_inverse_Hessian_approximation_method_name)
417 {
418  if(new_inverse_Hessian_approximation_method_name == "DFP")
419  {
420  inverse_Hessian_approximation_method = DFP;
421  }
422  else if(new_inverse_Hessian_approximation_method_name == "BFGS")
423  {
424  inverse_Hessian_approximation_method = BFGS;
425  }
426  else
427  {
428  std::ostringstream buffer;
429 
430  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
431  << "void set_inverse_Hessian_approximation_method(const std::string&) method.\n"
432  << "Unknown inverse Hessian approximation method: " << new_inverse_Hessian_approximation_method_name << ".\n";
433 
434  throw std::logic_error(buffer.str());
435  }
436 }
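// A brief sketch of calling the two setters above on an existing QuasiNewtonMethod object
// (the name "quasi_Newton_method" is assumed); either the enumeration value or its name
// may be passed.
//
//   quasi_Newton_method.set_inverse_Hessian_approximation_method(QuasiNewtonMethod::DFP);
//   quasi_Newton_method.set_inverse_Hessian_approximation_method("BFGS");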
437 
438 
439 // void set_reserve_all_training_history(bool) method
440 
444 
445 void QuasiNewtonMethod::set_reserve_all_training_history(const bool& new_reserve_all_training_history)
446 {
447  reserve_elapsed_time_history = new_reserve_all_training_history;
448  reserve_parameters_history = new_reserve_all_training_history;
449  reserve_parameters_norm_history = new_reserve_all_training_history;
450  reserve_performance_history = new_reserve_all_training_history;
451  reserve_gradient_history = new_reserve_all_training_history;
452  reserve_gradient_norm_history = new_reserve_all_training_history;
453  reserve_training_direction_history = new_reserve_all_training_history;
454  reserve_training_rate_history = new_reserve_all_training_history;
455 }
456 
457 
458 // void set_default(void) method
459 
460 void QuasiNewtonMethod::set_default(void)
461 {
462  inverse_Hessian_approximation_method = BFGS;
463 
464  training_rate_algorithm.set_default();
465 
466  // TRAINING PARAMETERS
467 
468  warning_parameters_norm = 1.0e3;
469  warning_gradient_norm = 1.0e3;
470  warning_training_rate = 1.0e3;
471 
472  error_parameters_norm = 1.0e6;
473  error_gradient_norm = 1.0e6;
474  error_training_rate = 1.0e6;
475 
476  // STOPPING CRITERIA
477 
478  minimum_parameters_increment_norm = 0.0;
479 
480  minimum_performance_increase = 0.0;
481  performance_goal = -1.0e99;
482  gradient_norm_goal = 0.0;
483  maximum_generalization_performance_decreases = 1000000;
484 
485  maximum_iterations_number = 1000;
486  maximum_time = 1000.0;
487 
488  // TRAINING HISTORY
489 
492 
494  reserve_gradient_history = false;
498 
502 
503  // UTILITIES
504 
505  display = true;
506  display_period = 10;
507 }
508 
509 
510 // void set_warning_parameters_norm(const double&) method
511 
515 
516 void QuasiNewtonMethod::set_warning_parameters_norm(const double& new_warning_parameters_norm)
517 {
518  // Control sentence (if debug)
519 
520  #ifndef NDEBUG
521 
522  if(new_warning_parameters_norm < 0.0)
523  {
524  std::ostringstream buffer;
525 
526  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
527  << "void set_warning_parameters_norm(const double&) method.\n"
528  << "Warning parameters norm must be equal or greater than 0.\n";
529 
530  throw std::logic_error(buffer.str());
531  }
532 
533  #endif
534 
535  // Set warning parameters norm
536 
537  warning_parameters_norm = new_warning_parameters_norm;
538 }
539 
540 
541 // void set_warning_gradient_norm(const double&) method
542 
546 
547 void QuasiNewtonMethod::set_warning_gradient_norm(const double& new_warning_gradient_norm)
548 {
549  // Control sentence (if debug)
550 
551  #ifndef NDEBUG
552 
553  if(new_warning_gradient_norm < 0.0)
554  {
555  std::ostringstream buffer;
556 
557  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
558  << "void set_warning_gradient_norm(const double&) method.\n"
559  << "Warning gradient norm must be equal or greater than 0.\n";
560 
561  throw std::logic_error(buffer.str());
562  }
563 
564  #endif
565 
566  // Set warning gradient norm
567 
568  warning_gradient_norm = new_warning_gradient_norm;
569 }
570 
571 
572 // void set_warning_training_rate(const double&) method
573 
577 
578 void QuasiNewtonMethod::set_warning_training_rate(const double& new_warning_training_rate)
579 {
580  // Control sentence (if debug)
581 
582  #ifndef NDEBUG
583 
584  if(new_warning_training_rate < 0.0)
585  {
586  std::ostringstream buffer;
587 
588  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
589  << "void set_warning_training_rate(const double&) method.\n"
590  << "Warning training rate must be equal or greater than 0.\n";
591 
592  throw std::logic_error(buffer.str());
593  }
594 
595  #endif
596 
597  warning_training_rate = new_warning_training_rate;
598 }
599 
600 
601 // void set_error_parameters_norm(const double&) method
602 
606 
607 void QuasiNewtonMethod::set_error_parameters_norm(const double& new_error_parameters_norm)
608 {
609  // Control sentence (if debug)
610 
611  #ifndef NDEBUG
612 
613  if(new_error_parameters_norm < 0.0)
614  {
615  std::ostringstream buffer;
616 
617  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
618  << "void set_error_parameters_norm(const double&) method.\n"
619  << "Error parameters norm must be equal or greater than 0.\n";
620 
621  throw std::logic_error(buffer.str());
622  }
623 
624  #endif
625 
626  // Set error parameters norm
627 
628  error_parameters_norm = new_error_parameters_norm;
629 }
630 
631 
632 // void set_error_gradient_norm(const double&) method
633 
637 
638 void QuasiNewtonMethod::set_error_gradient_norm(const double& new_error_gradient_norm)
639 {
640  // Control sentence (if debug)
641 
642  #ifndef NDEBUG
643 
644  if(new_error_gradient_norm < 0.0)
645  {
646  std::ostringstream buffer;
647 
648  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
649  << "void set_error_gradient_norm(const double&) method.\n"
650  << "Error gradient norm must be equal or greater than 0.\n";
651 
652  throw std::logic_error(buffer.str());
653  }
654 
655  #endif
656 
657  // Set error gradient norm
658 
659  error_gradient_norm = new_error_gradient_norm;
660 }
661 
662 
663 // void set_error_training_rate(const double&) method
664 
668 
669 void QuasiNewtonMethod::set_error_training_rate(const double& new_error_training_rate)
670 {
671  // Control sentence (if debug)
672 
673  #ifndef NDEBUG
674 
675  if(new_error_training_rate < 0.0)
676  {
677  std::ostringstream buffer;
678 
679  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
680  << "void set_error_training_rate(const double&) method.\n"
681  << "Error training rate must be equal or greater than 0.\n";
682 
683  throw std::logic_error(buffer.str());
684  }
685 
686  #endif
687 
688  // Set error training rate
689 
690  error_training_rate = new_error_training_rate;
691 }
692 
693 
694 // void set_minimum_parameters_increment_norm(const double&) method
695 
698 
699 void QuasiNewtonMethod::set_minimum_parameters_increment_norm(const double& new_minimum_parameters_increment_norm)
700 {
701  // Control sentence (if debug)
702 
703  #ifndef NDEBUG
704 
705  if(new_minimum_parameters_increment_norm < 0.0)
706  {
707  std::ostringstream buffer;
708 
709  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
710  << "void set_minimum_parameters_increment_norm(const double&) method.\n"
711  << "Minimum parameters increment norm must be equal or greater than 0.\n";
712 
713  throw std::logic_error(buffer.str());
714  }
715 
716  #endif
717 
718  // Set minimum parameters increment norm
719 
720  minimum_parameters_increment_norm = new_minimum_parameters_increment_norm;
721 }
722 
723 
724 // void set_minimum_performance_increase(const double&) method
725 
728 
729 void QuasiNewtonMethod::set_minimum_performance_increase(const double& new_minimum_performance_increase)
730 {
731  // Control sentence (if debug)
732 
733  #ifndef NDEBUG
734 
735  if(new_minimum_performance_increase < 0.0)
736  {
737  std::ostringstream buffer;
738 
739  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
740  << "void set_minimum_performance_increase(const double&) method.\n"
741  << "Minimum performance improvement must be equal or greater than 0.\n";
742 
743  throw std::logic_error(buffer.str());
744  }
745 
746  #endif
747 
748  // Set minimum performance improvement
749 
750  minimum_performance_increase = new_minimum_performance_increase;
751 }
752 
753 
754 // void set_performance_goal(const double&) method
755 
759 
760 void QuasiNewtonMethod::set_performance_goal(const double& new_performance_goal)
761 {
762  performance_goal = new_performance_goal;
763 }
764 
765 
766 // void set_gradient_norm_goal(const double&) method
767 
771 
772 void QuasiNewtonMethod::set_gradient_norm_goal(const double& new_gradient_norm_goal)
773 {
774  // Control sentence (if debug)
775 
776  #ifndef NDEBUG
777 
778  if(new_gradient_norm_goal < 0.0)
779  {
780  std::ostringstream buffer;
781 
782  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
783  << "void set_gradient_norm_goal(const double&) method.\n"
784  << "Gradient norm goal must be equal or greater than 0.\n";
785 
786  throw std::logic_error(buffer.str());
787  }
788 
789  #endif
790 
791  // Set gradient norm goal
792 
793  gradient_norm_goal = new_gradient_norm_goal;
794 }
795 
796 
797 // void set_maximum_generalization_performance_decreases(const size_t&) method
798 
801 
802 void QuasiNewtonMethod::set_maximum_generalization_performance_decreases(const size_t& new_maximum_generalization_performance_decreases)
803 {
804  // Set maximum generalization performance decreases
805 
806  maximum_generalization_performance_decreases = new_maximum_generalization_performance_decreases;
807 }
808 
809 
810 // void set_maximum_iterations_number(size_t) method
811 
814 
815 void QuasiNewtonMethod::set_maximum_iterations_number(const size_t& new_maximum_iterations_number)
816 {
817  // Set maximum iterations number
818 
819  maximum_iterations_number = new_maximum_iterations_number;
820 }
821 
822 
823 // void set_maximum_time(const double&) method
824 
827 
828 void QuasiNewtonMethod::set_maximum_time(const double& new_maximum_time)
829 {
830  // Control sentence (if debug)
831 
832  #ifndef NDEBUG
833 
834  if(new_maximum_time < 0.0)
835  {
836  std::ostringstream buffer;
837 
838  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
839  << "void set_maximum_time(const double&) method.\n"
840  << "Maximum time must be equal or greater than 0.\n";
841 
842  throw std::logic_error(buffer.str());
843  }
844 
845  #endif
846 
847  // Set maximum time
848 
849  maximum_time = new_maximum_time;
850 }
851 
852 
853 // void set_reserve_parameters_history(bool) method
854 
857 
858 void QuasiNewtonMethod::set_reserve_parameters_history(const bool& new_reserve_parameters_history)
859 {
860  reserve_parameters_history = new_reserve_parameters_history;
861 }
862 
863 
864 // void set_reserve_parameters_norm_history(bool) method
865 
868 
869 void QuasiNewtonMethod::set_reserve_parameters_norm_history(const bool& new_reserve_parameters_norm_history)
870 {
871  reserve_parameters_norm_history = new_reserve_parameters_norm_history;
872 }
873 
874 
875 // void set_reserve_performance_history(bool) method
876 
879 
880 void QuasiNewtonMethod::set_reserve_performance_history(const bool& new_reserve_performance_history)
881 {
882  reserve_performance_history = new_reserve_performance_history;
883 }
884 
885 
886 // void set_reserve_gradient_history(bool) method
887 
890 
891 void QuasiNewtonMethod::set_reserve_gradient_history(const bool& new_reserve_gradient_history)
892 {
893  reserve_gradient_history = new_reserve_gradient_history;
894 }
895 
896 
897 // void set_reserve_gradient_norm_history(bool) method
898 
902 
903 void QuasiNewtonMethod::set_reserve_gradient_norm_history(const bool& new_reserve_gradient_norm_history)
904 {
905  reserve_gradient_norm_history = new_reserve_gradient_norm_history;
906 }
907 
908 
909 // void set_reserve_inverse_Hessian_history(bool) method
910 
914 
915 void QuasiNewtonMethod::set_reserve_inverse_Hessian_history(const bool& new_reserve_inverse_Hessian_history)
916 {
917  reserve_inverse_Hessian_history = new_reserve_inverse_Hessian_history;
918 }
919 
920 
921 // void set_reserve_training_direction_history(bool) method
922 
926 
927 void QuasiNewtonMethod::set_reserve_training_direction_history(const bool& new_reserve_training_direction_history)
928 {
929  reserve_training_direction_history = new_reserve_training_direction_history;
930 }
931 
932 
933 // void set_reserve_training_rate_history(bool) method
934 
938 
939 void QuasiNewtonMethod::set_reserve_training_rate_history(const bool& new_reserve_training_rate_history)
940 {
941  reserve_training_rate_history = new_reserve_training_rate_history;
942 }
943 
944 
945 // void set_reserve_elapsed_time_history(bool) method
946 
950 
951 void QuasiNewtonMethod::set_reserve_elapsed_time_history(const bool& new_reserve_elapsed_time_history)
952 {
953  reserve_elapsed_time_history = new_reserve_elapsed_time_history;
954 }
955 
956 
957 // void set_reserve_generalization_performance_history(bool) method
958 
962 
963 void QuasiNewtonMethod::set_reserve_generalization_performance_history(const bool& new_reserve_generalization_performance_history)
964 {
965  reserve_generalization_performance_history = new_reserve_generalization_performance_history;
966 }
967 
968 
969 // void set_display_period(const size_t&) method
970 
974 
975 void QuasiNewtonMethod::set_display_period(const size_t& new_display_period)
976 {
977  // Control sentence (if debug)
978 
979  #ifndef NDEBUG
980 
981  if(new_display_period == 0)
982  {
983  std::ostringstream buffer;
984 
985  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
986  << "void set_display_period(const size_t&) method.\n"
987  << "Display period must be greater than 0.\n";
988 
989  throw std::logic_error(buffer.str());
990  }
991 
992  #endif
993 
994  display_period = new_display_period;
995 }
996 
997 
998 // Matrix<double> calculate_inverse_Hessian_approximation(
999 // const Vector<double>&, const Vector<double>&,
1000 // const Vector<double>&, const Vector<double>&,
1001 // const Matrix<double>&) method
1002 
1009 
1010 Matrix<double> QuasiNewtonMethod::calculate_inverse_Hessian_approximation(
1011 const Vector<double>& old_parameters, const Vector<double>& parameters,
1012 const Vector<double>& old_gradient, const Vector<double>& gradient,
1013 const Matrix<double>& old_inverse_Hessian) const
1014 {
1015  switch(inverse_Hessian_approximation_method)
1016  {
1017  case DFP:
1018  {
1019  return(calculate_DFP_inverse_Hessian(old_parameters, parameters, old_gradient, gradient, old_inverse_Hessian));
1020  }
1021  break;
1022 
1023  case BFGS:
1024  {
1025  return(calculate_BFGS_inverse_Hessian(old_parameters, parameters, old_gradient, gradient, old_inverse_Hessian));
1026  }
1027  break;
1028 
1029  default:
1030  {
1031  std::ostringstream buffer;
1032 
1033  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
1034  << "Vector<double> calculate_inverse_Hessian_approximation(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
1035  << "Unknown inverse Hessian approximation method.\n";
1036 
1037  throw std::logic_error(buffer.str());
1038  }
1039  break;
1040  }
1041 }
1042 
1043 
1044 // Vector<double> calculate_training_direction(const Vector<double>&, const Matrix<double>&) const method
1045 
1049 
1050 Vector<double> QuasiNewtonMethod::calculate_training_direction(const Vector<double>& gradient, const Matrix<double>& inverse_Hessian_approximation) const
1051 {
1052  return((inverse_Hessian_approximation.dot(gradient)*(-1.0)).calculate_normalized());
1053 }
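// The direction returned above is the quasi-Newton step d = -H*g, where H is the current
// inverse Hessian approximation and g the gradient, rescaled to unit norm; the training
// rate algorithm then decides how far to move along it.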
1054 
1055 
1056 // Vector<double> calculate_gradient_descent_training_direction(const Vector<double>&) const method
1057 
1060 
1061 Vector<double> QuasiNewtonMethod::calculate_gradient_descent_training_direction(const Vector<double>& gradient) const
1062 {
1063  // Control sentence (if debug)
1064 
1065  #ifndef NDEBUG
1066 
1067  std::ostringstream buffer;
1068 
1069  if(!performance_functional_pointer)
1070  {
1071  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
1072  << "Vector<double> calculate_gradient_descent_training_direction(const Vector<double>&) const method.\n"
1073  << "Performance functional pointer is NULL.\n";
1074 
1075  throw std::logic_error(buffer.str());
1076  }
1077 
1078  #endif
1079 
1080 
1081  #ifndef NDEBUG
1082 
1083  const NeuralNetwork* neural_network_pointer = performance_functional_pointer->get_neural_network_pointer();
1084 
1085  const size_t gradient_size = gradient.size();
1086  const size_t parameters_number = neural_network_pointer->count_parameters_number();
1087 
1088  if(gradient_size != parameters_number)
1089  {
1090  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
1091  << "Vector<double> calculate_gradient_descent_training_direction(const Vector<double>&) const method.\n"
1092  << "Size of gradient (" << gradient_size << ") is not equal to number of parameters (" << parameters_number << ").\n";
1093 
1094  throw std::logic_error(buffer.str());
1095  }
1096 
1097  #endif
1098 
1099  return(gradient.calculate_normalized()*(-1.0));
1100 }
1101 
1102 
1103 // Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method
1104 
1112 
1113 Matrix<double> QuasiNewtonMethod::calculate_DFP_inverse_Hessian(
1114 const Vector<double>& old_parameters, const Vector<double>& parameters, const Vector<double>& old_gradient, const Vector<double>& gradient, const Matrix<double>& old_inverse_Hessian) const
1115 {
1116  std::ostringstream buffer;
1117 
1118  // Control sentence (if debug)
1119 
1120  #ifndef NDEBUG
1121 
1122  const NeuralNetwork* neural_network_pointer = performance_functional_pointer->get_neural_network_pointer();
1123 
1124  const size_t parameters_number = neural_network_pointer->count_parameters_number();
1125 
1126  const size_t old_parameters_size = old_parameters.size();
1127  const size_t parameters_size = parameters.size();
1128 
1129  if(old_parameters_size != parameters_number)
1130  {
1131  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
1132  << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
1133  << "Size of old parameters vector must be equal to number of parameters.\n";
1134 
1135  throw std::logic_error(buffer.str());
1136  }
1137  else if(parameters_size != parameters_number)
1138  {
1139  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
1140  << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
1141  << "Size of parameters vector must be equal to number of parameters.\n";
1142 
1143  throw std::logic_error(buffer.str());
1144  }
1145 
1146  const size_t old_gradient_size = old_gradient.size();
1147  const size_t gradient_size = gradient.size();
1148 
1149  if(old_gradient_size != parameters_number)
1150  {
1151  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
1152  << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
1153  << "Size of old gradient vector must be equal to number of parameters.\n";
1154 
1155  throw std::logic_error(buffer.str());
1156  }
1157  else if(gradient_size != parameters_number)
1158  {
1159  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
1160  << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
1161  << "Size of gradient vector must be equal to number of parameters.\n";
1162 
1163  throw std::logic_error(buffer.str());
1164  }
1165 
1166  const size_t rows_number = old_inverse_Hessian.get_rows_number();
1167  const size_t columns_number = old_inverse_Hessian.get_columns_number();
1168 
1169  if(rows_number != parameters_number)
1170  {
1171  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
1172  << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
1173  << "Number of rows in old inverse Hessian must be equal to number of parameters.\n";
1174 
1175  throw std::logic_error(buffer.str());
1176  }
1177  else if(columns_number != parameters_number)
1178  {
1179  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
1180  << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
1181  << "Number of columns in old inverse Hessian must be equal to number of parameters.\n";
1182 
1183  throw std::logic_error(buffer.str());
1184  }
1185 
1186  #endif
1187 
1188  // Parameters difference Vector
1189 
1190  const Vector<double> parameters_difference = parameters - old_parameters;
1191 
1192  // Control sentence (if debug)
1193 
1194  if(parameters_difference.calculate_absolute_value() < 1.0e-99)
1195  {
1196  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
1197  << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
1198  << "Parameters difference vector is zero.\n";
1199 
1200  throw std::logic_error(buffer.str());
1201  }
1202 
1203  // Gradient difference Vector
1204 
1205  const Vector<double> gradient_difference = gradient - old_gradient;
1206 
1207  if(gradient_difference.calculate_absolute_value() < 1.0e-50)
1208  {
1209  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
1210  << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
1211  << "Gradient difference vector is zero.\n";
1212 
1213  throw std::logic_error(buffer.str());
1214  }
1215 
1216  if(old_inverse_Hessian.calculate_absolute_value() < 1.0e-50)
1217  {
1218  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
1219  << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
1220  << "Old inverse Hessian matrix is zero.\n";
1221 
1222  throw std::logic_error(buffer.str());
1223  }
1224 
1225  if(fabs(parameters_difference.dot(gradient_difference)) < 1.0e-50)
1226  {
1227  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
1228  << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
1229  << "Denominator of first term is zero.\n";
1230 
1231  throw std::logic_error(buffer.str());
1232  }
1233  else if(fabs(gradient_difference.dot(old_inverse_Hessian).dot(gradient_difference)) < 1.0e-50)
1234  {
1235  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
1236  << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
1237  << "Denominator of second term is zero.\n";
1238 
1239  throw std::logic_error(buffer.str());
1240  }
1241 
1242  Matrix<double> inverse_Hessian_approximation = old_inverse_Hessian;
1243 
1244  inverse_Hessian_approximation += parameters_difference.direct(parameters_difference)/parameters_difference.dot(gradient_difference);
1245 
1246  inverse_Hessian_approximation -= (old_inverse_Hessian.dot(gradient_difference)).direct(old_inverse_Hessian.dot(gradient_difference))
1247  /gradient_difference.dot(old_inverse_Hessian).dot(gradient_difference);
1248 
1249 
1250  return(inverse_Hessian_approximation);
1251 }
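// Writing s = parameters_difference and y = gradient_difference, the update implemented
// above is the standard DFP formula
//
//   H_new = H + (s s^T)/(s^T y) - (H y)(H y)^T/(y^T H y)
//
// where H is the old inverse Hessian approximation.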
1252 
1253 
1254 // Matrix<double> calculate_BFGS_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Matrix<double>&, const Vector<double>&, const Vector<double>&) method
1255 
1263 
1264 Matrix<double> QuasiNewtonMethod::calculate_BFGS_inverse_Hessian(
1265 const Vector<double>& old_parameters, const Vector<double>& parameters, const Vector<double>& old_gradient, const Vector<double>& gradient, const Matrix<double>& old_inverse_Hessian) const
1266 {
1267 
1268  // Control sentence (if debug)
1269 
1270  #ifndef NDEBUG
1271 
1272  std::ostringstream buffer;
1273 
1274  const NeuralNetwork* neural_network_pointer = performance_functional_pointer->get_neural_network_pointer();
1275 
1276  const size_t parameters_number = neural_network_pointer->count_parameters_number();
1277 
1278  const size_t old_parameters_size = old_parameters.size();
1279  const size_t parameters_size = parameters.size();
1280 
1281  if(old_parameters_size != parameters_number)
1282  {
1283  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
1284  << "Matrix<double> calculate_BFGS_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
1285  << "Size of old parameters vector must be equal to number of parameters.\n";
1286 
1287  throw std::logic_error(buffer.str());
1288  }
1289  else if(parameters_size != parameters_number)
1290  {
1291  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
1292  << "Matrix<double> calculate_BFGS_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
1293  << "Size of parameters vector must be equal to number of parameters.\n";
1294 
1295  throw std::logic_error(buffer.str());
1296  }
1297 
1298  const size_t old_gradient_size = old_gradient.size();
1299 
1300  if(old_gradient_size != parameters_number)
1301  {
1302  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
1303  << "Matrix<double> calculate_BFGS_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method."
1304  << std::endl
1305  << "Size of old gradient vector must be equal to number of parameters.\n";
1306 
1307  throw std::logic_error(buffer.str());
1308  }
1309 
1310  const size_t gradient_size = gradient.size();
1311 
1312  if(gradient_size != parameters_number)
1313  {
1314  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
1315  << "Matrix<double> calculate_BFGS_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method."
1316  << std::endl
1317  << "Size of gradient vector must be equal to number of parameters.\n";
1318 
1319  throw std::logic_error(buffer.str());
1320  }
1321 
1322  const size_t rows_number = old_inverse_Hessian.get_rows_number();
1323  const size_t columns_number = old_inverse_Hessian.get_columns_number();
1324 
1325  if(rows_number != parameters_number)
1326  {
1327  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
1328  << "Matrix<double> calculate_BFGS_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
1329  << "Number of rows in old inverse Hessian must be equal to number of parameters.\n";
1330 
1331  throw std::logic_error(buffer.str());
1332  }
1333 
1334  if(columns_number != parameters_number)
1335  {
1336  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
1337  << "Matrix<double> calculate_BFGS_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
1338  << "Number of columns in old inverse Hessian must be equal to number of parameters.\n";
1339 
1340  throw std::logic_error(buffer.str());
1341  }
1342 
1343  #endif
1344 
1345 
1346  // Parameters difference Vector
1347 
1348  const Vector<double> parameters_difference = parameters - old_parameters;
1349 
1350 
1351  if(parameters_difference.calculate_absolute_value() < 1.0e-50)
1352  {
1353  std::ostringstream buffer;
1354 
1355  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
1356  << "Matrix<double> calculate_BFGS_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
1357  << "Parameters difference vector is zero.\n";
1358 
1359  throw std::logic_error(buffer.str());
1360  }
1361 
1362  // Gradient difference Vector
1363 
1364  const Vector<double> gradient_difference = gradient - old_gradient;
1365 
1366  if(gradient_difference.calculate_absolute_value() < 1.0e-99)
1367  {
1368  std::ostringstream buffer;
1369 
1370  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
1371  << "Matrix<double> calculate_BFGS_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
1372  << "Gradient difference vector is zero.\n";
1373 
1374  throw std::logic_error(buffer.str());
1375  }
1376 
1377  if(old_inverse_Hessian.calculate_absolute_value() < 1.0e-50)
1378  {
1379  std::ostringstream buffer;
1380 
1381  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
1382  << "Matrix<double> calculate_BFGS_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
1383  << "Old inverse Hessian matrix is zero.\n";
1384 
1385  throw std::logic_error(buffer.str());
1386  }
1387 
1388 
1389  // BFGS Vector
1390 
1391  const Vector<double> BFGS = parameters_difference/parameters_difference.dot(gradient_difference)
1392  - old_inverse_Hessian.dot(gradient_difference)
1393  /gradient_difference.dot(old_inverse_Hessian).dot(gradient_difference);
1394 
1395  // Calculate inverse Hessian approximation
1396 
1397  Matrix<double> inverse_Hessian_approximation = old_inverse_Hessian;
1398 
1399  inverse_Hessian_approximation += parameters_difference.direct(parameters_difference)/parameters_difference.dot(gradient_difference);
1400 
1401  inverse_Hessian_approximation -= (old_inverse_Hessian.dot(gradient_difference)).direct(gradient_difference.dot(old_inverse_Hessian))
1402  /gradient_difference.dot(old_inverse_Hessian).dot(gradient_difference);
1403 
1404  inverse_Hessian_approximation += (BFGS.direct(BFGS))*(gradient_difference.dot(old_inverse_Hessian).dot(gradient_difference));
1405 
1406  return(inverse_Hessian_approximation);
1407 }
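// Writing s = parameters_difference, y = gradient_difference and
// u = s/(s^T y) - (H y)/(y^T H y) (the BFGS vector above), the update implemented here is
// the standard BFGS formula
//
//   H_new = H + (s s^T)/(s^T y) - (H y)(y^T H)/(y^T H y) + (y^T H y) u u^T
//
// where H is the old inverse Hessian approximation.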
1408 
1409 
1410 // QuasiNewtonMethod* get_quasi_Newton_method_pointer(void) const method
1411 
1413 
1414 QuasiNewtonMethod* QuasiNewtonMethod::QuasiNewtonMethodResults::get_quasi_Newton_method_pointer(void) const
1415 {
1416  return(quasi_Newton_method_pointer);
1417 }
1418 
1419 
1420 // void set_quasi_Newton_method_pointer(QuasiNewtonMethod*) method
1421 
1423 
1424 void QuasiNewtonMethod::QuasiNewtonMethodResults::set_quasi_Newton_method_pointer(QuasiNewtonMethod* new_quasi_Newton_method_pointer)
1425 {
1426  quasi_Newton_method_pointer = new_quasi_Newton_method_pointer;
1427 }
1428 
1429 
1430 // void resize_training_history(const size_t&) method
1431 
1434 
1435 void QuasiNewtonMethod::QuasiNewtonMethodResults::resize_training_history(const size_t& new_size)
1436 {
1437  // Control sentence (if debug)
1438 
1439  #ifndef NDEBUG
1440 
1441  if(quasi_Newton_method_pointer == NULL)
1442  {
1443  std::ostringstream buffer;
1444 
1445  buffer << "OpenNN Exception: QuasiNewtonMethodResults structure.\n"
1446  << "void resize_training_history(const size_t&) method.\n"
1447  << "Quasi-Newton method pointer is NULL.\n";
1448 
1449  throw std::logic_error(buffer.str());
1450  }
1451 
1452  #endif
1453 
1454  if(quasi_Newton_method_pointer->get_reserve_parameters_history())
1455  {
1456  parameters_history.resize(new_size);
1457  }
1458 
1459  if(quasi_Newton_method_pointer->get_reserve_parameters_norm_history())
1460  {
1461  parameters_norm_history.resize(new_size);
1462  }
1463 
1464 
1465  if(quasi_Newton_method_pointer->get_reserve_performance_history())
1466  {
1467  performance_history.resize(new_size);
1468  }
1469 
1470  if(quasi_Newton_method_pointer->get_reserve_generalization_performance_history())
1471  {
1472  generalization_performance_history.resize(new_size);
1473  }
1474 
1475  if(quasi_Newton_method_pointer->get_reserve_gradient_history())
1476  {
1477  gradient_history.resize(new_size);
1478  }
1479 
1480  if(quasi_Newton_method_pointer->get_reserve_gradient_norm_history())
1481  {
1482  gradient_norm_history.resize(new_size);
1483  }
1484 
1485  if(quasi_Newton_method_pointer->get_reserve_inverse_Hessian_history())
1486  {
1487  inverse_Hessian_history.resize(new_size);
1488  }
1489 
1490  if(quasi_Newton_method_pointer->get_reserve_training_direction_history())
1491  {
1492  training_direction_history.resize(new_size);
1493  }
1494 
1495  if(quasi_Newton_method_pointer->get_reserve_training_rate_history())
1496  {
1497  training_rate_history.resize(new_size);
1498  }
1499 
1500  if(quasi_Newton_method_pointer->get_reserve_elapsed_time_history())
1501  {
1502  elapsed_time_history.resize(new_size);
1503  }
1504 }
1505 
1506 
1507 // std::string to_string(void) const method
1508 
1510 
1511 std::string QuasiNewtonMethod::QuasiNewtonMethodResults::to_string(void) const
1512 {
1513  std::ostringstream buffer;
1514 
1515  buffer << "% Quasi-Newton method results\n";
1516 
1517  // Parameters history
1518 
1519  if(!parameters_history.empty())
1520  {
1521  if(!parameters_history[0].empty())
1522  {
1523  buffer << "% Parameters history:\n"
1524  << parameters_history << "\n";
1525  }
1526  }
1527 
1528  // Parameters norm history
1529 
1530  if(!parameters_norm_history.empty())
1531  {
1532  buffer << "% Parameters norm history:\n"
1533  << parameters_norm_history << "\n";
1534  }
1535 
1536  // Performance history
1537 
1538  if(!performance_history.empty())
1539  {
1540  buffer << "% Performance history:\n"
1541  << performance_history << "\n";
1542  }
1543 
1544  // Generalization performance history
1545 
1546  if(!generalization_performance_history.empty())
1547  {
1548  buffer << "% Generalization performance history:\n"
1549  << generalization_performance_history << "\n";
1550  }
1551 
1552  // Gradient history
1553 
1554  if(!gradient_history.empty())
1555  {
1556  if(!gradient_history[0].empty())
1557  {
1558  buffer << "% Gradient history:\n"
1559  << gradient_history << "\n";
1560  }
1561  }
1562 
1563  // Gradient norm history
1564 
1565  if(!gradient_norm_history.empty())
1566  {
1567  buffer << "% Gradient norm history:\n"
1568  << gradient_norm_history << "\n";
1569  }
1570 
1571  // Inverse Hessian history
1572 
1573  if(!inverse_Hessian_history.empty())
1574  {
1575  if(!inverse_Hessian_history[0].empty())
1576  {
1577  buffer << "% Inverse Hessian history:\n"
1578  << inverse_Hessian_history << "\n";
1579  }
1580  }
1581 
1582  // Training direction history
1583 
1584  if(!training_direction_history.empty())
1585  {
1586  if(!training_direction_history[0].empty())
1587  {
1588  buffer << "% Training direction history:\n"
1589  << training_direction_history << "\n";
1590  }
1591  }
1592 
1593  // Training rate history
1594 
1595  if(!training_rate_history.empty())
1596  {
1597  buffer << "% Training rate history:\n"
1598  << training_rate_history << "\n";
1599  }
1600 
1601  // Elapsed time history
1602 
1603  if(!elapsed_time_history.empty())
1604  {
1605  buffer << "% Elapsed time history:\n"
1606  << elapsed_time_history << "\n";
1607  }
1608 
1609  return(buffer.str());
1610 }
1611 
1612 
1613 // Matrix<std::string> write_final_results(const size_t& precision) const method
1614 
1615 Matrix<std::string> QuasiNewtonMethod::QuasiNewtonMethodResults::write_final_results(const size_t& precision) const
1616 {
1617  std::ostringstream buffer;
1618 
1619  Vector<std::string> names;
1620  Vector<std::string> values;
1621 
1622  // Final parameters norm
1623 
1624  names.push_back("Final parameters norm");
1625 
1626  buffer.str("");
1627  buffer << std::setprecision(precision) << final_parameters_norm;
1628 
1629  values.push_back(buffer.str());
1630 
1631  // Final performance
1632 
1633  names.push_back("Final performance");
1634 
1635  buffer.str("");
1636  buffer << std::setprecision(precision) << final_performance;
1637 
1638  values.push_back(buffer.str());
1639 
1640  // Final generalization performance
1641 
1642  const PerformanceFunctional* performance_functional_pointer = quasi_Newton_method_pointer->get_performance_functional_pointer();
1643 
1644  if(performance_functional_pointer->has_generalization())
1645  {
1646  names.push_back("Final generalization performance");
1647 
1648  buffer.str("");
1649  buffer << std::setprecision(precision) << final_generalization_performance;
1650 
1651  values.push_back(buffer.str());
1652  }
1653 
1654  // Final gradient norm
1655 
1656  names.push_back("Final gradient norm");
1657 
1658  buffer.str("");
1659  buffer << std::setprecision(precision) << final_gradient_norm;
1660 
1661  values.push_back(buffer.str());
1662 
1663  // Final training rate
1664 
1665 // names.push_back("Final training rate");
1666 
1667 // buffer.str("");
1668 // buffer << std::setprecision(precision) << final_training_rate;
1669 
1670 // values.push_back(buffer.str());
1671 
1672  // Iterations number
1673 
1674  names.push_back("Iterations number");
1675 
1676  buffer.str("");
1677  buffer << iterations_number;
1678 
1679  values.push_back(buffer.str());
1680 
1681  // Elapsed time
1682 
1683  names.push_back("Elapsed time");
1684 
1685  buffer.str("");
1686  buffer << elapsed_time;
1687 
1688  values.push_back(buffer.str());
1689 
1690  const size_t rows_number = names.size();
1691  const size_t columns_number = 2;
1692 
1693  Matrix<std::string> final_results(rows_number, columns_number);
1694 
1695  final_results.set_column(0, names);
1696  final_results.set_column(1, values);
1697 
1698  return(final_results);
1699 }
1700 
1701 
1702 // QuasiNewtonMethodResults* perform_training(void) method
1703 
1706 
1707 QuasiNewtonMethod::QuasiNewtonMethodResults* QuasiNewtonMethod::perform_training(void)
1708 {
1709  // Control sentence (if debug)
1710 
1711  #ifndef NDEBUG
1712 
1713  check();
1714 
1715  #endif
1716 
1717  // Start training
1718 
1719  if(display)
1720  {
1721  std::cout << "Training with quasi-Newton method...\n";
1722  }
1723 
1724  QuasiNewtonMethodResults* results_pointer = new QuasiNewtonMethodResults(this);
1725 
1726  results_pointer->resize_training_history(1+maximum_iterations_number);
1727 
1728  // Neural network stuff
1729 
1730  NeuralNetwork* neural_network_pointer = performance_functional_pointer->get_neural_network_pointer();
1731 
1732  const size_t parameters_number = neural_network_pointer->count_parameters_number();
1733 
1734  Vector<double> parameters(parameters_number);
1735  Vector<double> old_parameters(parameters_number);
1736  double parameters_norm;
1737 
1738  Vector<double> parameters_increment(parameters_number);
1739  double parameters_increment_norm;
1740 
1741  // Performance functional stuff
1742 
1743  double performance = 0.0;
1744  double old_performance = 0.0;
1745  double performance_increase = 0.0;
1746 
1747  Vector<double> gradient(parameters_number);
1748  Vector<double> old_gradient(parameters_number);
1749  double gradient_norm;
1750 
1751  Matrix<double> inverse_Hessian(parameters_number, parameters_number);
1752  Matrix<double> old_inverse_Hessian(parameters_number, parameters_number);
1753 
1754  double generalization_performance = 0.0;
1755  double old_generalization_performance = 0.0;
1756 
1757  // Training algorithm stuff
1758 
1759  Vector<double> training_direction(parameters_number);
1760 
1761  double training_slope;
1762 
1763  // const double& first_training_rate = training_rate_algorithm.get_first_training_rate();
1764  const double first_training_rate = 0.01;
1765 
1766  double initial_training_rate = 0.0;
1767  double training_rate = 0.0;
1768  double old_training_rate = 0.0;
1769 
1770  Vector<double> directional_point(2);
1771  directional_point[0] = 0.0;
1772  directional_point[1] = 0.0;
1773 
1774  bool stop_training = false;
1775 
1776  size_t generalization_failures = 0;
1777 
1778  time_t beginning_time, current_time;
1779  time(&beginning_time);
1780  double elapsed_time;
1781 
1782  size_t iteration;
1783 
1784  // Main loop
1785 
1786  for(iteration = 0; iteration <= maximum_iterations_number; iteration++)
1787  {
1788 
1789  // Neural network
1790 
1791  parameters = neural_network_pointer->arrange_parameters();
1792 
1793  parameters_norm = parameters.calculate_norm();
1794 
1795  if(display && parameters_norm >= warning_parameters_norm)
1796  {
1797  std::cout << "OpenNN Warning: Parameters norm is " << parameters_norm << ".\n";
1798  }
1799 
1800  // Performance functional stuff
1801 
1802  if(iteration == 0)
1803  {
1804  performance = performance_functional_pointer->calculate_performance();
1805  performance_increase = 0.0;
1806  }
1807  else
1808  {
1809  performance = directional_point[1];
1810  performance_increase = old_performance - performance;
1811  }
1812 
1813  gradient = performance_functional_pointer->calculate_gradient();
1814 
1815  gradient_norm = gradient.calculate_norm();
1816 
1817  if(display && gradient_norm >= warning_gradient_norm)
1818  {
1819  std::cout << "OpenNN Warning: Gradient norm is " << gradient_norm << ".\n";
1820  }
1821 
1822  if(iteration == 0
1823  || (old_parameters - parameters).calculate_absolute_value() < 1.0e-99
1824  || (old_gradient - gradient).calculate_absolute_value() < 1.0e-99)
1825  {
1826  inverse_Hessian.initialize_identity();
1827  }
1828  else
1829  {
1830  inverse_Hessian = calculate_inverse_Hessian_approximation(old_parameters, parameters, old_gradient, gradient, old_inverse_Hessian);
1831  }
1832 
1833  generalization_performance = performance_functional_pointer->calculate_generalization_performance();
1834 
1835  if(iteration != 0 && generalization_performance > old_generalization_performance)
1836  {
1837  generalization_failures++;
1838  }
1839 
1840  // Training algorithm
1841 
1842  training_direction = calculate_training_direction(gradient, inverse_Hessian);
1843 
1844  // Calculate performance training slope
1845 
1846  training_slope = (gradient/gradient_norm).dot(training_direction);
1847 
1848  // Check for a descent direction
1849 
1850  if(training_slope >= 0.0)
1851  {
1852  // Reset training direction
1853 
1854  training_direction = calculate_gradient_descent_training_direction(gradient);
1855  }
1856 
1857  // Get initial training rate
1858 
1859  if(iteration == 0)
1860  {
1861  initial_training_rate = first_training_rate;
1862  }
1863  else
1864  {
1865  initial_training_rate = old_training_rate;
1866  }
1867 
1868  directional_point = training_rate_algorithm.calculate_directional_point(performance, training_direction, initial_training_rate);
1869 
1870  training_rate = directional_point[0];
1871 
1872  // Reset training direction when training rate is 0
1873 
1874  if(iteration != 0 && training_rate < 1.0e-99)
1875  {
1876  training_direction = calculate_gradient_descent_training_direction(gradient);
1877 
1878  directional_point = training_rate_algorithm.calculate_directional_point(performance, training_direction, first_training_rate);
1879 
1880  training_rate = directional_point[0];
1881  }
1882 
1883  parameters_increment = training_direction*training_rate;
1884  parameters_increment_norm = parameters_increment.calculate_norm();
1885 
1886  // Elapsed time
1887 
1888  time(&current_time);
1889  elapsed_time = difftime(current_time, beginning_time);
1890 
1891  // Training history neural network
1892 
1893  if(reserve_parameters_history)
1894  {
1895  results_pointer->parameters_history[iteration] = parameters;
1896  }
1897 
1898  if(reserve_parameters_norm_history)
1899  {
1900  results_pointer->parameters_norm_history[iteration] = parameters_norm;
1901  }
1902 
1903  if(reserve_performance_history)
1904  {
1905  results_pointer->performance_history[iteration] = performance;
1906  }
1907 
1908  if(reserve_generalization_performance_history)
1909  {
1910  results_pointer->generalization_performance_history[iteration] = generalization_performance;
1911  }
1912 
1913  if(reserve_gradient_history)
1914  {
1915  results_pointer->gradient_history[iteration] = gradient;
1916  }
1917 
1918  if(reserve_gradient_norm_history)
1919  {
1920  results_pointer->gradient_norm_history[iteration] = gradient_norm;
1921  }
1922 
1923  if(reserve_inverse_Hessian_history)
1924  {
1925  results_pointer->inverse_Hessian_history[iteration] = inverse_Hessian;
1926  }
1927 
1928  // Training history training algorithm
1929 
1930  if(reserve_training_direction_history)
1931  {
1932  results_pointer->training_direction_history[iteration] = training_direction;
1933  }
1934 
1935  if(reserve_training_rate_history)
1936  {
1937  results_pointer->training_rate_history[iteration] = training_rate;
1938  }
1939 
1940  if(reserve_elapsed_time_history)
1941  {
1942  results_pointer->elapsed_time_history[iteration] = elapsed_time;
1943  }
1944 
1945  // Stopping Criteria
1946 
1947  if(parameters_increment_norm <= minimum_parameters_increment_norm)
1948  {
1949  if(display)
1950  {
1951  std::cout << "Iteration " << iteration << ": Minimum parameters increment norm reached.\n"
1952  << "Parameters increment norm: " << parameters_increment_norm << std::endl;
1953  }
1954 
1955  stop_training = true;
1956  }
1957 
1958  if(iteration != 0 && performance_increase <= minimum_performance_increase)
1959  {
1960  if(display)
1961  {
1962  std::cout << "Iteration " << iteration << ": Minimum performance increase reached.\n"
1963  << "Performance increase: " << performance_increase << std::endl;
1964  }
1965 
1966  stop_training = true;
1967  }
1968 
1969  else if(performance <= performance_goal)
1970  {
1971  if(display)
1972  {
1973  std::cout << "Iteration " << iteration << ": Performance goal reached.\n";
1974  }
1975 
1976  stop_training = true;
1977  }
1978 
1979  else if(gradient_norm <= gradient_norm_goal)
1980  {
1981  if(display)
1982  {
1983  std::cout << "Iteration " << iteration << ": Gradient norm goal reached.\n";
1984  }
1985 
1986  stop_training = true;
1987  }
1988 
1989  else if(generalization_failures >= maximum_generalization_performance_decreases)
1990  {
1991  if(display)
1992  {
1993  std::cout << "Iteration " << iteration << ": Maximum generalization performance decreases reached.\n"
1994  << "Generalization performance decreases: "<< generalization_failures << std::endl;
1995  }
1996 
1997  stop_training = true;
1998  }
1999 
2000  else if(iteration == maximum_iterations_number)
2001  {
2002  if(display)
2003  {
2004  std::cout << "Iteration " << iteration << ": Maximum number of iterations reached.\n";
2005  }
2006 
2007  stop_training = true;
2008  }
2009 
2010  else if(elapsed_time >= maximum_time)
2011  {
2012  if(display)
2013  {
2014  std::cout << "Iteration " << iteration << ": Maximum training time reached.\n";
2015  }
2016 
2017  stop_training = true;
2018  }
2019 
2020  if(iteration != 0 && iteration % save_period == 0)
2021  {
2022  neural_network_pointer->save(neural_network_file_name);
2023  }
2024 
2025  if(stop_training)
2026  {
2027  results_pointer->final_parameters = parameters;
2028  results_pointer->final_parameters_norm = parameters_norm;
2029 
2030  results_pointer->final_performance = performance;
2031  results_pointer->final_generalization_performance = generalization_performance;
2032 
2033  results_pointer->final_gradient = gradient;
2034  results_pointer->final_gradient_norm = gradient_norm;
2035 
2036  results_pointer->final_training_direction = training_direction;
2037  results_pointer->final_training_rate = training_rate;
2038  results_pointer->elapsed_time = elapsed_time;
2039 
2040  results_pointer->iterations_number = iteration;
2041 
2042  results_pointer->resize_training_history(iteration+1);
2043 
2044  if(display)
2045  {
2046  std::cout << "Parameters norm: " << parameters_norm << "\n"
2047  << "Performance: " << performance << "\n"
2048  << "Gradient norm: " << gradient_norm << "\n"
2050  << "Training rate: " << training_rate << "\n"
2051  << "Elapsed time: " << elapsed_time << std::endl;
2052 
2053  if(generalization_performance != 0)
2054  {
2055  std::cout << "Generalization performance: " << generalization_performance << std::endl;
2056  }
2057  }
2058 
2059  break;
2060  }
2061  else if(display && iteration % display_period == 0)
2062  {
2063  std::cout << "Iteration " << iteration << ";\n"
2064  << "Parameters norm: " << parameters_norm << "\n"
2065  << "Performance: " << performance << "\n"
2066  << "Gradient norm: " << gradient_norm << "\n"
2068  << "Training rate: " << training_rate << "\n"
2069  << "Elapsed time: " << elapsed_time << std::endl;
2070 
2071  if(generalization_performance != 0)
2072  {
2073  std::cout << "Generalization performance: " << generalization_performance << std::endl;
2074  }
2075  }
2076 
2077  // Update stuff
2078 
2079  old_parameters = parameters;
2080 
2081  old_performance = performance;
2082 
2083  old_gradient = gradient;
2084 
2085  old_inverse_Hessian = inverse_Hessian;
2086 
2087  old_generalization_performance = generalization_performance;
2088 
2089  old_training_rate = training_rate;
2090 
2091  // Set new parameters
2092 
2093  parameters += parameters_increment;
2094 
2095  neural_network_pointer->set_parameters(parameters);
2096 
2097  }
2098 
2099  results_pointer->final_parameters = parameters;
2100  results_pointer->final_parameters_norm = parameters_norm;
2101 
2102  results_pointer->final_performance = performance;
2103  results_pointer->final_generalization_performance = generalization_performance;
2104 
2105  results_pointer->final_gradient = gradient;
2106  results_pointer->final_gradient_norm = gradient_norm;
2107 
2108  results_pointer->final_training_direction = training_direction;
2109  results_pointer->final_training_rate = training_rate;
2110  results_pointer->elapsed_time = elapsed_time;
2111 
2112  results_pointer->iterations_number = iteration;
2113 
2114  results_pointer->resize_training_history(iteration+1);
2115 
2116  if(display)
2117  {
2118  std::cout << "Parameters norm: " << parameters_norm << "\n"
2119  << "Performance: " << performance << "\n"
2120  << "Gradient norm: " << gradient_norm << "\n"
2122  << "Training rate: " << training_rate << "\n"
2123  << "Elapsed time: " << elapsed_time << std::endl;
2124 
2125  if(generalization_performance != 0)
2126  {
2127  std::cout << "Generalization performance: " << generalization_performance << std::endl;
2128  }
2129  }
2130 
2131  return(results_pointer);
2132 }
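// A minimal sketch of driving this method, assuming a configured PerformanceFunctional
// named "performance_functional"; the precision passed to write_final_results is an
// illustrative value.
//
//   QuasiNewtonMethod quasi_Newton_method(&performance_functional);
//   QuasiNewtonMethod::QuasiNewtonMethodResults* results_pointer = quasi_Newton_method.perform_training();
//
//   const Matrix<std::string> final_results = results_pointer->write_final_results(4);
//
//   delete results_pointer;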
2133 
2134 
2135 // std::string write_training_algorithm_type(void) const method
2136 
2137 std::string QuasiNewtonMethod::write_training_algorithm_type(void) const
2138 {
2139  return("QUASI_NEWTON_METHOD");
2140 }
2141 
2142 
2143 // tinyxml2::XMLDocument* to_XML(void) const method
2144 
2148 
2149 tinyxml2::XMLDocument* QuasiNewtonMethod::to_XML(void) const
2150 {
2151  std::ostringstream buffer;
2152 
2153  tinyxml2::XMLDocument* document = new tinyxml2::XMLDocument;
2154 
2155  // Quasi-Newton method
2156 
2157  tinyxml2::XMLElement* root_element = document->NewElement("QuasiNewtonMethod");
2158 
2159  document->InsertFirstChild(root_element);
2160 
2161  tinyxml2::XMLElement* element = NULL;
2162  tinyxml2::XMLText* text = NULL;
2163 
2164  // Inverse Hessian approximation method
2165  {
2166  element = document->NewElement("InverseHessianApproximationMethod");
2167  root_element->LinkEndChild(element);
2168 
2169  text = document->NewText(write_inverse_Hessian_approximation_method().c_str());
2170  element->LinkEndChild(text);
2171  }
2172 
2173 
2174  // Training rate algorithm
2175  {
2176  tinyxml2::XMLElement* element = document->NewElement("TrainingRateAlgorithm");
2177  root_element->LinkEndChild(element);
2178 
2179  const tinyxml2::XMLDocument* training_rate_algorithm_document = training_rate_algorithm.to_XML();
2180 
2181  const tinyxml2::XMLElement* training_rate_algorithm_element = training_rate_algorithm_document->FirstChildElement("TrainingRateAlgorithm");
2182 
2183  DeepClone(element, training_rate_algorithm_element, document, NULL);
2184 
2185  delete training_rate_algorithm_document;
2186  }
2187 
2188  // Warning parameters norm
2189  {
2190  element = document->NewElement("WarningParametersNorm");
2191  root_element->LinkEndChild(element);
2192 
2193  buffer.str("");
2194  buffer << warning_parameters_norm;
2195 
2196  text = document->NewText(buffer.str().c_str());
2197  element->LinkEndChild(text);
2198  }
2199 
2200  // Warning gradient norm
2201  {
2202  element = document->NewElement("WarningGradientNorm");
2203  root_element->LinkEndChild(element);
2204 
2205  buffer.str("");
2206  buffer << warning_gradient_norm;
2207 
2208  text = document->NewText(buffer.str().c_str());
2209  element->LinkEndChild(text);
2210  }
2211 
2212  // Warning training rate
2213  {
2214  element = document->NewElement("WarningTrainingRate");
2215  root_element->LinkEndChild(element);
2216 
2217  buffer.str("");
2218  buffer << warning_training_rate;
2219 
2220  text = document->NewText(buffer.str().c_str());
2221  element->LinkEndChild(text);
2222  }
2223 
2224  // Error parameters norm
2225  {
2226  element = document->NewElement("ErrorParametersNorm");
2227  root_element->LinkEndChild(element);
2228 
2229  buffer.str("");
2230  buffer << error_parameters_norm;
2231 
2232  text = document->NewText(buffer.str().c_str());
2233  element->LinkEndChild(text);
2234  }
2235 
2236  // Error gradient norm
2237  {
2238  element = document->NewElement("ErrorGradientNorm");
2239  root_element->LinkEndChild(element);
2240 
2241  buffer.str("");
2242  buffer << error_gradient_norm;
2243 
2244  text = document->NewText(buffer.str().c_str());
2245  element->LinkEndChild(text);
2246  }
2247 
2248  // Error training rate
2249  {
2250  element = document->NewElement("ErrorTrainingRate");
2251  root_element->LinkEndChild(element);
2252 
2253  buffer.str("");
2254  buffer << error_training_rate;
2255 
2256  text = document->NewText(buffer.str().c_str());
2257  element->LinkEndChild(text);
2258  }
2259 
2260  // Minimum parameters increment norm
2261  {
2262  element = document->NewElement("MinimumParametersIncrementNorm");
2263  root_element->LinkEndChild(element);
2264 
2265  buffer.str("");
2266  buffer << minimum_parameters_increment_norm;
2267 
2268  text = document->NewText(buffer.str().c_str());
2269  element->LinkEndChild(text);
2270  }
2271 
2272  // Minimum performance increase
2273  {
2274  element = document->NewElement("MinimumPerformanceIncrease");
2275  root_element->LinkEndChild(element);
2276 
2277  buffer.str("");
2278  buffer << minimum_performance_increase;
2279 
2280  text = document->NewText(buffer.str().c_str());
2281  element->LinkEndChild(text);
2282  }
2283 
2284  // Performance goal
2285  {
2286  element = document->NewElement("PerformanceGoal");
2287  root_element->LinkEndChild(element);
2288 
2289  buffer.str("");
2290  buffer << performance_goal;
2291 
2292  text = document->NewText(buffer.str().c_str());
2293  element->LinkEndChild(text);
2294  }
2295 
2296  // Gradient norm goal
2297  {
2298  element = document->NewElement("GradientNormGoal");
2299  root_element->LinkEndChild(element);
2300 
2301  buffer.str("");
2302  buffer << gradient_norm_goal;
2303 
2304  text = document->NewText(buffer.str().c_str());
2305  element->LinkEndChild(text);
2306  }
2307 
2308  // Maximum generalization performance decreases
2309  {
2310  element = document->NewElement("MaximumGeneralizationPerformanceDecreases");
2311  root_element->LinkEndChild(element);
2312 
2313  buffer.str("");
2314  buffer << maximum_generalization_performance_decreases;
2315 
2316  text = document->NewText(buffer.str().c_str());
2317  element->LinkEndChild(text);
2318  }
2319 
2320  // Maximum iterations number
2321  {
2322  element = document->NewElement("MaximumIterationsNumber");
2323  root_element->LinkEndChild(element);
2324 
2325  buffer.str("");
2326  buffer << maximum_iterations_number;
2327 
2328  text = document->NewText(buffer.str().c_str());
2329  element->LinkEndChild(text);
2330  }
2331 
2332  // Maximum time
2333  {
2334  element = document->NewElement("MaximumTime");
2335  root_element->LinkEndChild(element);
2336 
2337  buffer.str("");
2338  buffer << maximum_time;
2339 
2340  text = document->NewText(buffer.str().c_str());
2341  element->LinkEndChild(text);
2342  }
2343 
2344  // Reserve parameters history
2345  {
2346  element = document->NewElement("ReserveParametersHistory");
2347  root_element->LinkEndChild(element);
2348 
2349  buffer.str("");
2350  buffer << reserve_parameters_history;
2351 
2352  text = document->NewText(buffer.str().c_str());
2353  element->LinkEndChild(text);
2354  }
2355 
2356  // Reserve parameters norm history
2357  {
2358  element = document->NewElement("ReserveParametersNormHistory");
2359  root_element->LinkEndChild(element);
2360 
2361  buffer.str("");
2362  buffer << reserve_parameters_norm_history;
2363 
2364  text = document->NewText(buffer.str().c_str());
2365  element->LinkEndChild(text);
2366  }
2367 
2368  // Reserve performance history
2369  {
2370  element = document->NewElement("ReservePerformanceHistory");
2371  root_element->LinkEndChild(element);
2372 
2373  buffer.str("");
2374  buffer << reserve_performance_history;
2375 
2376  text = document->NewText(buffer.str().c_str());
2377  element->LinkEndChild(text);
2378  }
2379 
2380  // Reserve generalization performance history
2381  {
2382  element = document->NewElement("ReserveGeneralizationPerformanceHistory");
2383  root_element->LinkEndChild(element);
2384 
2385  buffer.str("");
2386  buffer << reserve_generalization_performance_history;
2387 
2388  text = document->NewText(buffer.str().c_str());
2389  element->LinkEndChild(text);
2390  }
2391 
2392 
2393  // Reserve gradient history
2394  {
2395  element = document->NewElement("ReserveGradientHistory");
2396  root_element->LinkEndChild(element);
2397 
2398  buffer.str("");
2399  buffer << reserve_gradient_history;
2400 
2401  text = document->NewText(buffer.str().c_str());
2402  element->LinkEndChild(text);
2403  }
2404 
2405  // Reserve gradient norm history
2406  {
2407  element = document->NewElement("ReserveGradientNormHistory");
2408  root_element->LinkEndChild(element);
2409 
2410  buffer.str("");
2411  buffer << reserve_gradient_norm_history;
2412 
2413  text = document->NewText(buffer.str().c_str());
2414  element->LinkEndChild(text);
2415  }
2416 
2417  // Reserve inverse Hessian history
2418  {
2419  element = document->NewElement("ReserveInverseHessianHistory");
2420  root_element->LinkEndChild(element);
2421 
2422  buffer.str("");
2423  buffer << reserve_inverse_Hessian_history;
2424 
2425  text = document->NewText(buffer.str().c_str());
2426  element->LinkEndChild(text);
2427  }
2428 
2429  // Reserve training direction history
2430  {
2431  element = document->NewElement("ReserveTrainingDirectionHistory");
2432  root_element->LinkEndChild(element);
2433 
2434  buffer.str("");
2435  buffer << reserve_training_direction_history;
2436 
2437  text = document->NewText(buffer.str().c_str());
2438  element->LinkEndChild(text);
2439  }
2440 
2441  // Reserve training rate history
2442  {
2443  element = document->NewElement("ReserveTrainingRateHistory");
2444  root_element->LinkEndChild(element);
2445 
2446  buffer.str("");
2447  buffer << reserve_training_rate_history;
2448 
2449  text = document->NewText(buffer.str().c_str());
2450  element->LinkEndChild(text);
2451  }
2452 
2453  // Reserve elapsed time history
2454  {
2455  element = document->NewElement("ReserveElapsedTimeHistory");
2456  root_element->LinkEndChild(element);
2457 
2458  buffer.str("");
2459  buffer << reserve_elapsed_time_history;
2460 
2461  text = document->NewText(buffer.str().c_str());
2462  element->LinkEndChild(text);
2463  }
2464 
2477  // Display period
2478  {
2479  element = document->NewElement("DisplayPeriod");
2480  root_element->LinkEndChild(element);
2481 
2482  buffer.str("");
2483  buffer << display_period;
2484 
2485  text = document->NewText(buffer.str().c_str());
2486  element->LinkEndChild(text);
2487  }
2488 
2489  // Save period
2490  {
2491  element = document->NewElement("SavePeriod");
2492  root_element->LinkEndChild(element);
2493 
2494  buffer.str("");
2495  buffer << save_period;
2496 
2497  text = document->NewText(buffer.str().c_str());
2498  element->LinkEndChild(text);
2499  }
2500 
2501  // Neural network file name
2502  {
2503  element = document->NewElement("NeuralNetworkFileName");
2504  root_element->LinkEndChild(element);
2505 
2506  text = document->NewText(neural_network_file_name.c_str());
2507  element->LinkEndChild(text);
2508  }
2509 
2510  // Display
2511  {
2512  element = document->NewElement("Display");
2513  root_element->LinkEndChild(element);
2514 
2515  buffer.str("");
2516  buffer << display;
2517 
2518  text = document->NewText(buffer.str().c_str());
2519  element->LinkEndChild(text);
2520  }
2521 
2522  return(document);
2523 }
2524 
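// A minimal usage sketch of to_XML(), assuming a previously constructed performance
// functional and an arbitrary file name; tinyxml2::XMLDocument::SaveFile() writes the
// element tree built above to disk.
//
//    OpenNN::QuasiNewtonMethod quasi_newton_method(&performance_functional);
//
//    tinyxml2::XMLDocument* document = quasi_newton_method.to_XML();
//
//    document->SaveFile("quasi_newton_method.xml");
//
//    delete document;   // to_XML() allocates the document with new, so the caller releases it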
2525 
2526 // std::string to_string(void) const method
2527 
2528 std::string QuasiNewtonMethod::to_string(void) const
2529 {
2530  std::ostringstream buffer;
2531 
2532  buffer << "Quasi-Newton method\n";
2533 
2534  return(buffer.str());
2535 }
2536 
2537 
2538 // Matrix<std::string> to_string_matrix(void) const method
2539 
2540 // Returns a string matrix with the most representative members of the quasi-Newton method object.
2541 
2542 Matrix<std::string> QuasiNewtonMethod::to_string_matrix(void) const
2543 {
2544  std::ostringstream buffer;
2545 
2546  Vector<std::string> labels;
2547  Vector<std::string> values;
2548 
2549  // Inverse Hessian approximation method
2550 
2551  labels.push_back("Inverse Hessian approximation method");
2552 
2553  const std::string inverse_Hessian_approximation_method_string = write_inverse_Hessian_approximation_method();
2554 
2555  values.push_back(inverse_Hessian_approximation_method_string);
2556 
2557  // Training rate method
2558 
2559  labels.push_back("Training rate method");
2560 
2561  const std::string training_rate_method = training_rate_algorithm.write_training_rate_method();
2562 
2563  values.push_back(training_rate_method);
2564 
2565  // Training rate tolerance
2566 
2567  labels.push_back("Training rate tolerance");
2568 
2569  buffer.str("");
2570  buffer << training_rate_algorithm.get_training_rate_tolerance();
2571 
2572  values.push_back(buffer.str());
2573 
2574  // Minimum parameters increment norm
2575 
2576  labels.push_back("Minimum parameters increment norm");
2577 
2578  buffer.str("");
2579  buffer << minimum_parameters_increment_norm;
2580 
2581  values.push_back(buffer.str());
2582 
2583  // Minimum performance increase
2584 
2585  labels.push_back("Minimum performance increase");
2586 
2587  buffer.str("");
2588  buffer << minimum_performance_increase;
2589 
2590  values.push_back(buffer.str());
2591 
2592  // Performance goal
2593 
2594  labels.push_back("Performance goal");
2595 
2596  buffer.str("");
2597  buffer << performance_goal;
2598 
2599  values.push_back(buffer.str());
2600 
2601  // Gradient norm goal
2602 
2603  labels.push_back("Gradient norm goal");
2604 
2605  buffer.str("");
2606  buffer << gradient_norm_goal;
2607 
2608  values.push_back(buffer.str());
2609 
2610  // Maximum generalization failures
2611 
2612  labels.push_back("Maximum generalization failures");
2613 
2614  buffer.str("");
2615  buffer << maximum_generalization_performance_decreases;
2616 
2617  values.push_back(buffer.str());
2618 
2619  // Maximum iterations number
2620 
2621  labels.push_back("Maximum iterations number");
2622 
2623  buffer.str("");
2624  buffer << maximum_iterations_number;
2625 
2626  values.push_back(buffer.str());
2627 
2628  // Maximum time
2629 
2630  labels.push_back("Maximum time");
2631 
2632  buffer.str("");
2633  buffer << maximum_time;
2634 
2635  values.push_back(buffer.str());
2636 
2637  // Reserve parameters norm history
2638 
2639  labels.push_back("Reserve parameters norm history");
2640 
2641  buffer.str("");
2642  buffer << reserve_parameters_norm_history;
2643 
2644  values.push_back(buffer.str());
2645 
2646  // Reserve performance history
2647 
2648  labels.push_back("Reserve performance history");
2649 
2650  buffer.str("");
2651  buffer << reserve_performance_history;
2652 
2653  values.push_back(buffer.str());
2654 
2655  // Reserve gradient norm history
2656 
2657  labels.push_back("Reserve gradient norm history");
2658 
2659  buffer.str("");
2660  buffer << reserve_gradient_norm_history;
2661 
2662  values.push_back(buffer.str());
2663 
2664  // Reserve generalization performance history
2665 
2666  labels.push_back("Reserve generalization performance history");
2667 
2668  buffer.str("");
2669  buffer << reserve_generalization_performance_history;
2670 
2671  values.push_back(buffer.str());
2672 
2673  // Reserve training direction norm history
2674 
2675 // labels.push_back("");
2676 
2677 // buffer.str("");
2678 // buffer << reserve_training_direction_norm_history;
2679 
2680  // Reserve training rate history
2681 
2682 // labels.push_back("");
2683 
2684 // buffer.str("");
2685 // buffer << reserve_training_rate_history;
2686 
2687 // values.push_back(buffer.str());
2688 
2689  // Reserve elapsed time history
2690 
2691  labels.push_back("Reserve elapsed time history");
2692 
2693  buffer.str("");
2694  buffer << reserve_elapsed_time_history;
2695 
2696  values.push_back(buffer.str());
2697 
2698  const size_t rows_number = labels.size();
2699  const size_t columns_number = 2;
2700 
2701  Matrix<std::string> string_matrix(rows_number, columns_number);
2702 
2703  string_matrix.set_column(0, labels);
2704  string_matrix.set_column(1, values);
2705 
2706  return(string_matrix);
2707 }
2708 
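// A minimal usage sketch of to_string_matrix(), assuming OpenNN's Matrix<T> stream
// output operator; column 0 holds the labels and column 1 the corresponding values.
//
//    const OpenNN::Matrix<std::string> summary = quasi_newton_method.to_string_matrix();
//
//    std::cout << summary << std::endl;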
2709 
2710 // void from_XML(const tinyxml2::XMLDocument&) method
2711 
2712 void QuasiNewtonMethod::from_XML(const tinyxml2::XMLDocument& document)
2713 {
2714  const tinyxml2::XMLElement* root_element = document.FirstChildElement("QuasiNewtonMethod");
2715 
2716  if(!root_element)
2717  {
2718  std::ostringstream buffer;
2719 
2720  buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
2721  << "void from_XML(const tinyxml2::XMLDocument&) method.\n"
2722  << "Quasi-Newton method element is NULL.\n";
2723 
2724  throw std::logic_error(buffer.str());
2725  }
2726 
2727  // Inverse Hessian approximation method
2728  {
2729  const tinyxml2::XMLElement* element = root_element->FirstChildElement("InverseHessianApproximationMethod");
2730 
2731  if(element)
2732  {
2733  const std::string new_inverse_Hessian_approximation_method = element->GetText();
2734 
2735  try
2736  {
2737  set_inverse_Hessian_approximation_method(new_inverse_Hessian_approximation_method);
2738  }
2739  catch(const std::logic_error& e)
2740  {
2741  std::cout << e.what() << std::endl;
2742  }
2743  }
2744  }
2745 
2766  // Training rate algorithm
2767  {
2768  const tinyxml2::XMLElement* element = root_element->FirstChildElement("TrainingRateAlgorithm");
2769 
2770  if(element)
2771  {
2772  tinyxml2::XMLDocument training_rate_algorithm_document;
2773 
2774  tinyxml2::XMLElement* element_clone = training_rate_algorithm_document.NewElement("TrainingRateAlgorithm");
2775  training_rate_algorithm_document.InsertFirstChild(element_clone);
2776 
2777  DeepClone(element_clone, element, &training_rate_algorithm_document, NULL);
2778 
2779  training_rate_algorithm.from_XML(training_rate_algorithm_document);
2780  }
2781  }
2782 
2783  // Warning parameters norm
2784  {
2785  const tinyxml2::XMLElement* element = root_element->FirstChildElement("WarningParametersNorm");
2786 
2787  if(element)
2788  {
2789  const double new_warning_parameters_norm = atof(element->GetText());
2790 
2791  try
2792  {
2793  set_warning_parameters_norm(new_warning_parameters_norm);
2794  }
2795  catch(const std::logic_error& e)
2796  {
2797  std::cout << e.what() << std::endl;
2798  }
2799  }
2800  }
2801 
2802  // Warning gradient norm
2803  {
2804  const tinyxml2::XMLElement* element = root_element->FirstChildElement("WarningGradientNorm");
2805 
2806  if(element)
2807  {
2808  const double new_warning_gradient_norm = atof(element->GetText());
2809 
2810  try
2811  {
2812  set_warning_gradient_norm(new_warning_gradient_norm);
2813  }
2814  catch(const std::logic_error& e)
2815  {
2816  std::cout << e.what() << std::endl;
2817  }
2818  }
2819  }
2820 
2821  // Warning training rate
2822  {
2823  const tinyxml2::XMLElement* element = root_element->FirstChildElement("WarningTrainingRate");
2824 
2825  if(element)
2826  {
2827  const double new_warning_training_rate = atof(element->GetText());
2828 
2829  try
2830  {
2831  set_warning_training_rate(new_warning_training_rate);
2832  }
2833  catch(const std::logic_error& e)
2834  {
2835  std::cout << e.what() << std::endl;
2836  }
2837  }
2838  }
2839 
2840  // Error parameters norm
2841  {
2842  const tinyxml2::XMLElement* element = root_element->FirstChildElement("ErrorParametersNorm");
2843 
2844  if(element)
2845  {
2846  const double new_error_parameters_norm = atof(element->GetText());
2847 
2848  try
2849  {
2850  set_error_parameters_norm(new_error_parameters_norm);
2851  }
2852  catch(const std::logic_error& e)
2853  {
2854  std::cout << e.what() << std::endl;
2855  }
2856  }
2857  }
2858 
2859  // Error gradient norm
2860  {
2861  const tinyxml2::XMLElement* element = root_element->FirstChildElement("ErrorGradientNorm");
2862 
2863  if(element)
2864  {
2865  const double new_error_gradient_norm = atof(element->GetText());
2866 
2867  try
2868  {
2869  set_error_gradient_norm(new_error_gradient_norm);
2870  }
2871  catch(const std::logic_error& e)
2872  {
2873  std::cout << e.what() << std::endl;
2874  }
2875  }
2876  }
2877 
2878  // Error training rate
2879  {
2880  const tinyxml2::XMLElement* element = root_element->FirstChildElement("ErrorTrainingRate");
2881 
2882  if(element)
2883  {
2884  const double new_error_training_rate = atof(element->GetText());
2885 
2886  try
2887  {
2888  set_error_training_rate(new_error_training_rate);
2889  }
2890  catch(const std::logic_error& e)
2891  {
2892  std::cout << e.what() << std::endl;
2893  }
2894  }
2895  }
2896 
2897  // Minimum parameters increment norm
2898  {
2899  const tinyxml2::XMLElement* element = root_element->FirstChildElement("MinimumParametersIncrementNorm");
2900 
2901  if(element)
2902  {
2903  const double new_minimum_parameters_increment_norm = atof(element->GetText());
2904 
2905  try
2906  {
2907  set_minimum_parameters_increment_norm(new_minimum_parameters_increment_norm);
2908  }
2909  catch(const std::logic_error& e)
2910  {
2911  std::cout << e.what() << std::endl;
2912  }
2913  }
2914  }
2915 
2916  // Minimum performance increase
2917  {
2918  const tinyxml2::XMLElement* element = root_element->FirstChildElement("MinimumPerformanceIncrease");
2919 
2920  if(element)
2921  {
2922  const double new_minimum_performance_increase = atof(element->GetText());
2923 
2924  try
2925  {
2926  set_minimum_performance_increase(new_minimum_performance_increase);
2927  }
2928  catch(const std::logic_error& e)
2929  {
2930  std::cout << e.what() << std::endl;
2931  }
2932  }
2933  }
2934 
2935  // Performance goal
2936  {
2937  const tinyxml2::XMLElement* element = root_element->FirstChildElement("PerformanceGoal");
2938 
2939  if(element)
2940  {
2941  const double new_performance_goal = atof(element->GetText());
2942 
2943  try
2944  {
2945  set_performance_goal(new_performance_goal);
2946  }
2947  catch(const std::logic_error& e)
2948  {
2949  std::cout << e.what() << std::endl;
2950  }
2951  }
2952  }
2953 
2954  // Gradient norm goal
2955  {
2956  const tinyxml2::XMLElement* element = root_element->FirstChildElement("GradientNormGoal");
2957 
2958  if(element)
2959  {
2960  const double new_gradient_norm_goal = atof(element->GetText());
2961 
2962  try
2963  {
2964  set_gradient_norm_goal(new_gradient_norm_goal);
2965  }
2966  catch(const std::logic_error& e)
2967  {
2968  std::cout << e.what() << std::endl;
2969  }
2970  }
2971  }
2972 
2973  // Maximum generalization performance decreases
2974  {
2975  const tinyxml2::XMLElement* element = root_element->FirstChildElement("MaximumGeneralizationPerformanceDecreases");
2976 
2977  if(element)
2978  {
2979  const size_t new_maximum_generalization_performance_decreases = atoi(element->GetText());
2980 
2981  try
2982  {
2983  set_maximum_generalization_performance_decreases(new_maximum_generalization_performance_decreases);
2984  }
2985  catch(const std::logic_error& e)
2986  {
2987  std::cout << e.what() << std::endl;
2988  }
2989  }
2990  }
2991 
2992  // Maximum iterations number
2993  {
2994  const tinyxml2::XMLElement* element = root_element->FirstChildElement("MaximumIterationsNumber");
2995 
2996  if(element)
2997  {
2998  const size_t new_maximum_iterations_number = atoi(element->GetText());
2999 
3000  try
3001  {
3002  set_maximum_iterations_number(new_maximum_iterations_number);
3003  }
3004  catch(const std::logic_error& e)
3005  {
3006  std::cout << e.what() << std::endl;
3007  }
3008  }
3009  }
3010 
3011  // Maximum time
3012  {
3013  const tinyxml2::XMLElement* element = root_element->FirstChildElement("MaximumTime");
3014 
3015  if(element)
3016  {
3017  const double new_maximum_time = atof(element->GetText());
3018 
3019  try
3020  {
3021  set_maximum_time(new_maximum_time);
3022  }
3023  catch(const std::logic_error& e)
3024  {
3025  std::cout << e.what() << std::endl;
3026  }
3027  }
3028  }
3029 
3030  // Reserve parameters history
3031  {
3032  const tinyxml2::XMLElement* element = root_element->FirstChildElement("ReserveParametersHistory");
3033 
3034  if(element)
3035  {
3036  const std::string new_reserve_parameters_history = element->GetText();
3037 
3038  try
3039  {
3040  set_reserve_parameters_history(new_reserve_parameters_history != "0");
3041  }
3042  catch(const std::logic_error& e)
3043  {
3044  std::cout << e.what() << std::endl;
3045  }
3046  }
3047  }
3048 
3049  // Reserve parameters norm history
3050  {
3051  const tinyxml2::XMLElement* element = root_element->FirstChildElement("ReserveParametersNormHistory");
3052 
3053  if(element)
3054  {
3055  const std::string new_reserve_parameters_norm_history = element->GetText();
3056 
3057  try
3058  {
3059  set_reserve_parameters_norm_history(new_reserve_parameters_norm_history != "0");
3060  }
3061  catch(const std::logic_error& e)
3062  {
3063  std::cout << e.what() << std::endl;
3064  }
3065  }
3066  }
3067 
3068  // Reserve performance history
3069  {
3070  const tinyxml2::XMLElement* element = root_element->FirstChildElement("ReservePerformanceHistory");
3071 
3072  if(element)
3073  {
3074  const std::string new_reserve_performance_history = element->GetText();
3075 
3076  try
3077  {
3078  set_reserve_performance_history(new_reserve_performance_history != "0");
3079  }
3080  catch(const std::logic_error& e)
3081  {
3082  std::cout << e.what() << std::endl;
3083  }
3084  }
3085  }
3086 
3087  // Reserve generalization performance history
3088  {
3089  const tinyxml2::XMLElement* element = root_element->FirstChildElement("ReserveGeneralizationPerformanceHistory");
3090 
3091  if(element)
3092  {
3093  const std::string new_reserve_generalization_performance_history = element->GetText();
3094 
3095  try
3096  {
3097  set_reserve_generalization_performance_history(new_reserve_generalization_performance_history != "0");
3098  }
3099  catch(const std::logic_error& e)
3100  {
3101  std::cout << e.what() << std::endl;
3102  }
3103  }
3104  }
3105 
3106  // Reserve gradient history
3107  {
3108  const tinyxml2::XMLElement* element = root_element->FirstChildElement("ReserveGradientHistory");
3109 
3110  if(element)
3111  {
3112  const std::string new_reserve_gradient_history = element->GetText();
3113 
3114  try
3115  {
3116  set_reserve_gradient_history(new_reserve_gradient_history != "0");
3117  }
3118  catch(const std::logic_error& e)
3119  {
3120  std::cout << e.what() << std::endl;
3121  }
3122  }
3123  }
3124 
3125  // Reserve gradient norm history
3126  {
3127  const tinyxml2::XMLElement* element = root_element->FirstChildElement("ReserveGradientNormHistory");
3128 
3129  if(element)
3130  {
3131  const std::string new_reserve_gradient_norm_history = element->GetText();
3132 
3133  try
3134  {
3135  set_reserve_gradient_norm_history(new_reserve_gradient_norm_history != "0");
3136  }
3137  catch(const std::logic_error& e)
3138  {
3139  std::cout << e.what() << std::endl;
3140  }
3141  }
3142  }
3143 
3144  // Reserve inverse Hessian history
3145  {
3146  const tinyxml2::XMLElement* element = root_element->FirstChildElement("ReserveInverseHessianHistory");
3147 
3148  if(element)
3149  {
3150  const std::string new_reserve_inverse_Hessian_history = element->GetText();
3151 
3152  try
3153  {
3154  set_reserve_inverse_Hessian_history(new_reserve_inverse_Hessian_history != "0");
3155  }
3156  catch(const std::logic_error& e)
3157  {
3158  std::cout << e.what() << std::endl;
3159  }
3160  }
3161  }
3162 
3163  // Reserve training direction history
3164  {
3165  const tinyxml2::XMLElement* element = root_element->FirstChildElement("ReserveTrainingDirectionHistory");
3166 
3167  if(element)
3168  {
3169  const std::string new_reserve_training_direction_history = element->GetText();
3170 
3171  try
3172  {
3173  set_reserve_training_direction_history(new_reserve_training_direction_history != "0");
3174  }
3175  catch(const std::logic_error& e)
3176  {
3177  std::cout << e.what() << std::endl;
3178  }
3179  }
3180  }
3181 
3182  // Reserve training rate history
3183  {
3184  const tinyxml2::XMLElement* element = root_element->FirstChildElement("ReserveTrainingRateHistory");
3185 
3186  if(element)
3187  {
3188  const std::string new_reserve_training_rate_history = element->GetText();
3189 
3190  try
3191  {
3192  set_reserve_training_rate_history(new_reserve_training_rate_history != "0");
3193  }
3194  catch(const std::logic_error& e)
3195  {
3196  std::cout << e.what() << std::endl;
3197  }
3198  }
3199  }
3200 
3201  // Reserve elapsed time history
3202  {
3203  const tinyxml2::XMLElement* element = root_element->FirstChildElement("ReserveElapsedTimeHistory");
3204 
3205  if(element)
3206  {
3207  const std::string new_reserve_elapsed_time_history = element->GetText();
3208 
3209  try
3210  {
3211  set_reserve_elapsed_time_history(new_reserve_elapsed_time_history != "0");
3212  }
3213  catch(const std::logic_error& e)
3214  {
3215  std::cout << e.what() << std::endl;
3216  }
3217  }
3218  }
3219 
3239  // Display period
3240  {
3241  const tinyxml2::XMLElement* element = root_element->FirstChildElement("DisplayPeriod");
3242 
3243  if(element)
3244  {
3245  const size_t new_display_period = atoi(element->GetText());
3246 
3247  try
3248  {
3249  set_display_period(new_display_period);
3250  }
3251  catch(const std::logic_error& e)
3252  {
3253  std::cout << e.what() << std::endl;
3254  }
3255  }
3256  }
3257 
3258  // Save period
3259  {
3260  const tinyxml2::XMLElement* element = root_element->FirstChildElement("SavePeriod");
3261 
3262  if(element)
3263  {
3264  const size_t new_save_period = atoi(element->GetText());
3265 
3266  try
3267  {
3268  set_save_period(new_save_period);
3269  }
3270  catch(const std::logic_error& e)
3271  {
3272  std::cout << e.what() << std::endl;
3273  }
3274  }
3275  }
3276 
3277  // Neural network file name
3278  {
3279  const tinyxml2::XMLElement* element = root_element->FirstChildElement("NeuralNetworkFileName");
3280 
3281  if(element)
3282  {
3283  const std::string new_neural_network_file_name = element->GetText();
3284 
3285  try
3286  {
3287  set_neural_network_file_name(new_neural_network_file_name);
3288  }
3289  catch(const std::logic_error& e)
3290  {
3291  std::cout << e.what() << std::endl;
3292  }
3293  }
3294  }
3295 
3296  // Display
3297  {
3298  const tinyxml2::XMLElement* element = root_element->FirstChildElement("Display");
3299 
3300  if(element)
3301  {
3302  const std::string new_display = element->GetText();
3303 
3304  try
3305  {
3306  set_display(new_display != "0");
3307  }
3308  catch(const std::logic_error& e)
3309  {
3310  std::cout << e.what() << std::endl;
3311  }
3312  }
3313  }
3314 }
3315 
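// A minimal usage sketch of from_XML(), assuming the file written in the to_XML()
// sketch above; error handling of tinyxml2::XMLDocument::LoadFile() is omitted.
//
//    tinyxml2::XMLDocument document;
//
//    document.LoadFile("quasi_newton_method.xml");
//
//    quasi_newton_method.from_XML(document);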
3316 }
3317 
3318 // OpenNN: Open Neural Networks Library.
3319 // Copyright (c) 2005-2015 Roberto Lopez.
3320 //
3321 // This library is free software; you can redistribute it and/or
3322 // modify it under the terms of the GNU Lesser General Public
3323 // License as published by the Free Software Foundation; either
3324 // version 2.1 of the License, or any later version.
3325 //
3326 // This library is distributed in the hope that it will be useful,
3327 // but WITHOUT ANY WARRANTY; without even the implied warranty of
3328 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
3329 // Lesser General Public License for more details.
3330 
3331 // You should have received a copy of the GNU Lesser General Public
3332 // License along with this library; if not, write to the Free Software
3333 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
3334 