#include "quasi_newton_method.h"
std::ostringstream buffer;

buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
       << "std::string write_inverse_Hessian_approximation_method(void) const method.\n"
       << "Unknown inverse Hessian approximation method.\n";

throw std::logic_error(buffer.str());
void QuasiNewtonMethod::set_inverse_Hessian_approximation_method(const QuasiNewtonMethod::InverseHessianApproximationMethod&
new_inverse_Hessian_approximation_method)
{
   inverse_Hessian_approximation_method = new_inverse_Hessian_approximation_method;
}
if(new_inverse_Hessian_approximation_method_name == "DFP")
{
   inverse_Hessian_approximation_method = DFP;
}
else if(new_inverse_Hessian_approximation_method_name == "BFGS")
{
   inverse_Hessian_approximation_method = BFGS;
}
else
{
   std::ostringstream buffer;

   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "void set_inverse_Hessian_approximation_method(const std::string&) method.\n"
          << "Unknown inverse Hessian approximation method: " << new_inverse_Hessian_approximation_method_name << ".\n";

   throw std::logic_error(buffer.str());
}
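// Usage sketch (editor's illustration; the instance name is hypothetical, the
// accepted names "DFP" and "BFGS" are as parsed above):
//
//    QuasiNewtonMethod quasi_Newton_method;
//    quasi_Newton_method.set_inverse_Hessian_approximation_method("BFGS");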
if(new_warning_parameters_norm < 0.0)
{
   std::ostringstream buffer;

   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "void set_warning_parameters_norm(const double&) method.\n"
          << "Warning parameters norm must be equal to or greater than 0.\n";

   throw std::logic_error(buffer.str());
}

if(new_warning_gradient_norm < 0.0)
{
   std::ostringstream buffer;

   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "void set_warning_gradient_norm(const double&) method.\n"
          << "Warning gradient norm must be equal to or greater than 0.\n";

   throw std::logic_error(buffer.str());
}

if(new_warning_training_rate < 0.0)
{
   std::ostringstream buffer;

   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "void set_warning_training_rate(const double&) method.\n"
          << "Warning training rate must be equal to or greater than 0.\n";

   throw std::logic_error(buffer.str());
}

if(new_error_parameters_norm < 0.0)
{
   std::ostringstream buffer;

   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "void set_error_parameters_norm(const double&) method.\n"
          << "Error parameters norm must be equal to or greater than 0.\n";

   throw std::logic_error(buffer.str());
}

if(new_error_gradient_norm < 0.0)
{
   std::ostringstream buffer;

   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "void set_error_gradient_norm(const double&) method.\n"
          << "Error gradient norm must be equal to or greater than 0.\n";

   throw std::logic_error(buffer.str());
}

if(new_error_training_rate < 0.0)
{
   std::ostringstream buffer;

   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "void set_error_training_rate(const double&) method.\n"
          << "Error training rate must be equal to or greater than 0.\n";

   throw std::logic_error(buffer.str());
}

if(new_minimum_parameters_increment_norm < 0.0)
{
   std::ostringstream buffer;

   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "void set_minimum_parameters_increment_norm(const double&) method.\n"
          << "Minimum parameters increment norm must be equal to or greater than 0.\n";

   throw std::logic_error(buffer.str());
}

if(new_minimum_performance_increase < 0.0)
{
   std::ostringstream buffer;

   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "void set_minimum_performance_increase(const double&) method.\n"
          << "Minimum performance improvement must be equal to or greater than 0.\n";

   throw std::logic_error(buffer.str());
}

if(new_gradient_norm_goal < 0.0)
{
   std::ostringstream buffer;

   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "void set_gradient_norm_goal(const double&) method.\n"
          << "Gradient norm goal must be equal to or greater than 0.\n";

   throw std::logic_error(buffer.str());
}

if(new_maximum_time < 0.0)
{
   std::ostringstream buffer;

   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "void set_maximum_time(const double&) method.\n"
          << "Maximum time must be equal to or greater than 0.\n";

   throw std::logic_error(buffer.str());
}

if(new_display_period == 0)
{
   std::ostringstream buffer;

   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "void set_display_period(const size_t&) method.\n"
          << "Display period must be greater than 0.\n";

   throw std::logic_error(buffer.str());
}
std::ostringstream buffer;

buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
       << "Matrix<double> calculate_inverse_Hessian_approximation(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
       << "Unknown inverse Hessian approximation method.\n";

throw std::logic_error(buffer.str());
return((inverse_Hessian_approximation.dot(gradient)*(-1.0)).calculate_normalized());
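// The quasi-Newton training direction is the Newton-like step
//
//    d = -H_approx * g,
//
// where H_approx is the current inverse Hessian approximation and g the
// gradient; normalizing it leaves the step length to the training rate
// algorithm.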
if(!performance_functional_pointer)
{
   std::ostringstream buffer;

   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "Vector<double> calculate_gradient_descent_training_direction(const Vector<double>&) const method.\n"
          << "Performance functional pointer is NULL.\n";

   throw std::logic_error(buffer.str());
}

const size_t gradient_size = gradient.size();

if(gradient_size != parameters_number)
{
   std::ostringstream buffer;

   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "Vector<double> calculate_gradient_descent_training_direction(const Vector<double>&) const method.\n"
          << "Size of gradient (" << gradient_size << ") is not equal to number of parameters (" << parameters_number << ").\n";

   throw std::logic_error(buffer.str());
}
std::ostringstream buffer;

const size_t old_parameters_size = old_parameters.size();
const size_t parameters_size = parameters.size();

if(old_parameters_size != parameters_number)
{
   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
          << "Size of old parameters vector must be equal to number of parameters.\n";

   throw std::logic_error(buffer.str());
}
else if(parameters_size != parameters_number)
{
   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
          << "Size of parameters vector must be equal to number of parameters.\n";

   throw std::logic_error(buffer.str());
}

const size_t old_gradient_size = old_gradient.size();
const size_t gradient_size = gradient.size();

if(old_gradient_size != parameters_number)
{
   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
          << "Size of old gradient vector must be equal to number of parameters.\n";

   throw std::logic_error(buffer.str());
}
else if(gradient_size != parameters_number)
{
   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
          << "Size of gradient vector must be equal to number of parameters.\n";

   throw std::logic_error(buffer.str());
}

const size_t rows_number = old_inverse_Hessian.get_rows_number();
const size_t columns_number = old_inverse_Hessian.get_columns_number();

if(rows_number != parameters_number)
{
   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
          << "Number of rows in old inverse Hessian must be equal to number of parameters.\n";

   throw std::logic_error(buffer.str());
}
else if(columns_number != parameters_number)
{
   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
          << "Number of columns in old inverse Hessian must be equal to number of parameters.\n";

   throw std::logic_error(buffer.str());
}

const Vector<double> parameters_difference = parameters - old_parameters;

if(parameters_difference.calculate_absolute_value() < 1.0e-50)
{
   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
          << "Parameters difference vector is zero.\n";

   throw std::logic_error(buffer.str());
}

const Vector<double> gradient_difference = gradient - old_gradient;

if(gradient_difference.calculate_absolute_value() < 1.0e-50)
{
   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
          << "Gradient difference vector is zero.\n";

   throw std::logic_error(buffer.str());
}

if(old_inverse_Hessian.calculate_absolute_value() < 1.0e-50)
{
   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
          << "Old inverse Hessian matrix is zero.\n";

   throw std::logic_error(buffer.str());
}

if(fabs(parameters_difference.dot(gradient_difference)) < 1.0e-50)
{
   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
          << "Denominator of first term is zero.\n";

   throw std::logic_error(buffer.str());
}
else if(fabs(gradient_difference.dot(old_inverse_Hessian).dot(gradient_difference)) < 1.0e-50)
{
   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
          << "Denominator of second term is zero.\n";

   throw std::logic_error(buffer.str());
}
Matrix<double> inverse_Hessian_approximation = old_inverse_Hessian;

inverse_Hessian_approximation += parameters_difference.direct(parameters_difference)
                                 /parameters_difference.dot(gradient_difference);

inverse_Hessian_approximation -= (old_inverse_Hessian.dot(gradient_difference)).direct(old_inverse_Hessian.dot(gradient_difference))
                                 /gradient_difference.dot(old_inverse_Hessian).dot(gradient_difference);

return(inverse_Hessian_approximation);
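// The update above is the standard DFP formula. Writing s = parameters_difference,
// y = gradient_difference and H = old_inverse_Hessian:
//
//    H_new = H + (s s^T)/(s^T y) - (H y)(H y)^T/(y^T H y)
//
// so the two denominators checked earlier are s^T y and y^T H y.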
std::ostringstream buffer;

const size_t old_parameters_size = old_parameters.size();
const size_t parameters_size = parameters.size();

if(old_parameters_size != parameters_number)
{
   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "Matrix<double> calculate_BFGS_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
          << "Size of old parameters vector must be equal to number of parameters.\n";

   throw std::logic_error(buffer.str());
}
else if(parameters_size != parameters_number)
{
   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "Matrix<double> calculate_BFGS_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
          << "Size of parameters vector must be equal to number of parameters.\n";

   throw std::logic_error(buffer.str());
}

const size_t old_gradient_size = old_gradient.size();

if(old_gradient_size != parameters_number)
{
   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "Matrix<double> calculate_BFGS_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
          << "Size of old gradient vector must be equal to number of parameters.\n";

   throw std::logic_error(buffer.str());
}

const size_t gradient_size = gradient.size();

if(gradient_size != parameters_number)
{
   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "Matrix<double> calculate_BFGS_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
          << "Size of gradient vector must be equal to number of parameters.\n";

   throw std::logic_error(buffer.str());
}

const size_t rows_number = old_inverse_Hessian.get_rows_number();
const size_t columns_number = old_inverse_Hessian.get_columns_number();

if(rows_number != parameters_number)
{
   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "Matrix<double> calculate_BFGS_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
          << "Number of rows in old inverse Hessian must be equal to number of parameters.\n";

   throw std::logic_error(buffer.str());
}

if(columns_number != parameters_number)
{
   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "Matrix<double> calculate_BFGS_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
          << "Number of columns in old inverse Hessian must be equal to number of parameters.\n";

   throw std::logic_error(buffer.str());
}

const Vector<double> parameters_difference = parameters - old_parameters;

if(parameters_difference.calculate_absolute_value() < 1.0e-50)
{
   std::ostringstream buffer;

   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "Matrix<double> calculate_BFGS_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
          << "Parameters difference vector is zero.\n";

   throw std::logic_error(buffer.str());
}

const Vector<double> gradient_difference = gradient - old_gradient;

if(gradient_difference.calculate_absolute_value() < 1.0e-50)
{
   std::ostringstream buffer;

   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "Matrix<double> calculate_BFGS_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
          << "Gradient difference vector is zero.\n";

   throw std::logic_error(buffer.str());
}

if(old_inverse_Hessian.calculate_absolute_value() < 1.0e-50)
{
   std::ostringstream buffer;

   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "Matrix<double> calculate_BFGS_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
          << "Old inverse Hessian matrix is zero.\n";

   throw std::logic_error(buffer.str());
}
const Vector<double> BFGS = parameters_difference/parameters_difference.dot(gradient_difference)
                            - old_inverse_Hessian.dot(gradient_difference)
                              /gradient_difference.dot(old_inverse_Hessian).dot(gradient_difference);
Matrix<double> inverse_Hessian_approximation = old_inverse_Hessian;

inverse_Hessian_approximation += parameters_difference.direct(parameters_difference)
                                 /parameters_difference.dot(gradient_difference);

inverse_Hessian_approximation -= (old_inverse_Hessian.dot(gradient_difference)).direct(gradient_difference.dot(old_inverse_Hessian))
                                 /gradient_difference.dot(old_inverse_Hessian).dot(gradient_difference);

inverse_Hessian_approximation += (BFGS.direct(BFGS))*(gradient_difference.dot(old_inverse_Hessian).dot(gradient_difference));

return(inverse_Hessian_approximation);
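// The update above is the standard BFGS formula. With s = parameters_difference,
// y = gradient_difference, H = old_inverse_Hessian and u = s/(s^T y) - H y/(y^T H y):
//
//    H_new = H + (s s^T)/(s^T y) - (H y)(y^T H)/(y^T H y) + (y^T H y) u u^T
//
// A minimal, self-contained sketch of the same update on plain std::vector follows.
// The helper name and container choice are the editor's assumptions for
// illustration; they are not part of the OpenNN API.

#include <vector>

static std::vector< std::vector<double> > bfgs_inverse_Hessian_update_sketch(
   const std::vector< std::vector<double> >& H,  // old inverse Hessian approximation (symmetric)
   const std::vector<double>& s,                 // parameters difference
   const std::vector<double>& y)                 // gradient difference
{
   const std::size_t n = s.size();

   // s^T y and H y

   double sy = 0.0;
   std::vector<double> Hy(n, 0.0);

   for(std::size_t i = 0; i < n; i++)
   {
      sy += s[i]*y[i];

      for(std::size_t j = 0; j < n; j++)
      {
         Hy[i] += H[i][j]*y[j];
      }
   }

   // y^T H y

   double yHy = 0.0;

   for(std::size_t i = 0; i < n; i++)
   {
      yHy += y[i]*Hy[i];
   }

   // BFGS vector u = s/(s^T y) - H y/(y^T H y)

   std::vector<double> u(n);

   for(std::size_t i = 0; i < n; i++)
   {
      u[i] = s[i]/sy - Hy[i]/yHy;
   }

   // Since H is symmetric, (H y)(y^T H) = (H y)(H y)^T, so the three update
   // terms reduce to products of the precomputed scalars and vectors.

   std::vector< std::vector<double> > H_new = H;

   for(std::size_t i = 0; i < n; i++)
   {
      for(std::size_t j = 0; j < n; j++)
      {
         H_new[i][j] += s[i]*s[j]/sy - Hy[i]*Hy[j]/yHy + yHy*u[i]*u[j];
      }
   }

   return(H_new);
}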
quasi_Newton_method_pointer = new_quasi_Newton_method_pointer;
if(quasi_Newton_method_pointer == NULL)
{
   std::ostringstream buffer;

   buffer << "OpenNN Exception: QuasiNewtonMethodResults structure.\n"
          << "void resize_training_history(const size_t&) method.\n"
          << "Quasi-Newton method pointer is NULL.\n";

   throw std::logic_error(buffer.str());
}
if(quasi_Newton_method_pointer->get_reserve_parameters_history())
{
   parameters_history.resize(new_size);
}

if(quasi_Newton_method_pointer->get_reserve_parameters_norm_history())
{
   parameters_norm_history.resize(new_size);
}

if(quasi_Newton_method_pointer->get_reserve_performance_history())
{
   performance_history.resize(new_size);
}

if(quasi_Newton_method_pointer->get_reserve_generalization_performance_history())
{
   generalization_performance_history.resize(new_size);
}

if(quasi_Newton_method_pointer->get_reserve_gradient_history())
{
   gradient_history.resize(new_size);
}

if(quasi_Newton_method_pointer->get_reserve_gradient_norm_history())
{
   gradient_norm_history.resize(new_size);
}

if(quasi_Newton_method_pointer->get_reserve_inverse_Hessian_history())
{
   inverse_Hessian_history.resize(new_size);
}

if(quasi_Newton_method_pointer->get_reserve_training_direction_history())
{
   training_direction_history.resize(new_size);
}

if(quasi_Newton_method_pointer->get_reserve_training_rate_history())
{
   training_rate_history.resize(new_size);
}

if(quasi_Newton_method_pointer->get_reserve_elapsed_time_history())
{
   elapsed_time_history.resize(new_size);
}
std::ostringstream buffer;

buffer << "% Quasi-Newton method results\n";

if(!parameters_history.empty())
{
   if(!parameters_history[0].empty())
   {
      buffer << "% Parameters history:\n"
             << parameters_history << "\n";
   }
}

if(!parameters_norm_history.empty())
{
   buffer << "% Parameters norm history:\n"
          << parameters_norm_history << "\n";
}

if(!performance_history.empty())
{
   buffer << "% Performance history:\n"
          << performance_history << "\n";
}

if(!generalization_performance_history.empty())
{
   buffer << "% Generalization performance history:\n"
          << generalization_performance_history << "\n";
}

if(!gradient_history.empty())
{
   if(!gradient_history[0].empty())
   {
      buffer << "% Gradient history:\n"
             << gradient_history << "\n";
   }
}

if(!gradient_norm_history.empty())
{
   buffer << "% Gradient norm history:\n"
          << gradient_norm_history << "\n";
}

if(!inverse_Hessian_history.empty())
{
   if(!inverse_Hessian_history[0].empty())
   {
      buffer << "% Inverse Hessian history:\n"
             << inverse_Hessian_history << "\n";
   }
}

if(!training_direction_history.empty())
{
   if(!training_direction_history[0].empty())
   {
      buffer << "% Training direction history:\n"
             << training_direction_history << "\n";
   }
}

if(!training_rate_history.empty())
{
   buffer << "% Training rate history:\n"
          << training_rate_history << "\n";
}

if(!elapsed_time_history.empty())
{
   buffer << "% Elapsed time history:\n"
          << elapsed_time_history << "\n";
}

return(buffer.str());
std::ostringstream buffer;

Vector<std::string> names;
Vector<std::string> values;

// Final parameters norm

names.push_back("Final parameters norm");

buffer.str("");
buffer << std::setprecision(precision) << final_parameters_norm;

values.push_back(buffer.str());

// Final performance

names.push_back("Final performance");

buffer.str("");
buffer << std::setprecision(precision) << final_performance;

values.push_back(buffer.str());

// Final generalization performance

names.push_back("Final generalization performance");

buffer.str("");
buffer << std::setprecision(precision) << final_generalization_performance;

values.push_back(buffer.str());

// Final gradient norm

names.push_back("Final gradient norm");

buffer.str("");
buffer << std::setprecision(precision) << final_gradient_norm;

values.push_back(buffer.str());

// Iterations number

names.push_back("Iterations number");

buffer.str("");
buffer << iterations_number;

values.push_back(buffer.str());

// Elapsed time

names.push_back("Elapsed time");

buffer.str("");
buffer << elapsed_time;

values.push_back(buffer.str());

const size_t rows_number = names.size();
const size_t columns_number = 2;

Matrix<std::string> final_results(rows_number, columns_number);

final_results.set_column(0, names);
final_results.set_column(1, values);

return(final_results);
std::cout << "Training with quasi-Newton method...\n";
double parameters_norm;

double parameters_increment_norm;

double performance = 0.0;
double old_performance = 0.0;
double performance_increase = 0.0;

double gradient_norm;

Matrix<double> inverse_Hessian(parameters_number, parameters_number);
Matrix<double> old_inverse_Hessian(parameters_number, parameters_number);

double generalization_performance = 0.0;
double old_generalization_performance = 0.0;

double training_slope;

const double first_training_rate = 0.01;

double initial_training_rate = 0.0;
double training_rate = 0.0;
double old_training_rate = 0.0;

directional_point[0] = 0.0;
directional_point[1] = 0.0;

bool stop_training = false;

size_t generalization_failures = 0;

time_t beginning_time, current_time;
time(&beginning_time);
double elapsed_time;
std::cout << "OpenNN Warning: Parameters norm is " << parameters_norm << ".\n";
performance_increase = 0.0;

performance = directional_point[1];
performance_increase = old_performance - performance;
std::cout << "OpenNN Warning: Gradient norm is " << gradient_norm << ".\n";
if(iteration == 0
   || (old_parameters - parameters).calculate_absolute_value() < 1.0e-99
   || (old_gradient - gradient).calculate_absolute_value() < 1.0e-99)
{
   inverse_Hessian.initialize_identity();
}
if(iteration != 0 && generalization_performance > old_generalization_performance)
{
   generalization_failures++;
}
training_slope = (gradient/gradient_norm).dot(training_direction);
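// training_slope is the directional derivative of the performance along the
// training direction, measured against the normalized gradient; a non-negative
// value means the current direction is not a descent direction.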
if(training_slope >= 0.0)
{
   // The quasi-Newton direction is uphill or flat: fall back to gradient descent.

   training_direction = calculate_gradient_descent_training_direction(gradient);
}

// Initial training rate

if(iteration == 0)
{
   initial_training_rate = first_training_rate;
}
else
{
   initial_training_rate = old_training_rate;
}
training_rate = directional_point[0];
if(iteration != 0 && training_rate < 1.0e-99)
{
   training_rate = directional_point[0];
}
parameters_increment = training_direction*training_rate;
parameters_increment_norm = parameters_increment.calculate_norm();
time(&current_time);
elapsed_time = difftime(current_time, beginning_time);
if(parameters_increment_norm <= minimum_parameters_increment_norm)
{
   std::cout << "Iteration " << iteration << ": Minimum parameters increment norm reached.\n"
             << "Parameters increment norm: " << parameters_increment_norm << std::endl;

   stop_training = true;
}
else if(performance_increase <= minimum_performance_increase)
{
   std::cout << "Iteration " << iteration << ": Minimum performance increase reached.\n"
             << "Performance increase: " << performance_increase << std::endl;

   stop_training = true;
}
else if(performance <= performance_goal)
{
   std::cout << "Iteration " << iteration << ": Performance goal reached.\n";

   stop_training = true;
}
else if(gradient_norm <= gradient_norm_goal)
{
   std::cout << "Iteration " << iteration << ": Gradient norm goal reached.\n";

   stop_training = true;
}
else if(generalization_failures >= maximum_generalization_performance_decreases)
{
   std::cout << "Iteration " << iteration << ": Maximum generalization performance decreases reached.\n"
             << "Generalization performance decreases: " << generalization_failures << std::endl;

   stop_training = true;
}
else if(iteration == maximum_iterations_number)
{
   std::cout << "Iteration " << iteration << ": Maximum number of iterations reached.\n";

   stop_training = true;
}
else if(elapsed_time >= maximum_time)
{
   std::cout << "Iteration " << iteration << ": Maximum training time reached.\n";

   stop_training = true;
}
if(iteration != 0 && iteration % save_period == 0)
{
   neural_network_pointer->save(neural_network_file_name);
}
std::cout << "Parameters norm: " << parameters_norm << "\n"
          << "Performance: " << performance << "\n"
          << "Gradient norm: " << gradient_norm << "\n"
          << "Training rate: " << training_rate << "\n"
          << "Elapsed time: " << elapsed_time << std::endl;

if(generalization_performance != 0)
{
   std::cout << "Generalization performance: " << generalization_performance << std::endl;
}
std::cout << "Iteration " << iteration << ";\n"
          << "Parameters norm: " << parameters_norm << "\n"
          << "Performance: " << performance << "\n"
          << "Gradient norm: " << gradient_norm << "\n"
          << "Training rate: " << training_rate << "\n"
          << "Elapsed time: " << elapsed_time << std::endl;

if(generalization_performance != 0)
{
   std::cout << "Generalization performance: " << generalization_performance << std::endl;
}
old_parameters = parameters;

old_performance = performance;

old_gradient = gradient;

old_inverse_Hessian = inverse_Hessian;

old_generalization_performance = generalization_performance;

old_training_rate = training_rate;

parameters += parameters_increment;
std::cout << "Parameters norm: " << parameters_norm << "\n"
          << "Performance: " << performance << "\n"
          << "Gradient norm: " << gradient_norm << "\n"
          << "Training rate: " << training_rate << "\n"
          << "Elapsed time: " << elapsed_time << std::endl;

if(generalization_performance != 0)
{
   std::cout << "Generalization performance: " << generalization_performance << std::endl;
}
return(results_pointer);
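// Usage sketch (editor's illustration; assumes a PerformanceFunctional instance
// named performance_functional, as elsewhere in OpenNN):
//
//    QuasiNewtonMethod quasi_Newton_method(&performance_functional);
//    quasi_Newton_method.set_inverse_Hessian_approximation_method(QuasiNewtonMethod::BFGS);
//    quasi_Newton_method.set_maximum_time(3600.0);
//
//    QuasiNewtonMethodResults* results = quasi_Newton_method.perform_training();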
return("QUASI_NEWTON_METHOD");
std::ostringstream buffer;

tinyxml2::XMLDocument* document = new tinyxml2::XMLDocument;

// Quasi-Newton method root element

tinyxml2::XMLElement* root_element = document->NewElement("QuasiNewtonMethod");

document->InsertFirstChild(root_element);

tinyxml2::XMLElement* element = NULL;
tinyxml2::XMLText* text = NULL;

// Inverse Hessian approximation method

element = document->NewElement("InverseHessianApproximationMethod");
root_element->LinkEndChild(element);

text = document->NewText(write_inverse_Hessian_approximation_method().c_str());
element->LinkEndChild(text);
// Training rate algorithm

{
   tinyxml2::XMLElement* element = document->NewElement("TrainingRateAlgorithm");
   root_element->LinkEndChild(element);

   const tinyxml2::XMLDocument* training_rate_algorithm_document = training_rate_algorithm.to_XML();

   const tinyxml2::XMLElement* training_rate_algorithm_element = training_rate_algorithm_document->FirstChildElement("TrainingRateAlgorithm");

   DeepClone(element, training_rate_algorithm_element, document, NULL);

   delete training_rate_algorithm_document;
}
element = document->NewElement("WarningParametersNorm");
root_element->LinkEndChild(element);

buffer.str("");
buffer << warning_parameters_norm;

text = document->NewText(buffer.str().c_str());
element->LinkEndChild(text);

element = document->NewElement("WarningGradientNorm");
root_element->LinkEndChild(element);

buffer.str("");
buffer << warning_gradient_norm;

text = document->NewText(buffer.str().c_str());
element->LinkEndChild(text);

element = document->NewElement("WarningTrainingRate");
root_element->LinkEndChild(element);

buffer.str("");
buffer << warning_training_rate;

text = document->NewText(buffer.str().c_str());
element->LinkEndChild(text);

element = document->NewElement("ErrorParametersNorm");
root_element->LinkEndChild(element);

buffer.str("");
buffer << error_parameters_norm;

text = document->NewText(buffer.str().c_str());
element->LinkEndChild(text);

element = document->NewElement("ErrorGradientNorm");
root_element->LinkEndChild(element);

buffer.str("");
buffer << error_gradient_norm;

text = document->NewText(buffer.str().c_str());
element->LinkEndChild(text);

element = document->NewElement("ErrorTrainingRate");
root_element->LinkEndChild(element);

buffer.str("");
buffer << error_training_rate;

text = document->NewText(buffer.str().c_str());
element->LinkEndChild(text);

element = document->NewElement("MinimumParametersIncrementNorm");
root_element->LinkEndChild(element);

buffer.str("");
buffer << minimum_parameters_increment_norm;

text = document->NewText(buffer.str().c_str());
element->LinkEndChild(text);

element = document->NewElement("MinimumPerformanceIncrease");
root_element->LinkEndChild(element);

buffer.str("");
buffer << minimum_performance_increase;

text = document->NewText(buffer.str().c_str());
element->LinkEndChild(text);

element = document->NewElement("PerformanceGoal");
root_element->LinkEndChild(element);

buffer.str("");
buffer << performance_goal;

text = document->NewText(buffer.str().c_str());
element->LinkEndChild(text);

element = document->NewElement("GradientNormGoal");
root_element->LinkEndChild(element);

buffer.str("");
buffer << gradient_norm_goal;

text = document->NewText(buffer.str().c_str());
element->LinkEndChild(text);

element = document->NewElement("MaximumGeneralizationPerformanceDecreases");
root_element->LinkEndChild(element);

buffer.str("");
buffer << maximum_generalization_performance_decreases;

text = document->NewText(buffer.str().c_str());
element->LinkEndChild(text);

element = document->NewElement("MaximumIterationsNumber");
root_element->LinkEndChild(element);

buffer.str("");
buffer << maximum_iterations_number;

text = document->NewText(buffer.str().c_str());
element->LinkEndChild(text);

element = document->NewElement("MaximumTime");
root_element->LinkEndChild(element);

buffer.str("");
buffer << maximum_time;

text = document->NewText(buffer.str().c_str());
element->LinkEndChild(text);
element = document->NewElement("ReserveParametersHistory");
root_element->LinkEndChild(element);

buffer.str("");
buffer << reserve_parameters_history;

text = document->NewText(buffer.str().c_str());
element->LinkEndChild(text);

element = document->NewElement("ReserveParametersNormHistory");
root_element->LinkEndChild(element);

buffer.str("");
buffer << reserve_parameters_norm_history;

text = document->NewText(buffer.str().c_str());
element->LinkEndChild(text);

element = document->NewElement("ReservePerformanceHistory");
root_element->LinkEndChild(element);

buffer.str("");
buffer << reserve_performance_history;

text = document->NewText(buffer.str().c_str());
element->LinkEndChild(text);

element = document->NewElement("ReserveGeneralizationPerformanceHistory");
root_element->LinkEndChild(element);

buffer.str("");
buffer << reserve_generalization_performance_history;

text = document->NewText(buffer.str().c_str());
element->LinkEndChild(text);

element = document->NewElement("ReserveGradientHistory");
root_element->LinkEndChild(element);

buffer.str("");
buffer << reserve_gradient_history;

text = document->NewText(buffer.str().c_str());
element->LinkEndChild(text);

element = document->NewElement("ReserveGradientNormHistory");
root_element->LinkEndChild(element);

buffer.str("");
buffer << reserve_gradient_norm_history;

text = document->NewText(buffer.str().c_str());
element->LinkEndChild(text);

element = document->NewElement("ReserveInverseHessianHistory");
root_element->LinkEndChild(element);

buffer.str("");
buffer << reserve_inverse_Hessian_history;

text = document->NewText(buffer.str().c_str());
element->LinkEndChild(text);

element = document->NewElement("ReserveTrainingDirectionHistory");
root_element->LinkEndChild(element);

buffer.str("");
buffer << reserve_training_direction_history;

text = document->NewText(buffer.str().c_str());
element->LinkEndChild(text);

element = document->NewElement("ReserveTrainingRateHistory");
root_element->LinkEndChild(element);

buffer.str("");
buffer << reserve_training_rate_history;

text = document->NewText(buffer.str().c_str());
element->LinkEndChild(text);

element = document->NewElement("ReserveElapsedTimeHistory");
root_element->LinkEndChild(element);

buffer.str("");
buffer << reserve_elapsed_time_history;

text = document->NewText(buffer.str().c_str());
element->LinkEndChild(text);
element = document->NewElement("DisplayPeriod");
root_element->LinkEndChild(element);

buffer.str("");
buffer << display_period;

text = document->NewText(buffer.str().c_str());
element->LinkEndChild(text);

element = document->NewElement("SavePeriod");
root_element->LinkEndChild(element);

buffer.str("");
buffer << save_period;

text = document->NewText(buffer.str().c_str());
element->LinkEndChild(text);

element = document->NewElement("NeuralNetworkFileName");
root_element->LinkEndChild(element);

text = document->NewText(neural_network_file_name.c_str());
element->LinkEndChild(text);

element = document->NewElement("Display");
root_element->LinkEndChild(element);

buffer.str("");
buffer << display;

text = document->NewText(buffer.str().c_str());
element->LinkEndChild(text);

return(document);
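// Usage sketch (editor's illustration): the caller owns the returned document,
// which the code above allocates with new.
//
//    tinyxml2::XMLDocument* document = quasi_Newton_method.to_XML();
//    document->SaveFile("quasi_newton_method.xml");
//    delete document;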
std::ostringstream buffer;

buffer << "Quasi-Newton method\n";

return(buffer.str());
std::ostringstream buffer;

Vector<std::string> labels;
Vector<std::string> values;

// Inverse Hessian approximation method

labels.push_back("Inverse Hessian approximation method");

const std::string inverse_Hessian_approximation_method_string = write_inverse_Hessian_approximation_method();

values.push_back(inverse_Hessian_approximation_method_string);

// Training rate method

labels.push_back("Training rate method");

const std::string training_rate_method = training_rate_algorithm.write_training_rate_method();

values.push_back(training_rate_method);

// Training rate tolerance

labels.push_back("Training rate tolerance");

buffer.str("");
buffer << training_rate_algorithm.get_training_rate_tolerance();

values.push_back(buffer.str());

// Minimum parameters increment norm

labels.push_back("Minimum parameters increment norm");

buffer.str("");
buffer << minimum_parameters_increment_norm;

values.push_back(buffer.str());

// Minimum performance increase

labels.push_back("Minimum performance increase");

buffer.str("");
buffer << minimum_performance_increase;

values.push_back(buffer.str());

// Performance goal

labels.push_back("Performance goal");

buffer.str("");
buffer << performance_goal;

values.push_back(buffer.str());

// Gradient norm goal

labels.push_back("Gradient norm goal");

buffer.str("");
buffer << gradient_norm_goal;

values.push_back(buffer.str());

// Maximum generalization failures

labels.push_back("Maximum generalization failures");

buffer.str("");
buffer << maximum_generalization_performance_decreases;

values.push_back(buffer.str());

// Maximum iterations number

labels.push_back("Maximum iterations number");

buffer.str("");
buffer << maximum_iterations_number;

values.push_back(buffer.str());

// Maximum time

labels.push_back("Maximum time");

buffer.str("");
buffer << maximum_time;

values.push_back(buffer.str());

// Reserve parameters norm history

labels.push_back("Reserve parameters norm history");

buffer.str("");
buffer << reserve_parameters_norm_history;

values.push_back(buffer.str());

// Reserve performance history

labels.push_back("Reserve performance history");

buffer.str("");
buffer << reserve_performance_history;

values.push_back(buffer.str());

// Reserve gradient norm history

labels.push_back("Reserve gradient norm history");

buffer.str("");
buffer << reserve_gradient_norm_history;

values.push_back(buffer.str());

// Reserve generalization performance history

labels.push_back("Reserve generalization performance history");

buffer.str("");
buffer << reserve_generalization_performance_history;

values.push_back(buffer.str());

// Reserve elapsed time history

labels.push_back("Reserve elapsed time history");

buffer.str("");
buffer << reserve_elapsed_time_history;

values.push_back(buffer.str());

const size_t rows_number = labels.size();
const size_t columns_number = 2;

Matrix<std::string> string_matrix(rows_number, columns_number);

string_matrix.set_column(0, labels);
string_matrix.set_column(1, values);

return(string_matrix);
const tinyxml2::XMLElement* root_element = document.FirstChildElement("QuasiNewtonMethod");

if(!root_element)
{
   std::ostringstream buffer;

   buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
          << "void from_XML(const tinyxml2::XMLDocument&) method.\n"
          << "Quasi-Newton method element is NULL.\n";

   throw std::logic_error(buffer.str());
}
// Inverse Hessian approximation method
{
   const tinyxml2::XMLElement* element = root_element->FirstChildElement("InverseHessianApproximationMethod");

   if(element)
   {
      const std::string new_inverse_Hessian_approximation_method = element->GetText();

      try
      {
         set_inverse_Hessian_approximation_method(new_inverse_Hessian_approximation_method);
      }
      catch(const std::logic_error& e)
      {
         std::cout << e.what() << std::endl;
      }
   }
}
// Training rate algorithm
{
   const tinyxml2::XMLElement* element = root_element->FirstChildElement("TrainingRateAlgorithm");

   if(element)
   {
      tinyxml2::XMLDocument training_rate_algorithm_document;

      tinyxml2::XMLElement* element_clone = training_rate_algorithm_document.NewElement("TrainingRateAlgorithm");
      training_rate_algorithm_document.InsertFirstChild(element_clone);

      DeepClone(element_clone, element, &training_rate_algorithm_document, NULL);

      training_rate_algorithm.from_XML(training_rate_algorithm_document);
   }
}
{
   const tinyxml2::XMLElement* element = root_element->FirstChildElement("WarningParametersNorm");

   if(element)
   {
      const double new_warning_parameters_norm = atof(element->GetText());

      try
      {
         set_warning_parameters_norm(new_warning_parameters_norm);
      }
      catch(const std::logic_error& e)
      {
         std::cout << e.what() << std::endl;
      }
   }
}

{
   const tinyxml2::XMLElement* element = root_element->FirstChildElement("WarningGradientNorm");

   if(element)
   {
      const double new_warning_gradient_norm = atof(element->GetText());

      try
      {
         set_warning_gradient_norm(new_warning_gradient_norm);
      }
      catch(const std::logic_error& e)
      {
         std::cout << e.what() << std::endl;
      }
   }
}

{
   const tinyxml2::XMLElement* element = root_element->FirstChildElement("WarningTrainingRate");

   if(element)
   {
      const double new_warning_training_rate = atof(element->GetText());

      try
      {
         set_warning_training_rate(new_warning_training_rate);
      }
      catch(const std::logic_error& e)
      {
         std::cout << e.what() << std::endl;
      }
   }
}

{
   const tinyxml2::XMLElement* element = root_element->FirstChildElement("ErrorParametersNorm");

   if(element)
   {
      const double new_error_parameters_norm = atof(element->GetText());

      try
      {
         set_error_parameters_norm(new_error_parameters_norm);
      }
      catch(const std::logic_error& e)
      {
         std::cout << e.what() << std::endl;
      }
   }
}

{
   const tinyxml2::XMLElement* element = root_element->FirstChildElement("ErrorGradientNorm");

   if(element)
   {
      const double new_error_gradient_norm = atof(element->GetText());

      try
      {
         set_error_gradient_norm(new_error_gradient_norm);
      }
      catch(const std::logic_error& e)
      {
         std::cout << e.what() << std::endl;
      }
   }
}

{
   const tinyxml2::XMLElement* element = root_element->FirstChildElement("ErrorTrainingRate");

   if(element)
   {
      const double new_error_training_rate = atof(element->GetText());

      try
      {
         set_error_training_rate(new_error_training_rate);
      }
      catch(const std::logic_error& e)
      {
         std::cout << e.what() << std::endl;
      }
   }
}

{
   const tinyxml2::XMLElement* element = root_element->FirstChildElement("MinimumParametersIncrementNorm");

   if(element)
   {
      const double new_minimum_parameters_increment_norm = atof(element->GetText());

      try
      {
         set_minimum_parameters_increment_norm(new_minimum_parameters_increment_norm);
      }
      catch(const std::logic_error& e)
      {
         std::cout << e.what() << std::endl;
      }
   }
}

{
   const tinyxml2::XMLElement* element = root_element->FirstChildElement("MinimumPerformanceIncrease");

   if(element)
   {
      const double new_minimum_performance_increase = atof(element->GetText());

      try
      {
         set_minimum_performance_increase(new_minimum_performance_increase);
      }
      catch(const std::logic_error& e)
      {
         std::cout << e.what() << std::endl;
      }
   }
}

{
   const tinyxml2::XMLElement* element = root_element->FirstChildElement("PerformanceGoal");

   if(element)
   {
      const double new_performance_goal = atof(element->GetText());

      try
      {
         set_performance_goal(new_performance_goal);
      }
      catch(const std::logic_error& e)
      {
         std::cout << e.what() << std::endl;
      }
   }
}

{
   const tinyxml2::XMLElement* element = root_element->FirstChildElement("GradientNormGoal");

   if(element)
   {
      const double new_gradient_norm_goal = atof(element->GetText());

      try
      {
         set_gradient_norm_goal(new_gradient_norm_goal);
      }
      catch(const std::logic_error& e)
      {
         std::cout << e.what() << std::endl;
      }
   }
}

{
   const tinyxml2::XMLElement* element = root_element->FirstChildElement("MaximumGeneralizationPerformanceDecreases");

   if(element)
   {
      const size_t new_maximum_generalization_performance_decreases = atoi(element->GetText());

      try
      {
         set_maximum_generalization_performance_decreases(new_maximum_generalization_performance_decreases);
      }
      catch(const std::logic_error& e)
      {
         std::cout << e.what() << std::endl;
      }
   }
}

{
   const tinyxml2::XMLElement* element = root_element->FirstChildElement("MaximumIterationsNumber");

   if(element)
   {
      const size_t new_maximum_iterations_number = atoi(element->GetText());

      try
      {
         set_maximum_iterations_number(new_maximum_iterations_number);
      }
      catch(const std::logic_error& e)
      {
         std::cout << e.what() << std::endl;
      }
   }
}

{
   const tinyxml2::XMLElement* element = root_element->FirstChildElement("MaximumTime");

   if(element)
   {
      const double new_maximum_time = atof(element->GetText());

      try
      {
         set_maximum_time(new_maximum_time);
      }
      catch(const std::logic_error& e)
      {
         std::cout << e.what() << std::endl;
      }
   }
}
// The reserve flags are serialized by to_XML() as "0"/"1", hence the != "0" conversion below.

{
   const tinyxml2::XMLElement* element = root_element->FirstChildElement("ReserveParametersHistory");

   if(element)
   {
      const std::string new_reserve_parameters_history = element->GetText();

      try
      {
         set_reserve_parameters_history(new_reserve_parameters_history != "0");
      }
      catch(const std::logic_error& e)
      {
         std::cout << e.what() << std::endl;
      }
   }
}

{
   const tinyxml2::XMLElement* element = root_element->FirstChildElement("ReserveParametersNormHistory");

   if(element)
   {
      const std::string new_reserve_parameters_norm_history = element->GetText();

      try
      {
         set_reserve_parameters_norm_history(new_reserve_parameters_norm_history != "0");
      }
      catch(const std::logic_error& e)
      {
         std::cout << e.what() << std::endl;
      }
   }
}

{
   const tinyxml2::XMLElement* element = root_element->FirstChildElement("ReservePerformanceHistory");

   if(element)
   {
      const std::string new_reserve_performance_history = element->GetText();

      try
      {
         set_reserve_performance_history(new_reserve_performance_history != "0");
      }
      catch(const std::logic_error& e)
      {
         std::cout << e.what() << std::endl;
      }
   }
}

{
   const tinyxml2::XMLElement* element = root_element->FirstChildElement("ReserveGeneralizationPerformanceHistory");

   if(element)
   {
      const std::string new_reserve_generalization_performance_history = element->GetText();

      try
      {
         set_reserve_generalization_performance_history(new_reserve_generalization_performance_history != "0");
      }
      catch(const std::logic_error& e)
      {
         std::cout << e.what() << std::endl;
      }
   }
}

{
   const tinyxml2::XMLElement* element = root_element->FirstChildElement("ReserveGradientHistory");

   if(element)
   {
      const std::string new_reserve_gradient_history = element->GetText();

      try
      {
         set_reserve_gradient_history(new_reserve_gradient_history != "0");
      }
      catch(const std::logic_error& e)
      {
         std::cout << e.what() << std::endl;
      }
   }
}

{
   const tinyxml2::XMLElement* element = root_element->FirstChildElement("ReserveGradientNormHistory");

   if(element)
   {
      const std::string new_reserve_gradient_norm_history = element->GetText();

      try
      {
         set_reserve_gradient_norm_history(new_reserve_gradient_norm_history != "0");
      }
      catch(const std::logic_error& e)
      {
         std::cout << e.what() << std::endl;
      }
   }
}

{
   const tinyxml2::XMLElement* element = root_element->FirstChildElement("ReserveInverseHessianHistory");

   if(element)
   {
      const std::string new_reserve_inverse_Hessian_history = element->GetText();

      try
      {
         set_reserve_inverse_Hessian_history(new_reserve_inverse_Hessian_history != "0");
      }
      catch(const std::logic_error& e)
      {
         std::cout << e.what() << std::endl;
      }
   }
}

{
   const tinyxml2::XMLElement* element = root_element->FirstChildElement("ReserveTrainingDirectionHistory");

   if(element)
   {
      const std::string new_reserve_training_direction_history = element->GetText();

      try
      {
         set_reserve_training_direction_history(new_reserve_training_direction_history != "0");
      }
      catch(const std::logic_error& e)
      {
         std::cout << e.what() << std::endl;
      }
   }
}

{
   const tinyxml2::XMLElement* element = root_element->FirstChildElement("ReserveTrainingRateHistory");

   if(element)
   {
      const std::string new_reserve_training_rate_history = element->GetText();

      try
      {
         set_reserve_training_rate_history(new_reserve_training_rate_history != "0");
      }
      catch(const std::logic_error& e)
      {
         std::cout << e.what() << std::endl;
      }
   }
}

{
   const tinyxml2::XMLElement* element = root_element->FirstChildElement("ReserveElapsedTimeHistory");

   if(element)
   {
      const std::string new_reserve_elapsed_time_history = element->GetText();

      try
      {
         set_reserve_elapsed_time_history(new_reserve_elapsed_time_history != "0");
      }
      catch(const std::logic_error& e)
      {
         std::cout << e.what() << std::endl;
      }
   }
}
{
   const tinyxml2::XMLElement* element = root_element->FirstChildElement("DisplayPeriod");

   if(element)
   {
      const size_t new_display_period = atoi(element->GetText());

      try
      {
         set_display_period(new_display_period);
      }
      catch(const std::logic_error& e)
      {
         std::cout << e.what() << std::endl;
      }
   }
}

{
   const tinyxml2::XMLElement* element = root_element->FirstChildElement("SavePeriod");

   if(element)
   {
      const size_t new_save_period = atoi(element->GetText());

      try
      {
         set_save_period(new_save_period);
      }
      catch(const std::logic_error& e)
      {
         std::cout << e.what() << std::endl;
      }
   }
}

{
   const tinyxml2::XMLElement* element = root_element->FirstChildElement("NeuralNetworkFileName");

   if(element)
   {
      const std::string new_neural_network_file_name = element->GetText();

      try
      {
         set_neural_network_file_name(new_neural_network_file_name);
      }
      catch(const std::logic_error& e)
      {
         std::cout << e.what() << std::endl;
      }
   }
}

{
   const tinyxml2::XMLElement* element = root_element->FirstChildElement("Display");

   if(element)
   {
      const std::string new_display = element->GetText();

      try
      {
         set_display(new_display != "0");
      }
      catch(const std::logic_error& e)
      {
         std::cout << e.what() << std::endl;
      }
   }
}