OpenNN  2.2
Open Neural Networks Library
cross_entropy_error.cpp
1 /****************************************************************************************************************/
2 /* */
3 /* OpenNN: Open Neural Networks Library */
4 /* www.artelnics.com/opennn */
5 /* */
6 /* C R O S S   E N T R O P Y   E R R O R   C L A S S */
7 /* */
8 /* Roberto Lopez */
9 /* Artelnics - Making intelligent use of data */
11 /* */
12 /****************************************************************************************************************/
13 
14 // OpenNN includes
15 
16 #include "cross_entropy_error.h"
17 
18 namespace OpenNN
19 {
20 
21 // DEFAULT CONSTRUCTOR
22 
27 
29 {
30 }
31 
32 
33 // NEURAL NETWORK CONSTRUCTOR
34 
39 
41  : PerformanceTerm(new_neural_network_pointer)
42 {
43 }
44 
45 
46 // DATA SET CONSTRUCTOR
47 
52 
54 : PerformanceTerm(new_data_set_pointer)
55 {
56 }
57 
58 
59 // NEURAL NETWORK AND DATA SET CONSTRUCTOR
60 
66 
67 CrossEntropyError::CrossEntropyError(NeuralNetwork* new_neural_network_pointer, DataSet* new_data_set_pointer)
68 : PerformanceTerm(new_neural_network_pointer, new_data_set_pointer)
69 {
70 }
71 
72 
73 // XML CONSTRUCTOR
74 
79 
80 CrossEntropyError::CrossEntropyError(const tinyxml2::XMLDocument& sum_squared_error_document)
81  : PerformanceTerm(sum_squared_error_document)
82 {
83 }
84 
85 
86 // COPY CONSTRUCTOR
87 
92 
94  : PerformanceTerm(new_cross_entropy_error)
95 {
96 
97 }
98 
99 
100 // DESTRUCTOR
101 
103 
105 {
106 }
107 
108 
109 // ASSIGNMENT OPERATOR
110 
113 
115 {
116  if(this != &other_cross_entropy_error)
117  {
118  *neural_network_pointer = *other_cross_entropy_error.neural_network_pointer;
119  *data_set_pointer = *other_cross_entropy_error.data_set_pointer;
120  display = other_cross_entropy_error.display;
121  }
122 
123  return(*this);
124 
125 }
126 
127 // EQUAL TO OPERATOR
128 
132 
133 bool CrossEntropyError::operator == (const CrossEntropyError& other_cross_entropy_error) const
134 {
135  if(*neural_network_pointer == *other_cross_entropy_error.neural_network_pointer
136  && *mathematical_model_pointer == *other_cross_entropy_error.mathematical_model_pointer
137  && display == other_cross_entropy_error.display)
138  {
139  return(true);
140  }
141  else
142  {
143  return(false);
144  }
145 
146 }
147 
148 
149 // METHODS
150 
151 
152 // void check(void) const method
153 
156 
157 void CrossEntropyError::check(void) const
158 {
159  std::ostringstream buffer;
160 
161  // Neural network stuff
162 
164  {
165  buffer << "OpenNN Exception: CrossEntropyError class.\n"
166  << "void check(void) const method.\n"
167  << "Pointer to neural network is NULL.\n";
168 
169  throw std::logic_error(buffer.str());
170  }
171 
172  const MultilayerPerceptron* multilayer_perceptron_pointer = neural_network_pointer->get_multilayer_perceptron_pointer();
173 
174  if(!multilayer_perceptron_pointer)
175  {
176  buffer << "OpenNN Exception: CrossEntropyError class.\n"
177  << "void check(void) const method.\n"
178  << "Pointer to multilayer perceptron is NULL.\n";
179 
180  throw std::logic_error(buffer.str());
181  }
182 
183  const size_t inputs_number = multilayer_perceptron_pointer->get_inputs_number();
184  const size_t outputs_number = multilayer_perceptron_pointer->get_outputs_number();
185 
186  if(inputs_number == 0)
187  {
188  buffer << "OpenNN Exception: CrossEntropyError class.\n"
189  << "void check(void) const method.\n"
190  << "Number of inputs in multilayer perceptron object is zero.\n";
191 
192  throw std::logic_error(buffer.str());
193  }
194 
195  if(outputs_number == 0)
196  {
197  buffer << "OpenNN Exception: CrossEntropyError class.\n"
198  << "void check(void) const method.\n"
199  << "Number of outputs in multilayer perceptron object is zero.\n";
200 
201  throw std::logic_error(buffer.str());
202  }
203 
204  const ProbabilisticLayer* probabilistic_layer_pointer = neural_network_pointer->get_probabilistic_layer_pointer();
205 
206  if(!probabilistic_layer_pointer)
207  {
208  buffer << "OpenNN Exception: CrossEntropyError class.\n"
209  << "void check(void) const method.\n"
210  << "Pointer to probabilistic layer is NULL.\n";
211 
212  throw std::logic_error(buffer.str());
213  }
214 
215  const ProbabilisticLayer::ProbabilisticMethod& outputs_probabilizing_method = probabilistic_layer_pointer->get_probabilistic_method();
216 
217  if(outputs_probabilizing_method != ProbabilisticLayer::Softmax)
218  {
219  buffer << "OpenNN Exception: CrossEntropyError class.\n"
220  << "void check(void) const method.\n"
221  << "Probabilistic method is not Softmax.\n";
222 
223  throw std::logic_error(buffer.str());
224  }
225 
226  // Data set stuff
227 
228  if(!data_set_pointer)
229  {
230  buffer << "OpenNN Exception: CrossEntropyError class.\n"
231  << "void check(void) const method.\n"
232  << "Pointer to data set is NULL.\n";
233 
234  throw std::logic_error(buffer.str());
235  }
236 
237  // Sum squared error stuff
238 
239  const Variables& variables = data_set_pointer->get_variables();
240 
241  const size_t data_set_inputs_number = variables.count_inputs_number();
242  const size_t targets_number = variables.count_targets_number();
243 
244  if(inputs_number != data_set_inputs_number)
245  {
246  buffer << "OpenNN Exception: CrossEntropyError class.\n"
247  << "void check(void) const method.\n"
248  << "Number of inputs in neural network must be equal to number of inputs in data set.\n";
249 
250  throw std::logic_error(buffer.str());
251  }
252 
253  if(outputs_number != targets_number)
254  {
255  buffer << "OpenNN Exception: CrossEntropyError class.\n"
256  << "void check(void) const method.\n"
257  << "Number of outputs in neural network must be equal to number of targets in data set.\n";
258 
259  throw std::logic_error(buffer.str());
260  }
261 }
262 
263 
264 // double calculate_performance(void) method
265 
267 
269 {
270  std::ostringstream buffer;
271 
272  buffer << "OpenNN Exception: CrossEntropyError class.\n"
273  << "double calculate_performance(void) method.\n"
274  << "This method is under development.\n";
275 
276  throw std::logic_error(buffer.str());
277 /*
278  #ifndef NDEBUG
279 
280  check();
281 
282  #endif
283 
284  // Neural network stuff
285 
286  const MultilayerPerceptron* multilayer_perceptron_pointer = neural_network_pointer->get_multilayer_perceptron_pointer();
287 
288  const size_t inputs_number = multilayer_perceptron_pointer->get_inputs_number();
289  const size_t outputs_number = multilayer_perceptron_pointer->get_outputs_number();
290 
291  // Data set stuff
292 
293  const Instances& instances = data_set_pointer->get_instances();
294 
295  const size_t training_instances_number = instances.count_training_instances_number();
296 
297  const Vector<size_t> training_indices = instances.arrange_training_indices();
298 
299  size_t training_index;
300 
301  const MissingValues& missing_values = data_set_pointer->get_missing_values();
302 
303  // Cross entropy error
304 
305  Vector<double> inputs(inputs_number);
306  Vector<double> outputs(outputs_number);
307  Vector<double> targets(outputs_number);
308 
309  double objective = 0.0;
310 
311  #pragma omp parallel for private(i, training_index, inputs, outputs, targets) reduction(+ : sum_squared_error)
312 
313  for(size_t i = 0; i < training_instances_number; i++)
314  {
315  if(missing_values.has_missing_values(i))
316  {
317  continue;
318  }
319 
320  // Input vector
321 
322  inputs = data_set_pointer->get_training_input_instance(i);
323 
324  // Output vector
325 
326  outputs = multilayer_perceptron_pointer->calculate_outputs(inputs);
327 
328  // Target vector
329 
330  targets = data_set_pointer->get_training_target_instance(i);
331 
332  // Cross entropy error
333 
334  for(size_t j = 0; j < outputs_number; j++)
335  {
336  objective -= targets[j]*log(outputs[j]) + (1.0 - targets[j])*log(1.0 - outputs[j]);
337  }
338  }
339 
340  return(objective);
341 */
342 }
343 
344 
345 
346 // double calculate_performance(const Vector<double>&) const method
347 
349 {
350  return(0.0);
351 }
352 
353 
354 // double calculate_minimum_performance(void) method
355 
358 
360 {
361  return(0.0);
362 }
363 
364 
365 // double calculate_generalization_performance(void) const method
366 
369 
371 {
372  // Control sentence
373 
374  #ifndef NDEBUG
375 
376  check();
377 
378  #endif
379 
380  // Neural network stuff
381 
382  const MultilayerPerceptron* multilayer_perceptron_pointer = neural_network_pointer->get_multilayer_perceptron_pointer();
383 
384  const size_t inputs_number = multilayer_perceptron_pointer->get_inputs_number();
385  const size_t outputs_number = multilayer_perceptron_pointer->get_outputs_number();
386 
387  // Data set stuff
388 
389  const Instances& instances = data_set_pointer->get_instances();
390 
391  const size_t generalization_instances_number = instances.count_generalization_instances_number();
392 
393  const Vector<size_t> generalization_indices = instances.arrange_generalization_indices();
394 
395  size_t generalization_index;
396 
397  const Variables& variables = data_set_pointer->get_variables();
398 
399  const Vector<size_t> inputs_indices = variables.arrange_inputs_indices();
400  const Vector<size_t> targets_indices = variables.arrange_targets_indices();
401 
402  const MissingValues& missing_values = data_set_pointer->get_missing_values();
403 
404  // Performance functional
405 
406  Vector<double> inputs(inputs_number);
407  Vector<double> outputs(outputs_number);
408  Vector<double> targets(outputs_number);
409 
410  double generalization_performance = 0.0;
411 
412  int i = 0;
413 
414  #pragma omp parallel for private(i, generalization_index, inputs, outputs, targets) reduction(- : generalization_performance)
415 
416  for(i = 0; i < (int)generalization_instances_number; i++)
417  {
418  generalization_index = generalization_indices[i];
419 
420  if(missing_values.has_missing_values(generalization_index))
421  {
422  continue;
423  }
424 
425  // Input vector
426 
427  inputs = data_set_pointer->get_instance(generalization_index, inputs_indices);
428 
429  // Output vector
430 
431  outputs = multilayer_perceptron_pointer->calculate_outputs(inputs);
432 
433  // Target vector
434 
435  targets = data_set_pointer->get_instance(generalization_index, targets_indices);
436 
437  // Cross entropy error
438 
439  for(size_t j = 0; j < outputs_number; j++)
440  {
441  generalization_performance -= targets[j]*log(outputs[j]) + (1.0 - targets[j])*log(1.0 - outputs[j]);
442  }
443  }
444 
445  return(generalization_performance);
446 }
447 
448 
449 // double calculate_minimum_generalization_performance(void) method
450 
453 
455 {
456  return(0.0);
457 }
458 
459 
460 // Vector<double> calculate_gradient(void) const
461 
465 
467 {
468  #ifndef NDEBUG
469 
470  check();
471 
472  #endif
473 
474  // Neural network stuff
475 
476  const MultilayerPerceptron* multilayer_perceptron_pointer = neural_network_pointer->get_multilayer_perceptron_pointer();
477 
478  const size_t inputs_number = multilayer_perceptron_pointer->get_inputs_number();
479  const size_t outputs_number = multilayer_perceptron_pointer->get_outputs_number();
480 
481  const size_t layers_number = multilayer_perceptron_pointer->get_layers_number();
482 
483  const size_t neural_parameters_number = multilayer_perceptron_pointer->count_parameters_number();
484 
485  const bool has_conditions_layer = neural_network_pointer->has_conditions_layer();
486 
487  const ConditionsLayer* conditions_layer_pointer = has_conditions_layer ? neural_network_pointer->get_conditions_layer_pointer() : NULL;
488 
489 
490  #ifndef NDEBUG
491 
492  std::ostringstream buffer;
493 
494  const Matrix<double> target_data = data_set_pointer->arrange_target_data();
495 
496  if(target_data < 0.0)
497  {
498  buffer << "OpenNN Exception: CrossEntropyError class.\n"
499  << "Vector<double> calculate_gradient(void) const method.\n"
500  << "Target data must be equal or greater than zero.\n";
501 
502  throw std::logic_error(buffer.str());
503  }
504 
505  if(target_data > 1.0)
506  {
507  buffer << "OpenNN Exception: CrossEntropyError class.\n"
508  << "Vector<double> calculate_gradient(void) const method.\n"
509  << "Target data must be less or equal or than one.\n";
510 
511  throw std::logic_error(buffer.str());
512  }
513 
514  #endif
515 
516  // Neural network stuff
517 
518  Vector< Vector< Vector<double> > > first_order_forward_propagation(2);
519 
520  Vector<double> particular_solution;
521  Vector<double> homogeneous_solution;
522 
523  // Data set stuff
524 
525  const Instances& instances = data_set_pointer->get_instances();
526 
527  const size_t training_instances_number = instances.count_training_instances_number();
528 
529  const Vector<size_t> training_indices = instances.arrange_training_indices();
530 
531  size_t training_index;
532 
533  const Variables& variables = data_set_pointer->get_variables();
534 
535  const Vector<size_t> inputs_indices = variables.arrange_inputs_indices();
536  const Vector<size_t> targets_indices = variables.arrange_targets_indices();
537 
538  const MissingValues& missing_values = data_set_pointer->get_missing_values();
539 
540  Vector<double> inputs(inputs_number);
541  Vector<double> targets(outputs_number);
542 
543  // Sum squared error stuff
544 
545  Vector<double> output_objective_gradient(outputs_number);
546 
547  Vector< Vector<double> > layers_delta;
548 
549  Vector<double> point_gradient(neural_parameters_number, 0.0);
550 
551  Vector<double> gradient(neural_parameters_number, 0.0);
552 
553  for(size_t i = 0; i < training_instances_number; i++)
554  {
555  training_index = training_indices[i];
556 
557  if(missing_values.has_missing_values(training_index))
558  {
559  continue;
560  }
561 
562  inputs = data_set_pointer->get_instance(training_index, inputs_indices);
563 
564  targets = data_set_pointer->get_instance(training_index, targets_indices);
565 
566  first_order_forward_propagation = multilayer_perceptron_pointer->calculate_first_order_forward_propagation(inputs);
567 
568  const Vector< Vector<double> >& layers_activation = first_order_forward_propagation[0];
569  const Vector< Vector<double> >& layers_activation_derivative = first_order_forward_propagation[1];
570 
571  if(!has_conditions_layer)
572  {
573  const Vector<double>& outputs = layers_activation[layers_number-1];
574 
575  for(size_t j = 0; j < outputs_number; j++)
576  {
577  //output_objective_gradient[j] = -targets[j]/outputs[j]
578  //+ (1.0 - targets[j])*(1.0 - outputs[j]);
579 
580  output_objective_gradient[j]
581  = -targets[j]/outputs[j] + (1.0 - targets[j])/(1.0 - outputs[j]);
582 
583  }
584 
585  layers_delta = calculate_layers_delta(layers_activation_derivative, output_objective_gradient);
586  }
587  else
588  {
589  particular_solution = conditions_layer_pointer->calculate_particular_solution(inputs);
590  homogeneous_solution = conditions_layer_pointer->calculate_homogeneous_solution(inputs);
591 
592  const Vector<double>& outputs = particular_solution + homogeneous_solution*layers_activation[layers_number-1];
593 
594  for(size_t j = 0; j < outputs_number; j++)
595  {
596 // output_objective_gradient[j]
597 // = -targets[j]/outputs[j] + (1.0 - targets[j])*(1.0 - outputs[j]);
598 
599  output_objective_gradient[j]
600  = -targets[j]/outputs[j] + (1.0 - targets[j])*(1.0 - outputs[j]);
601 
602  }
603 
604  layers_delta = calculate_layers_delta(layers_activation_derivative, homogeneous_solution, output_objective_gradient);
605  }
606 
607  point_gradient = calculate_point_gradient(inputs, layers_activation, layers_delta);
608 
609  gradient += point_gradient;
610  }
611 
612  return(gradient);
613 }
614 
615 
616 // Matrix<double> calculate_Hessian(void) const
617 
619 
621 {
622  Matrix<double> objective_Hessian;
623 
624  return(objective_Hessian);
625 }
626 
627 
628 // std::string write_performance_term_type(void) const method
629 
631 
633 {
634  return("CROSS_ENTROPY_ERROR");
635 }
636 
637 
638 // tinyxml2::XMLDocument* to_XML(void) const method
639 
642 
643 tinyxml2::XMLDocument* CrossEntropyError::to_XML(void) const
644 {
645  std::ostringstream buffer;
646 
647  tinyxml2::XMLDocument* document = new tinyxml2::XMLDocument;
648 
649  // Cross entropy error
650 
651  tinyxml2::XMLElement* cross_entropy_error_element = document->NewElement("CrossEntropyError");
652 
653  document->InsertFirstChild(cross_entropy_error_element);
654 
655  // Display
656 
657  {
658  tinyxml2::XMLElement* display_element = document->NewElement("Display");
659  cross_entropy_error_element->LinkEndChild(display_element);
660 
661  buffer.str("");
662  buffer << display;
663 
664  tinyxml2::XMLText* display_text = document->NewText(buffer.str().c_str());
665  display_element->LinkEndChild(display_text);
666  }
667 
668  return(document);
669 }
670 
671 
672 // void from_XML(const tinyxml2::XMLDocument&) method
673 
676 
677 void CrossEntropyError::from_XML(const tinyxml2::XMLDocument& document)
678 {
679  const tinyxml2::XMLElement* root_element = document.FirstChildElement("CrossEntropyError");
680 
681  if(!root_element)
682  {
683  std::ostringstream buffer;
684 
685  buffer << "OpenNN Exception: CrossEntropyError class.\n"
686  << "void from_XML(const tinyxml2::XMLDocument&) method.\n"
687  << "Cross entropy error element is NULL.\n";
688 
689  throw std::logic_error(buffer.str());
690  }
691 
692  // Display
693  {
694  const tinyxml2::XMLElement* display_element = root_element->FirstChildElement("Display");
695 
696  if(display_element)
697  {
698  const std::string new_display_string = display_element->GetText();
699 
700  try
701  {
702  set_display(new_display_string != "0");
703  }
704  catch(const std::logic_error& e)
705  {
706  std::cout << e.what() << std::endl;
707  }
708  }
709  }
710 }
711 
712 }
713 
714 
715 // OpenNN: Open Neural Networks Library.
716 // Copyright (c) 2005-2015 Roberto Lopez.
717 //
718 // This library is free software; you can redistribute it and/or
719 // modify it under the terms of the GNU Lesser General Public
720 // License as published by the Free Software Foundation; either
721 // version 2.1 of the License, or any later version.
722 //
723 // This library is distributed in the hope that it will be useful,
724 // but WITHOUT ANY WARRANTY; without even the implied warranty of
725 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
726 // Lesser General Public License for more details.
727 
728 // You should have received a copy of the GNU Lesser General Public
729 // License along with this library; if not, write to the Free Software
730 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
double calculate_minimum_generalization_performance(void)
const ProbabilisticMethod & get_probabilistic_method(void) const
const Variables & get_variables(void) const
Returns a constant reference to the variables object composing this data set object.
Definition: data_set.cpp:202
Matrix< double > arrange_target_data(void) const
Definition: data_set.cpp:572
MathematicalModel * mathematical_model_pointer
Pointer to a mathematical model object.
size_t count_training_instances_number(void) const
Returns the number of instances in the data set which will be used for training.
Definition: instances.cpp:387
Vector< double > calculate_gradient(void) const
size_t get_inputs_number(void) const
Returns the number of inputs to the multilayer perceptron.
Vector< double > calculate_point_gradient(const Vector< double > &, const Vector< Vector< double > > &, const Vector< Vector< double > > &) const
size_t get_layers_number(void) const
Returns the number of layers in the multilayer perceptron.
void from_XML(const tinyxml2::XMLDocument &)
bool has_missing_values(void) const
size_t get_outputs_number(void) const
Returns the number of outputs neurons in the multilayer perceptron.
Vector< double > calculate_outputs(const Vector< double > &) const
double calculate_generalization_performance(void) const
ProbabilisticLayer * get_probabilistic_layer_pointer(void) const
Returns a pointer to the probabilistic layer composing this neural network.
const MissingValues & get_missing_values(void) const
Returns a reference to the missing values object in the data set.
Definition: data_set.cpp:275
bool operator==(const CrossEntropyError &) const
void set_display(const bool &)
std::string write_performance_term_type(void) const
Returns a string with the name of the cross entropy error performance type, "CROSS_ENTROPY_ERROR".
Vector< size_t > arrange_targets_indices(void) const
Returns the indices of the target variables.
Definition: variables.cpp:519
Vector< size_t > arrange_training_indices(void) const
Returns the indices of the instances which will be used for training.
Definition: instances.cpp:489
size_t count_generalization_instances_number(void) const
Returns the number of instances in the data set which will be used for generalization.
Definition: instances.cpp:409
Vector< double > get_instance(const size_t &) const
Definition: data_set.cpp:684
bool has_conditions_layer(void) const
MultilayerPerceptron * get_multilayer_perceptron_pointer(void) const
Returns a pointer to the multilayer perceptron composing this neural network.
virtual Vector< double > calculate_homogeneous_solution(const Vector< double > &) const
Returns the homogeneous solution for applying boundary conditions.
NeuralNetwork * neural_network_pointer
Pointer to a multilayer perceptron object.
Matrix< double > calculate_Hessian(void) const
virtual Vector< double > calculate_particular_solution(const Vector< double > &) const
Returns the particular solution for applying boundary conditions.
bool display
Display messages to screen.
ConditionsLayer * get_conditions_layer_pointer(void) const
Returns a pointer to the conditions layer composing this neural network.
size_t count_inputs_number(void) const
Returns the number of input variables of the data set.
Definition: variables.cpp:249
CrossEntropyError & operator=(const CrossEntropyError &)
virtual ~CrossEntropyError(void)
Destructor.
double calculate_performance(void) const
Vector< Vector< Vector< double > > > calculate_first_order_forward_propagation(const Vector< double > &) const
DataSet * data_set_pointer
Pointer to a data set object.
size_t count_targets_number(void) const
Returns the number of target variables of the data set.
Definition: variables.cpp:271
tinyxml2::XMLDocument * to_XML(void) const
Vector< Vector< double > > calculate_layers_delta(const Vector< Vector< double > > &, const Vector< double > &) const
Vector< size_t > arrange_generalization_indices(void) const
Returns the indices of the instances which will be used for generalization.
Definition: instances.cpp:516
ProbabilisticMethod
Enumeration of available methods for interpreting variables as probabilities.
Vector< size_t > arrange_inputs_indices(void) const
Returns the indices of the input variables.
Definition: variables.cpp:493
const Instances & get_instances(void) const
Returns a constant reference to the instances object composing this data set object.
Definition: data_set.cpp:222
size_t count_parameters_number(void) const
Returns the number of parameters (biases and synaptic weights) in the multilayer perceptron.