OpenNN  2.2
Open Neural Networks Library
Public Types | Public Member Functions | Static Public Member Functions | Private Member Functions | Private Attributes | List of all members
OpenNN::DataSet Class Reference

#include <data_set.h>

Public Types

enum  Separator { Space, Tab, Comma, Semicolon }
 
enum  ScalingUnscalingMethod { MinimumMaximum, MeanStandardDeviation }
 
enum  AngularUnits { Radians, Degrees }
 

Public Member Functions

 DataSet (void)
 
 DataSet (const Matrix< double > &)
 
 DataSet (const size_t &, const size_t &)
 
 DataSet (const size_t &, const size_t &, const size_t &)
 
 DataSet (const tinyxml2::XMLDocument &)
 
 DataSet (const std::string &)
 
 DataSet (const DataSet &)
 
virtual ~DataSet (void)
 
DataSet & operator= (const DataSet &)
 
bool operator== (const DataSet &) const
 
const std::string & get_data_file_name (void) const
 
const bool & get_header_line (void) const
 
const Separator & get_separator (void) const
 
std::string get_separator_string (void) const
 
std::string write_separator (void) const
 
const std::string & get_missing_values_label (void) const
 
const size_t & get_lags_number (void) const
 
const bool & get_autoassociation (void) const
 
const Vector< size_t > & get_angular_variables (void) const
 
const AngularUnits & get_angular_units (void) const
 
const MissingValues & get_missing_values (void) const
 
MissingValues * get_missing_values_pointer (void)
 
const Variables & get_variables (void) const
 
Variables * get_variables_pointer (void)
 
const Instances & get_instances (void) const
 
Instances * get_instances_pointer (void)
 
const bool & get_display (void) const
 
bool empty (void) const
 
const Matrix< double > & get_data (void) const
 
Matrix< double > arrange_training_data (void) const
 
Matrix< double > arrange_generalization_data (void) const
 
Matrix< double > arrange_testing_data (void) const
 
Matrix< double > arrange_input_data (void) const
 
Matrix< double > arrange_target_data (void) const
 
Matrix< double > arrange_training_input_data (void) const
 
Matrix< double > arrange_training_target_data (void) const
 
Matrix< double > get_generalization_input_data (void) const
 
Matrix< double > get_generalization_target_data (void) const
 
Matrix< double > arrange_testing_input_data (void) const
 
Matrix< double > arrange_testing_target_data (void) const
 
Vector< double > get_instance (const size_t &) const
 
Vector< double > get_instance (const size_t &, const Vector< size_t > &) const
 
Vector< double > get_variable (const size_t &) const
 
Vector< double > get_variable (const size_t &, const Vector< size_t > &) const
 
void set (void)
 
void set (const Matrix< double > &)
 
void set (const size_t &, const size_t &)
 
void set (const size_t &, const size_t &, const size_t &)
 
void set (const DataSet &)
 
void set (const tinyxml2::XMLDocument &)
 
void set (const std::string &)
 
void set_data (const Matrix< double > &)
 
void set_instances_number (const size_t &)
 
void set_variables_number (const size_t &)
 
void set_data_file_name (const std::string &)
 
void set_header_line (const bool &)
 
void set_separator (const Separator &)
 
void set_separator (const std::string &)
 
void set_missing_values_label (const std::string &)
 
void set_lags_number (const size_t &)
 
void set_autoassociation (const bool &)
 
void set_angular_variables (const Vector< size_t > &)
 
void set_angular_units (AngularUnits &)
 
void set_display (const bool &)
 
void set_default (void)
 
void set_instance (const size_t &, const Vector< double > &)
 
void add_instance (const Vector< double > &)
 
void subtract_instance (const size_t &)
 
void append_variable (const Vector< double > &)
 
void subtract_variable (const size_t &)
 
Vector< size_t > unuse_constant_variables (void)
 
Vector< size_t > unuse_repeated_instances (void)
 
void initialize_data (const double &)
 
void randomize_data_uniform (const double &minimum=-1.0, const double &maximum=1.0)
 
void randomize_data_normal (const double &mean=0.0, const double &standard_deviation=1.0)
 
Vector< Statistics< double > > calculate_data_statistics (void) const
 
Matrix< double > calculate_data_statistics_matrix (void) const
 
Vector< Statistics< double > > calculate_training_instances_statistics (void) const
 
Vector< Statistics< double > > calculate_generalization_instances_statistics (void) const
 
Vector< Statistics< double > > calculate_testing_instances_statistics (void) const
 
Vector< Statistics< double > > calculate_inputs_statistics (void) const
 
Vector< Statistics< double > > calculate_targets_statistics (void) const
 
Vector< double > calculate_training_target_data_mean (void) const
 
Vector< double > calculate_generalization_target_data_mean (void) const
 
Vector< double > calculate_testing_target_data_mean (void) const
 
Matrix< double > calculate_linear_correlations (void) const
 
Vector< Histogram< double > > calculate_data_histograms (const size_t &=10) const
 
Vector< size_t > filter_data (const Vector< double > &, const Vector< double > &)
 
void scale_data_minimum_maximum (const Vector< Statistics< double > > &)
 
void scale_data_mean_standard_deviation (const Vector< Statistics< double > > &)
 
Vector< Statistics< double > > scale_data_minimum_maximum (void)
 
Vector< Statistics< double > > scale_data_mean_standard_deviation (void)
 
void scale_data (const std::string &, const Vector< Statistics< double > > &)
 
Vector< Statistics< double > > scale_data (const std::string &)
 
void scale_inputs_minimum_maximum (const Vector< Statistics< double > > &)
 
Vector< Statistics< double > > scale_inputs_minimum_maximum (void)
 
void scale_inputs_mean_standard_deviation (const Vector< Statistics< double > > &)
 
Vector< Statistics< double > > scale_inputs_mean_standard_deviation (void)
 
Vector< Statistics< double > > scale_inputs (const std::string &)
 
void scale_inputs (const std::string &, const Vector< Statistics< double > > &)
 
void scale_targets_minimum_maximum (const Vector< Statistics< double > > &)
 
Vector< Statistics< double > > scale_targets_minimum_maximum (void)
 
void scale_targets_mean_standard_deviation (const Vector< Statistics< double > > &)
 
Vector< Statistics< double > > scale_targets_mean_standard_deviation (void)
 
Vector< Statistics< double > > scale_targets (const std::string &)
 
void scale_targets (const std::string &, const Vector< Statistics< double > > &)
 
void unscale_data_minimum_maximum (const Vector< Statistics< double > > &)
 
void unscale_data_mean_standard_deviation (const Vector< Statistics< double > > &)
 
void unscale_inputs_minimum_maximum (const Vector< Statistics< double > > &)
 
void unscale_inputs_mean_standard_deviation (const Vector< Statistics< double > > &)
 
void unscale_targets_minimum_maximum (const Vector< Statistics< double > > &)
 
void unscale_targets_mean_standard_deviation (const Vector< Statistics< double > > &)
 
Vector< size_t > calculate_target_class_distribution (void) const
 
Vector< double > calculate_distances (void) const
 
void balance_data (const double &)
 
void balance_target_class_distribution (void)
 
std::string to_string (void) const
 
void print (void) const
 
void print_summary (void) const
 
tinyxml2::XMLDocument * to_XML (void) const
 
void from_XML (const tinyxml2::XMLDocument &)
 
void save (const std::string &) const
 
void load (const std::string &)
 
void print_data (void) const
 
void print_data_preview (void) const
 
void save_data (void) const
 
bool has_data (void) const
 
void load_data (void)
 
Vector< std::string > arrange_time_series_names (const Vector< std::string > &) const
 
Vector< std::string > arrange_autoassociation_names (const Vector< std::string > &) const
 
void convert_time_series (void)
 
void convert_autoassociation (void)
 
void convert_angular_variable_degrees (const size_t &)
 
void convert_angular_variable_radians (const size_t &)
 
void convert_angular_variables_degrees (const Vector< size_t > &)
 
void convert_angular_variables_radians (const Vector< size_t > &)
 
void convert_angular_variables (void)
 
void scrub_missing_values_unuse (void)
 
void scrub_missing_values_mean (void)
 
void scrub_missing_values (void)
 
size_t count_tokens (std::string &) const
 
Vector< std::string > get_tokens (const std::string &) const
 
bool is_numeric (const std::string &) const
 
void trim (std::string &) const
 
std::string get_trimmed (const std::string &) const
 
std::string prepend (const std::string &, const std::string &) const
 
bool is_numeric (const Vector< std::string > &) const
 
bool is_not_numeric (const Vector< std::string > &) const
 
bool is_mixed (const Vector< std::string > &) const
 

Static Public Member Functions

static ScalingUnscalingMethod get_scaling_unscaling_method (const std::string &)
 

Private Member Functions

size_t get_column_index (const Vector< Vector< std::string > > &, const size_t) const
 
void check_separator (const std::string &) const
 
size_t count_data_file_columns_number (void) const
 
void check_header_line (void)
 
Vector< std::string > read_header_line (void) const
 
void read_instance (const std::string &, const Vector< Vector< std::string > > &, const size_t &)
 
Vector< Vector< std::string > > set_from_data_file (void)
 
void read_from_data_file (const Vector< Vector< std::string > > &)
 

Private Attributes

std::string data_file_name
 
bool header_line
 
Separator separator
 
std::string missing_values_label
 
size_t lags_number
 
bool autoassociation
 
Vector< size_t > angular_variables
 
AngularUnits angular_units
 
Matrix< double > data
 
Variables variables
 
Instances instances
 
MissingValues missing_values
 
bool display
 

Detailed Description

This class represents the concept of a data set for data modelling problems, such as function regression, pattern recognition and time series prediction. It basically consists of a data matrix plus a variables object and an instances object.

Definition at line 50 of file data_set.h.

Constructor & Destructor Documentation

OpenNN::DataSet::DataSet ( void  )
explicit

Default constructor. It creates a data set object with zero instances and zero inputs and target variables. It also initializes the rest of class members to their default values.

Definition at line 27 of file data_set.cpp.

OpenNN::DataSet::DataSet ( const Matrix< double > &  data)
explicit

Data constructor. It creates a data set object from a data matrix. It also initializes the rest of class members to their default values.

Parameters
data: Data matrix.

Definition at line 41 of file data_set.cpp.

OpenNN::DataSet::DataSet ( const size_t &  new_variables_number,
const size_t &  new_instances_number 
)
explicit

Instances and variables number constructor. It creates a data set object with given instances and variables numbers. All the variables are set as inputs. It also initializes the rest of class members to their default values.

Parameters
new_variables_number: Number of variables.
new_instances_number: Number of instances in the data set.

Definition at line 58 of file data_set.cpp.

OpenNN::DataSet::DataSet ( const size_t &  new_inputs_number,
const size_t &  new_targets_number,
const size_t &  new_instances_number 
)
explicit

Instances number, input variables number and target variables number constructor. It creates a data set object with given instances and inputs and target variables numbers. It also initializes the rest of class members to their default values.

Parameters
new_inputs_number: Number of input variables.
new_targets_number: Number of target variables.
new_instances_number: Number of instances in the data set.

Definition at line 75 of file data_set.cpp.

OpenNN::DataSet::DataSet ( const tinyxml2::XMLDocument &  data_set_document)
explicit

Sets the data set members from an XML document.

Parameters
data_set_document: TinyXML document containing the member data.

Definition at line 89 of file data_set.cpp.

OpenNN::DataSet::DataSet ( const std::string &  file_name)
explicit

File constructor. It creates a data set object by loading the object members from an XML-type file. Please pay attention to the file format, which is specified in the User's Guide.

Parameters
file_name: Data set file name.

Definition at line 103 of file data_set.cpp.

OpenNN::DataSet::DataSet ( const DataSet &  other_data_set)

Copy constructor. It creates a copy of an existing data set object.

Parameters
other_data_set: Data set object to be copied.

Definition at line 120 of file data_set.cpp.

Member Function Documentation

void OpenNN::DataSet::add_instance ( const Vector< double > &  instance)

Adds a new instance to the data matrix from a vector of real numbers. The size of that vector must be equal to the number of variables. Note that the data matrix must be resized here, which is computationally expensive. All instances are also set for training.

Parameters
instance: Input and target values of the instance to be added.

Definition at line 1307 of file data_set.cpp.

void OpenNN::DataSet::append_variable ( const Vector< double > &  variable)

Appends a variable with given values to the data matrix.

Parameters
variable: Vector of values. The size must be equal to the number of instances.

Definition at line 1377 of file data_set.cpp.

Vector< std::string > OpenNN::DataSet::arrange_autoassociation_names ( const Vector< std::string > &  ) const

Returns a vector with the names arranged for autoassociation.

Todo:

Definition at line 3594 of file data_set.cpp.

Matrix< double > OpenNN::DataSet::arrange_generalization_data ( void  ) const

Returns a matrix with the generalization instances in the data set. The number of rows is the number of generalization instances. The number of columns is the number of variables.

Definition at line 520 of file data_set.cpp.

Matrix< double > OpenNN::DataSet::arrange_input_data ( void  ) const

Returns a matrix with the input variables in the data set. The number of rows is the number of instances. The number of columns is the number of input variables.

Definition at line 555 of file data_set.cpp.

Matrix< double > OpenNN::DataSet::arrange_target_data ( void  ) const

Returns a matrix with the target variables in the data set. The number of rows is the number of instances. The number of columns is the number of target variables.

Definition at line 572 of file data_set.cpp.

Matrix< double > OpenNN::DataSet::arrange_testing_data ( void  ) const

Returns a matrix with the testing instances in the data set. The number of rows is the number of testing instances. The number of columns is the number of variables.

Definition at line 538 of file data_set.cpp.

Matrix< double > OpenNN::DataSet::arrange_testing_input_data ( void  ) const

Returns a matrix with testing instances and input variables. The number of rows is the number of testing instances. The number of columns is the number of input variables.

Definition at line 653 of file data_set.cpp.

Matrix< double > OpenNN::DataSet::arrange_testing_target_data ( void  ) const

Returns a matrix with testing instances and target variables. The number of rows is the number of testing instances. The number of columns is the number of target variables.

Definition at line 669 of file data_set.cpp.

Vector< std::string > OpenNN::DataSet::arrange_time_series_names ( const Vector< std::string > &  ) const

Returns a vector with the names arranged for time series prediction, according to the number of lags.

Todo:

Definition at line 3563 of file data_set.cpp.

Matrix< double > OpenNN::DataSet::arrange_training_data ( void  ) const

Returns a matrix with the training instances in the data set. The number of rows is the number of training instances. The number of columns is the number of variables.

Definition at line 502 of file data_set.cpp.

Matrix< double > OpenNN::DataSet::arrange_training_input_data ( void  ) const

Returns a matrix with training instances and input variables. The number of rows is the number of training instances. The number of columns is the number of input variables.

Definition at line 589 of file data_set.cpp.

Matrix< double > OpenNN::DataSet::arrange_training_target_data ( void  ) const

Returns a matrix with training instances and target variables. The number of rows is the number of training instances. The number of columns is the number of target variables.

Definition at line 605 of file data_set.cpp.

void OpenNN::DataSet::balance_data ( const double &  )
Todo:
This method is not implemented.

Definition at line 3896 of file data_set.cpp.

void OpenNN::DataSet::balance_target_class_distribution ( void  )
Todo:
This method is not implemented.

Definition at line 3998 of file data_set.cpp.

Vector< Histogram< double > > OpenNN::DataSet::calculate_data_histograms ( const size_t &  bins_number = 10) const

Returns a histogram for each variable with a given number of bins. The default number of bins is 10. The format is a vector of subvectors of subsubvectors. The size of the vector is the number of variables. The size of the subvectors is 2 (centers and frequencies). The size of the subsubvectors is the number of bins.

Parameters
bins_number: Number of bins.

Definition at line 1558 of file data_set.cpp.

Vector< Statistics< double > > OpenNN::DataSet::calculate_data_statistics ( void  ) const

Returns a vector of vectors containing some basic statistics of all the variables in the data set. The size of this vector is four. The subvectors are:

  • Minimum.
  • Maximum.
  • Mean.
  • Standard deviation.

Definition at line 1577 of file data_set.cpp.

Matrix< double > OpenNN::DataSet::calculate_data_statistics_matrix ( void  ) const

Returns all the variables statistics from a single matrix. The number of rows is the number of variables. The number of columns is four (minimum, maximum, mean and standard deviation).

Definition at line 1591 of file data_set.cpp.

Vector< double > OpenNN::DataSet::calculate_distances ( void  ) const

Returns a normalized distance between each instance and the mean instance. The size of this vector is the number of instances.

Definition at line 3864 of file data_set.cpp.

Vector< Statistics< double > > OpenNN::DataSet::calculate_generalization_instances_statistics ( void  ) const

Returns a vector of vectors containing some basic statistics of all variables on the generalization instances. The size of this vector is four. The subvectors are:

  • Generalization data mean.
  • Generalization data standard deviation.
  • Generalization data minimum.
  • Generalization data maximum.

Definition at line 1642 of file data_set.cpp.

Vector< Statistics< double > > OpenNN::DataSet::calculate_inputs_statistics ( void  ) const

Returns a vector of vectors with some basic statistics of the input variables on all instances. The size of this vector is four. The subvectors are:

  • Input variables mean.
  • Input variables standard deviation.
  • Input variables minimum.
  • Input variables maximum.

Definition at line 1684 of file data_set.cpp.

Matrix< double > OpenNN::DataSet::calculate_linear_correlations ( void  ) const

Calculates the linear correlations between all targets and all inputs. It returns a matrix whose number of rows is the number of targets and whose number of columns is the number of inputs. Each element contains the linear correlation between a single target and a single input.

Definition at line 1769 of file data_set.cpp.

Vector< size_t > OpenNN::DataSet::calculate_target_class_distribution ( void  ) const
Todo:
This method is not implemented.

Returns a vector containing the number of instances of each class in the data set. If the number of target variables is one then the number of classes is two. If the number of target variables is greater than one then the number of classes is equal to the number of target variables.

Definition at line 3789 of file data_set.cpp.

Vector< Statistics< double > > OpenNN::DataSet::calculate_targets_statistics ( void  ) const

Returns a vector of vectors with some basic statistics of the target variables on all instances. The size of this vector is four. The subvectors are:

  • Target variables mean.
  • Target variables standard deviation.
  • Target variables minimum.
  • Target variables maximum.

Definition at line 1705 of file data_set.cpp.

Vector< Statistics< double > > OpenNN::DataSet::calculate_testing_instances_statistics ( void  ) const

Returns a vector of vectors containing some basic statistics of all variables on the testing instances. The size of this vector is four. The subvectors are:

  • Testing data mean.
  • Testing data standard deviation.
  • Testing data minimum.
  • Testing data maximum.

Definition at line 1663 of file data_set.cpp.

Vector< Statistics< double > > OpenNN::DataSet::calculate_training_instances_statistics ( void  ) const

Returns a vector of vectors containing some basic statistics of all variables on the training instances. The size of this vector is four. The subvectors are:

  • Training data mean.
  • Training data standard deviation.
  • Training data minimum.
  • Training data maximum.

Definition at line 1621 of file data_set.cpp.

void OpenNN::DataSet::check_header_line ( void  )
private

Verifies that the data file has a header line. All elements in a header line must be strings. This method can change the value of the header line member. It throws an exception if some inconsistencies are found.

Definition at line 3088 of file data_set.cpp.

void OpenNN::DataSet::check_separator ( const std::string &  line) const
private

Verifies that a given line in the data file contains the separator character. If the line does not contain the separator, this method throws an exception.

Parameters
line: Data file line.

Definition at line 3018 of file data_set.cpp.

void OpenNN::DataSet::convert_angular_variable_degrees ( const size_t &  variable_index)

Replaces a given angular variable expressed in degrees by the sine and cosine of that variable. This solves the discontinuity associated with angular variables. Note that this method modifies the number of variables.

Parameters
variable_index: Index of angular variable.

Definition at line 4142 of file data_set.cpp.

void OpenNN::DataSet::convert_angular_variable_radians ( const size_t &  variable_index)

Replaces a given angular variable expressed in radians by the sine and cosine of that variable. This solves the discontinuity associated with angular variables. Note that this method modifies the number of variables.

Parameters
variable_index: Index of angular variable.

Definition at line 4188 of file data_set.cpp.

void OpenNN::DataSet::convert_angular_variables ( void  )

Replaces a given set of angular variables by the sine and cosine of each variable, according to the angular units used. This solves the discontinuity associated with angular variables. Note that this method modifies the number of variables.

Definition at line 4329 of file data_set.cpp.

void OpenNN::DataSet::convert_angular_variables_degrees ( const Vector< size_t > &  indices)

Replaces a given set of angular variables expressed in degrees by the sine and cosine of each variable. This solves the discontinuity associated with angular variables. Note that this method modifies the number of variables.

Parameters
indices: Indices of angular variables.

Definition at line 4234 of file data_set.cpp.

void OpenNN::DataSet::convert_angular_variables_radians ( const Vector< size_t > &  indices)

Replaces a given set of angular variables expressed in radians by the sine and cosine of each variable. This solves the discontinuity associated with angular variables. Note that this method modifies the number of variables.

Parameters
indices: Indices of angular variables.

Definition at line 4282 of file data_set.cpp.

void OpenNN::DataSet::convert_autoassociation ( void  )

Arranges the data set for autoassociation.

Todo:

Definition at line 3629 of file data_set.cpp.

void OpenNN::DataSet::convert_time_series ( void  )

Arranges an input-target matrix from a time series matrix, according to the number of lags.

Todo:

Definition at line 3607 of file data_set.cpp.

size_t OpenNN::DataSet::count_data_file_columns_number ( void  ) const
private

Returns the number of tokens in the first line of the data file. That will be interpreted as the number of columns in the data file.

Definition at line 3045 of file data_set.cpp.

size_t OpenNN::DataSet::count_tokens ( std::string &  str) const

Returns the number of strings delimited by separator. If separator does not match anywhere in the string, this method returns 0.

Parameters
str: String to be tokenized.

Definition at line 4446 of file data_set.cpp.

Vector< size_t > OpenNN::DataSet::filter_data ( const Vector< double > &  minimums,
const Vector< double > &  maximums 
)

Unuses those instances with values outside a defined range.

Parameters
minimums: Vector of minimum values in the range. The size must be equal to the number of variables.
maximums: Vector of maximum values in the range. The size must be equal to the number of variables.

Definition at line 4072 of file data_set.cpp.

void OpenNN::DataSet::from_XML ( const tinyxml2::XMLDocument &  data_set_document)

Deserializes a TinyXML document into this data set object.

Parameters
data_set_document: XML document containing the member data.

Definition at line 2601 of file data_set.cpp.

const Vector< size_t > & OpenNN::DataSet::get_angular_variables ( void  ) const

Returns the indices of the angular variables in the data set. When loading a data set with angular variables, a transformation of the data will be performed in order to avoid discontinuities (from 359 degrees to 1 degree).

Definition at line 452 of file data_set.cpp.

const bool & OpenNN::DataSet::get_autoassociation ( void  ) const

Returns true if the data set will be used for an autoassociation application, and false otherwise. In an autoassociation problem the target data is equal to the input data.

Definition at line 440 of file data_set.cpp.

size_t OpenNN::DataSet::get_column_index ( const Vector< Vector< std::string > > &  nominal_labels,
const size_t  column_index 
) const
private

Returns the index of a variable when reading the data file.

Parameters
nominal_labels: Values of all nominal variables in the data file.
column_index: Index of column.

Definition at line 2992 of file data_set.cpp.

const Matrix< double > & OpenNN::DataSet::get_data ( void  ) const

Returns a reference to the data matrix in the data set. The number of rows is equal to the number of instances. The number of columns is equal to the number of variables.

Definition at line 265 of file data_set.cpp.

const bool & OpenNN::DataSet::get_display ( void  ) const

Returns true if messages from this class can be displayed on the screen, or false if messages from this class can't be displayed on the screen.

Definition at line 243 of file data_set.cpp.

Matrix< double > OpenNN::DataSet::get_generalization_input_data ( void  ) const

Returns a matrix with generalization instances and input variables. The number of rows is the number of generalization instances. The number of columns is the number of input variables.

Definition at line 621 of file data_set.cpp.

Matrix< double > OpenNN::DataSet::get_generalization_target_data ( void  ) const

Returns a matrix with generalization instances and target variables. The number of rows is the number of generalization instances. The number of columns is the number of target variables.

Definition at line 637 of file data_set.cpp.

Vector< double > OpenNN::DataSet::get_instance ( const size_t &  i) const

Returns the inputs and target values of a single instance in the data set.

Parameters
i: Index of the instance.

Definition at line 684 of file data_set.cpp.

Vector< double > OpenNN::DataSet::get_instance ( const size_t &  instance_index,
const Vector< size_t > &  variables_indices 
) const

Returns the inputs and target values of a single instance in the data set.

Parameters
instance_index: Index of the instance.
variables_indices: Indices of the variables.

Definition at line 717 of file data_set.cpp.

DataSet::ScalingUnscalingMethod OpenNN::DataSet::get_scaling_unscaling_method ( const std::string &  scaling_unscaling_method)
static

Returns a value of the scaling-unscaling method enumeration from a string containing the name of that method.

Parameters
scaling_unscaling_method: String with the name of the scaling and unscaling method.

Definition at line 473 of file data_set.cpp.

Vector< std::string > OpenNN::DataSet::get_tokens ( const std::string &  str) const

Splits the string into substrings (tokens) wherever separator occurs, and returns a vector with those strings. If separator does not match anywhere in the string, this method returns a single-element list containing this string.

Parameters
str: String to be tokenized.

Definition at line 4497 of file data_set.cpp.

std::string OpenNN::DataSet::get_trimmed ( const std::string &  str) const

Returns a string that has whitespace removed from the start and the end. This includes the ASCII characters "\t", "\n", "\v", "\f", "\r", and " ".

Parameters
str: String to be checked.

Definition at line 4585 of file data_set.cpp.

Vector< double > OpenNN::DataSet::get_variable ( const size_t &  i) const

Returns all the instances of a single variable in the data set.

Parameters
i: Index of the variable.

Definition at line 749 of file data_set.cpp.

Vector< double > OpenNN::DataSet::get_variable ( const size_t &  variable_index,
const Vector< size_t > &  instances_indices 
) const

Returns a given set of instances of a single variable in the data set.

Parameters
variable_index: Index of the variable.
instances_indices: Indices of the instances.

Definition at line 782 of file data_set.cpp.

bool OpenNN::DataSet::has_data ( void  ) const

Returns true if the data matrix is not empty (i.e., it has been loaded), and false otherwise.

Definition at line 4051 of file data_set.cpp.

void OpenNN::DataSet::initialize_data ( const double &  new_value)

Initializes the data matrix with a given value.

Parameters
new_value: Initialization value.

Definition at line 2447 of file data_set.cpp.

bool OpenNN::DataSet::is_mixed ( const Vector< std::string > &  v) const

Returns true if some of the elements in a string list are numeric and some others are not numeric.

Parameters
v: String list to be checked.

Definition at line 4660 of file data_set.cpp.

bool OpenNN::DataSet::is_not_numeric ( const Vector< std::string > &  v) const

Returns true if no element in a string list is numeric, and false otherwise.

Parameters
v: String list to be checked.

Definition at line 4641 of file data_set.cpp.

bool OpenNN::DataSet::is_numeric ( const std::string &  str) const

Returns true if the string passed as argument represents a number, and false otherwise.

Parameters
str: String to be checked.

Definition at line 4542 of file data_set.cpp.

bool OpenNN::DataSet::is_numeric ( const Vector< std::string > &  v) const

Returns true if all the elements in a string list are numeric, and false otherwise.

Parameters
v: String list to be checked.

Definition at line 4622 of file data_set.cpp.

void OpenNN::DataSet::load ( const std::string &  file_name)

Loads the members of a data set object from an XML-type file:

  • Instances number.
  • Training instances number.
  • Training instances indices.
  • Generalization instances number.
  • Generalization instances indices.
  • Testing instances number.
  • Testing instances indices.
  • Input variables number.
  • Input variables indices.
  • Target variables number.
  • Target variables indices.
  • Input variables name.
  • Target variables name.
  • Input variables description.
  • Target variables description.
  • Display.
  • Data.

Please pay attention to the file format, which is specified in the User's Guide.

Parameters
file_name: Name of data set XML-type file.

Definition at line 2880 of file data_set.cpp.

DataSet & OpenNN::DataSet::operator= ( const DataSet &  other_data_set)

Assignment operator. It assigns to the current object the members of an existing data set object.

Parameters
other_data_set: Data set object to be assigned.

Definition at line 143 of file data_set.cpp.

bool OpenNN::DataSet::operator== ( const DataSet &  other_data_set) const

Equal to operator. It compares this object with another object of the same class. It returns true if the members of the two objects have the same values, and false otherwise.

Parameters
other_data_setData set object to be compared with.

Definition at line 179 of file data_set.cpp.

std::string OpenNN::DataSet::prepend ( const std::string &  pre,
const std::string &  str 
) const

Prepends the string pre to the beginning of the string str and returns the whole string.

Parameters
preString to be prepended.
stroriginal string.

Definition at line 4607 of file data_set.cpp.

void OpenNN::DataSet::print_data_preview ( void  ) const

Prints to the screen a preview of the data matrix, i.e., the first, second and last instances.

Definition at line 2917 of file data_set.cpp.

void OpenNN::DataSet::randomize_data_normal ( const double &  mean = 0.0,
const double &  standard_deviation = 1.0 
)

Initializes the data matrix with random values chosen from a normal distribution with given mean and standard deviation.

Definition at line 2469 of file data_set.cpp.

void OpenNN::DataSet::randomize_data_uniform ( const double &  minimum = -1.0,
const double &  maximum = 1.0 
)

Initializes the data matrix with random values chosen from a uniform distribution with given minimum and maximum.

Definition at line 2458 of file data_set.cpp.

void OpenNN::DataSet::read_instance ( const std::string &  line,
const Vector< Vector< std::string > > &  nominal_labels,
const size_t &  instance_index 
)
private

Sets the values of a single instance in the data matrix from a line in the data file.

Parameters
lineData file line.
nominal_labelsValues of all nominal variables in the data file.
instance_indexIndex of instance.

Definition at line 3210 of file data_set.cpp.

void OpenNN::DataSet::save ( const std::string &  file_name) const

Saves the members of a data set object to an XML-type file in an XML-type format.

Parameters
file_nameName of data set XML-type file.

Definition at line 2845 of file data_set.cpp.

void OpenNN::DataSet::scale_data ( const std::string &  scaling_unscaling_method_string,
const Vector< Statistics< double > > &  data_statistics 
)

Scales the data matrix. The method to be used is that in the scaling and unscaling method variable.

Parameters
scaling_unscaling_method_stringString with the name of the scaling-unscaling method (MinimumMaximum or MeanStandardDeviation).
data_statisticsVector of statistics structures for all the variables in the data set. The size of that vector must be equal to the number of variables.

Definition at line 1944 of file data_set.cpp.

Vector< Statistics< double > > OpenNN::DataSet::scale_data ( const std::string &  scaling_unscaling_method)

Calculates the data statistics, scales the data with those values and returns the statistics. The method to be used is that in the scaling and unscaling method variable.

Definition at line 1980 of file data_set.cpp.

void OpenNN::DataSet::scale_data_mean_standard_deviation ( const Vector< Statistics< double > > &  data_statistics)

Scales the data matrix with given mean and standard deviation values. It updates the data matrix.

Parameters
data_statisticsVector of statistics structures for all the variables in the data set. The size of that vector must be equal to the number of variables.

Definition at line 1817 of file data_set.cpp.

Vector< Statistics< double > > OpenNN::DataSet::scale_data_mean_standard_deviation ( void  )

Scales the data using the mean and standard deviation method, and the mean and standard deviation values calculated from the data matrix. It also returns the statistics from all columns.

Definition at line 1879 of file data_set.cpp.

void OpenNN::DataSet::scale_data_minimum_maximum ( const Vector< Statistics< double > > &  data_statistics)

Scales the data matrix with given minimum and maximum values. It updates the data matrix.

Parameters
data_statisticsVector of statistics structures for all the variables in the data set. The size of that vector must be equal to the number of variables.

Definition at line 1896 of file data_set.cpp.

Vector< Statistics< double > > OpenNN::DataSet::scale_data_minimum_maximum ( void  )

Scales the data using the minimum and maximum method, and the minimum and maximum values calculated from the data matrix. It also returns the statistics from all columns.

Definition at line 1863 of file data_set.cpp.

Vector< Statistics< double > > OpenNN::DataSet::scale_inputs ( const std::string &  scaling_unscaling_method)

Calculates the input and target variables statistics. Then it scales the input variables with those values. The method to be used is that in the scaling and unscaling method variable. Finally, it returns the statistics.

Definition at line 2118 of file data_set.cpp.

void OpenNN::DataSet::scale_inputs ( const std::string &  scaling_unscaling_method,
const Vector< Statistics< double > > &  inputs_statistics 
)

Scales the input variables with the given statistics values. The method to be used is that in the scaling and unscaling method variable.

Definition at line 2155 of file data_set.cpp.

void OpenNN::DataSet::scale_inputs_mean_standard_deviation ( const Vector< Statistics< double > > &  inputs_statistics)

Scales the input variables with given mean and standard deviation values. It updates the input variables of the data matrix.

Parameters
inputs_statisticsVector of statistics structures for the input variables. The size of that vector must be equal to the number of inputs.

Definition at line 2022 of file data_set.cpp.

Vector< Statistics< double > > OpenNN::DataSet::scale_inputs_mean_standard_deviation ( void  )

Scales the input variables with the calculated mean and standard deviation values from the data matrix. It updates the input variables of the data matrix. It also returns a vector of statistics structures with the statistics of the input variables.

Definition at line 2036 of file data_set.cpp.

void OpenNN::DataSet::scale_inputs_minimum_maximum ( const Vector< Statistics< double > > &  inputs_statistics)

Scales the input variables with given minimum and maximum values. It updates the input variables of the data matrix.

Parameters
inputs_statisticsVector of statistics structures for all the inputs in the data set. The size of that vector must be equal to the number of input variables.

Definition at line 2070 of file data_set.cpp.

Vector< Statistics< double > > OpenNN::DataSet::scale_inputs_minimum_maximum ( void  )

Scales the input variables with the calculated minimum and maximum values from the data matrix. It updates the input variables of the data matrix. It also returns a vector of statistics structures containing the minimum and maximum values of the input variables.

Definition at line 2084 of file data_set.cpp.

Vector< Statistics< double > > OpenNN::DataSet::scale_targets ( const std::string &  scaling_unscaling_method)

Calculates the input and target variables statistics. Then it scales the target variables with those values. The method to be used is that in the scaling and unscaling method variable. Finally, it returns the statistics.

Definition at line 2289 of file data_set.cpp.

void OpenNN::DataSet::scale_targets ( const std::string &  scaling_unscaling_method,
const Vector< Statistics< double > > &  targets_statistics 
)

Scales the target variables with the given statistics values. The method to be used is that in the scaling and unscaling method variable.

Definition at line 2325 of file data_set.cpp.

void OpenNN::DataSet::scale_targets_mean_standard_deviation ( const Vector< Statistics< double > > &  targets_statistics)

Scales the target variables with given mean and standard deviation values. It updates the target variables of the data matrix.

Parameters
targets_statisticsVector of statistics structures for all the targets in the data set. The size of that vector must be equal to the number of target variables.

Definition at line 2193 of file data_set.cpp.

Vector< Statistics< double > > OpenNN::DataSet::scale_targets_mean_standard_deviation ( void  )

Scales the target variables with the calculated mean and standard deviation values from the data matrix. It updates the target variables of the data matrix. It also returns a vector of statistics structures with the basic statistics of all the variables.

Definition at line 2207 of file data_set.cpp.

void OpenNN::DataSet::scale_targets_minimum_maximum ( const Vector< Statistics< double > > &  targets_statistics)

Scales the target variables with given minimum and maximum values. It updates the target variables of the data matrix.

Parameters
targets_statisticsVector of statistics structures for all the targets in the data set. The size of that vector must be equal to the number of target variables.

Definition at line 2241 of file data_set.cpp.

Vector< Statistics< double > > OpenNN::DataSet::scale_targets_minimum_maximum ( void  )

Scales the target variables with the calculated minimum and maximum values from the data matrix. It updates the target variables of the data matrix. It also returns a vector of statistics structures with the statistics of the target variables.

Definition at line 2272 of file data_set.cpp.

void OpenNN::DataSet::scrub_missing_values ( void  )

General method for dealing with missing values. It switches among the different scrubbing methods available, according to the corresponding value in the missing values object.

Definition at line 4407 of file data_set.cpp.

void OpenNN::DataSet::set ( const Matrix< double > &  new_data)

Sets all variables from a data matrix.

Parameters
new_dataData matrix.

Definition at line 833 of file data_set.cpp.

void OpenNN::DataSet::set ( const size_t &  new_variables_number,
const size_t &  new_instances_number 
)

Sets new numbers of instances and variables in the data set. All the instances are set for training. All the variables are set as inputs.

Parameters
new_variables_numberNumber of variables.
new_instances_numberNumber of instances.

Definition at line 859 of file data_set.cpp.

void OpenNN::DataSet::set ( const size_t &  new_inputs_number,
const size_t &  new_targets_number,
const size_t &  new_instances_number 
)

Sets new numbers of instances, input variables and target variables in the data set. The number of variables in the data set is the number of inputs plus the number of targets.

Parameters
new_inputs_numberNumber of input variables.
new_targets_numberNumber of target variables.
new_instances_numberNumber of instances.

Definition at line 910 of file data_set.cpp.

void OpenNN::DataSet::set ( const DataSet &  other_data_set)

Sets the members of this data set object with those from another data set object.

Parameters
other_data_setData set object to be copied.

Definition at line 933 of file data_set.cpp.

void OpenNN::DataSet::set ( const tinyxml2::XMLDocument &  data_set_document)

Sets the data set members from an XML document.

Parameters
data_set_documentTinyXML document containing the member data.

Definition at line 960 of file data_set.cpp.

void OpenNN::DataSet::set ( const std::string &  file_name)

Sets the data set members by loading them from an XML file.

Parameters
file_nameData set XML file_name.

Definition at line 973 of file data_set.cpp.

void OpenNN::DataSet::set_angular_variables ( const Vector< size_t > &  new_angular_variables)

Sets the indices of those variables which represent angles.

Parameters
new_angular_variablesIndices of angular variables.

Definition at line 1201 of file data_set.cpp.

void OpenNN::DataSet::set_autoassociation ( const bool &  new_autoassociation)

Sets a new autoassociation flag. If the new value is true, the data will be processed for autoassociation when loading. That is, the data file will contain the input data. The target data will be created as being equal to the input data. If the autoassociation value is set to false, the data from the file will not be processed.

Parameters
new_autoassociationAutoassociation value.

Definition at line 1190 of file data_set.cpp.

void OpenNN::DataSet::set_data ( const Matrix< double > &  new_data)

Sets a new data matrix. The number of rows must be equal to the number of instances. The number of columns must be equal to the number of variables. Indices of all training, generalization and testing instances and inputs and target variables do not change.

Parameters
new_dataData matrix.

Definition at line 1025 of file data_set.cpp.

void OpenNN::DataSet::set_data_file_name ( const std::string &  new_data_file_name)

Sets the name of the data file. It also loads the data from that file. Moreover, it sets the variables and instances objects.

Parameters
new_data_file_nameName of the file containing the data.

Definition at line 1078 of file data_set.cpp.

void OpenNN::DataSet::set_default ( void  )

Sets the default member values:

  • Display: True.

Definition at line 999 of file data_set.cpp.

void OpenNN::DataSet::set_display ( const bool &  new_display)

Sets a new display value. If it is set to true messages from this class are to be displayed on the screen; if it is set to false messages from this class are not to be displayed on the screen.

Parameters
new_displayDisplay value.

Definition at line 986 of file data_set.cpp.

Vector< Vector< std::string > > OpenNN::DataSet::set_from_data_file ( void  )
private

Performs a first data file read in which the format is checked, and the numbers of variables, instances and missing values are set.

Definition at line 3344 of file data_set.cpp.

void OpenNN::DataSet::set_instance ( const size_t &  instance_index,
const Vector< double > &  instance 
)

Sets new inputs and target values of a single instance in the data set.

Parameters
instance_indexIndex of the instance.
instanceNew inputs and target values of the instance.

Definition at line 1258 of file data_set.cpp.

void OpenNN::DataSet::set_instances_number ( const size_t &  new_instances_number)

Sets a new number of instances in the data set. All instances are also set for training. The indices of the inputs and target variables do not change.

Parameters
new_instances_numberNumber of instances.

Definition at line 1225 of file data_set.cpp.

void OpenNN::DataSet::set_lags_number ( const size_t &  new_lags_number)

Sets a new number of lags to be defined for a time series prediction application. When loading the data file, the time series data will be modified according to this number.

Parameters
new_lags_numberNumber of lags (x-1, ..., x-l) to be used.

Definition at line 1176 of file data_set.cpp.

void OpenNN::DataSet::set_missing_values_label ( const std::string &  new_missing_values_label)

Sets a new label for the missing values.

Parameters
new_missing_values_labelLabel for the missing values.

Definition at line 1146 of file data_set.cpp.

void OpenNN::DataSet::set_separator ( const Separator &  new_separator)

Sets a new separator.

Parameters
new_separatorSeparator value.

Definition at line 1099 of file data_set.cpp.

void OpenNN::DataSet::set_separator ( const std::string &  new_separator)

Sets a new separator from a string.

Parameters
new_separatorString with the separator value.

Definition at line 1110 of file data_set.cpp.

void OpenNN::DataSet::set_variables_number ( const size_t &  new_variables_number)

Sets a new number of input variables in the data set. The indices of the training, generalization and testing instances do not change. All variables are set as inputs.

Parameters
new_variables_numberNumber of variables.

Definition at line 1242 of file data_set.cpp.

void OpenNN::DataSet::subtract_instance ( const size_t &  instance_index)

Subtracts the inputs-targets instance with a given index from the data set. All instances are also set for training. Note that resizing is here necessary and therefore computationally expensive.

Parameters
instance_indexIndex of instance to be removed.

Definition at line 1344 of file data_set.cpp.

void OpenNN::DataSet::subtract_variable ( const size_t &  variable_index)

Removes a variable with given index from the data matrix.

Parameters
variable_indexIndex of variable to be subtracted.

Definition at line 1418 of file data_set.cpp.

void OpenNN::DataSet::trim ( std::string &  str) const

Removes whitespace from the start and the end of the string passed as argument. This includes the ASCII characters "\t", "\n", "\v", "\f", "\r", and " ".

Parameters
strString to be checked.

Definition at line 4569 of file data_set.cpp.

void OpenNN::DataSet::unscale_data_mean_standard_deviation ( const Vector< Statistics< double > > &  data_statistics)

Unscales the data matrix with given mean and standard deviation values. It updates the data matrix.

Parameters
data_statisticsVector of statistics structures for all the variables in the data set. The size of that vector must be equal to the number of variables.

Definition at line 2363 of file data_set.cpp.

void OpenNN::DataSet::unscale_data_minimum_maximum ( const Vector< Statistics< double > > &  data_statistics)

Unscales the data matrix with given minimum and maximum values. It updates the data matrix.

Parameters
data_statisticsVector of statistics structures for all the variables in the data set. The size of that vector must be equal to the number of variables.

Definition at line 2376 of file data_set.cpp.

void OpenNN::DataSet::unscale_inputs_mean_standard_deviation ( const Vector< Statistics< double > > &  data_statistics)

Unscales the input variables with given mean and standard deviation values. It updates the input variables of the data matrix.

Parameters
data_statisticsVector of statistics structures for all the variables in the data set. The size of that vector must be equal to the number of variables.

Definition at line 2389 of file data_set.cpp.

void OpenNN::DataSet::unscale_inputs_minimum_maximum ( const Vector< Statistics< double > > &  data_statistics)

Unscales the input variables with given minimum and maximum values. It updates the input variables of the data matrix.

Parameters
data_statisticsVector of statistics structures for all the data in the data set. The size of that vector must be equal to the number of variables.

Definition at line 2404 of file data_set.cpp.

void OpenNN::DataSet::unscale_targets_mean_standard_deviation ( const Vector< Statistics< double > > &  data_statistics)

Unscales the target variables with given mean and standard deviation values. It updates the target variables of the data matrix.

Parameters
data_statisticsVector of statistics structures for all the variables in the data set. The size of that vector must be equal to the number of variables.

Definition at line 2419 of file data_set.cpp.

void OpenNN::DataSet::unscale_targets_minimum_maximum ( const Vector< Statistics< double > > &  data_statistics)

Unscales the target variables with given minimum and maximum values. It updates the target variables of the data matrix.

Parameters
data_statisticsVector of statistics structures for all the variables. The size of that vector must be equal to the number of variables.

Definition at line 2434 of file data_set.cpp.

Vector< size_t > OpenNN::DataSet::unuse_constant_variables ( void  )

Removes the input or target indices of those variables with zero standard deviation. It might change the size of the vectors containing the inputs and targets indices.

Definition at line 1456 of file data_set.cpp.

Vector< size_t > OpenNN::DataSet::unuse_repeated_instances ( void  )

Removes the training, generalization and testing indices of those instances which are repeated in the data matrix. It might change the size of the vectors containing the training, generalization and testing indices.

Definition at line 1497 of file data_set.cpp.

Member Data Documentation

Matrix<double> OpenNN::DataSet::data
private

Data Matrix. The number of rows is the number of instances. The number of columns is the number of variables.

Definition at line 426 of file data_set.h.


The documentation for this class was generated from the following files: