OpenNN
2.2
Open Neural Networks Library
|
#include <data_set.h>
Public Types | |
enum | Separator { Space, Tab, Comma, Semicolon } |
enum | ScalingUnscalingMethod { MinimumMaximum, MeanStandardDeviation } |
enum | AngularUnits { Radians, Degrees } |
Public Member Functions | |
DataSet (void) | |
DataSet (const Matrix< double > &) | |
DataSet (const size_t &, const size_t &) | |
DataSet (const size_t &, const size_t &, const size_t &) | |
DataSet (const tinyxml2::XMLDocument &) | |
DataSet (const std::string &) | |
DataSet (const DataSet &) | |
virtual | ~DataSet (void) |
DataSet & | operator= (const DataSet &) |
bool | operator== (const DataSet &) const |
const std::string & | get_data_file_name (void) const |
const bool & | get_header_line (void) const |
const Separator & | get_separator (void) const |
std::string | get_separator_string (void) const |
std::string | write_separator (void) const |
const std::string & | get_missing_values_label (void) const |
const size_t & | get_lags_number (void) const |
const bool & | get_autoassociation (void) const |
const Vector< size_t > & | get_angular_variables (void) const |
const AngularUnits & | get_angular_units (void) const |
const MissingValues & | get_missing_values (void) const |
MissingValues * | get_missing_values_pointer (void) |
const Variables & | get_variables (void) const |
Variables * | get_variables_pointer (void) |
const Instances & | get_instances (void) const |
Instances * | get_instances_pointer (void) |
const bool & | get_display (void) const |
bool | empty (void) const |
const Matrix< double > & | get_data (void) const |
Matrix< double > | arrange_training_data (void) const |
Matrix< double > | arrange_generalization_data (void) const |
Matrix< double > | arrange_testing_data (void) const |
Matrix< double > | arrange_input_data (void) const |
Matrix< double > | arrange_target_data (void) const |
Matrix< double > | arrange_training_input_data (void) const |
Matrix< double > | arrange_training_target_data (void) const |
Matrix< double > | get_generalization_input_data (void) const |
Matrix< double > | get_generalization_target_data (void) const |
Matrix< double > | arrange_testing_input_data (void) const |
Matrix< double > | arrange_testing_target_data (void) const |
Vector< double > | get_instance (const size_t &) const |
Vector< double > | get_instance (const size_t &, const Vector< size_t > &) const |
Vector< double > | get_variable (const size_t &) const |
Vector< double > | get_variable (const size_t &, const Vector< size_t > &) const |
void | set (void) |
void | set (const Matrix< double > &) |
void | set (const size_t &, const size_t &) |
void | set (const size_t &, const size_t &, const size_t &) |
void | set (const DataSet &) |
void | set (const tinyxml2::XMLDocument &) |
void | set (const std::string &) |
void | set_data (const Matrix< double > &) |
void | set_instances_number (const size_t &) |
void | set_variables_number (const size_t &) |
void | set_data_file_name (const std::string &) |
void | set_header_line (const bool &) |
void | set_separator (const Separator &) |
void | set_separator (const std::string &) |
void | set_missing_values_label (const std::string &) |
void | set_lags_number (const size_t &) |
void | set_autoassociation (const bool &) |
void | set_angular_variables (const Vector< size_t > &) |
void | set_angular_units (AngularUnits &) |
void | set_display (const bool &) |
void | set_default (void) |
void | set_instance (const size_t &, const Vector< double > &) |
void | add_instance (const Vector< double > &) |
void | subtract_instance (const size_t &) |
void | append_variable (const Vector< double > &) |
void | subtract_variable (const size_t &) |
Vector< size_t > | unuse_constant_variables (void) |
Vector< size_t > | unuse_repeated_instances (void) |
void | initialize_data (const double &) |
void | randomize_data_uniform (const double &minimum=-1.0, const double &maximum=1.0) |
void | randomize_data_normal (const double &mean=0.0, const double &standard_deviation=1.0) |
Vector< Statistics< double > > | calculate_data_statistics (void) const |
Matrix< double > | calculate_data_statistics_matrix (void) const |
Vector< Statistics< double > > | calculate_training_instances_statistics (void) const |
Vector< Statistics< double > > | calculate_generalization_instances_statistics (void) const |
Vector< Statistics< double > > | calculate_testing_instances_statistics (void) const |
Vector< Statistics< double > > | calculate_inputs_statistics (void) const |
Vector< Statistics< double > > | calculate_targets_statistics (void) const |
Vector< double > | calculate_training_target_data_mean (void) const |
Vector< double > | calculate_generalization_target_data_mean (void) const |
Vector< double > | calculate_testing_target_data_mean (void) const |
Matrix< double > | calculate_linear_correlations (void) const |
Vector< Histogram< double > > | calculate_data_histograms (const size_t &=10) const |
Vector< size_t > | filter_data (const Vector< double > &, const Vector< double > &) |
void | scale_data_minimum_maximum (const Vector< Statistics< double > > &) |
void | scale_data_mean_standard_deviation (const Vector< Statistics< double > > &) |
Vector< Statistics< double > > | scale_data_minimum_maximum (void) |
Vector< Statistics< double > > | scale_data_mean_standard_deviation (void) |
void | scale_data (const std::string &, const Vector< Statistics< double > > &) |
Vector< Statistics< double > > | scale_data (const std::string &) |
void | scale_inputs_minimum_maximum (const Vector< Statistics< double > > &) |
Vector< Statistics< double > > | scale_inputs_minimum_maximum (void) |
void | scale_inputs_mean_standard_deviation (const Vector< Statistics< double > > &) |
Vector< Statistics< double > > | scale_inputs_mean_standard_deviation (void) |
Vector< Statistics< double > > | scale_inputs (const std::string &) |
void | scale_inputs (const std::string &, const Vector< Statistics< double > > &) |
void | scale_targets_minimum_maximum (const Vector< Statistics< double > > &) |
Vector< Statistics< double > > | scale_targets_minimum_maximum (void) |
void | scale_targets_mean_standard_deviation (const Vector< Statistics< double > > &) |
Vector< Statistics< double > > | scale_targets_mean_standard_deviation (void) |
Vector< Statistics< double > > | scale_targets (const std::string &) |
void | scale_targets (const std::string &, const Vector< Statistics< double > > &) |
void | unscale_data_minimum_maximum (const Vector< Statistics< double > > &) |
void | unscale_data_mean_standard_deviation (const Vector< Statistics< double > > &) |
void | unscale_inputs_minimum_maximum (const Vector< Statistics< double > > &) |
void | unscale_inputs_mean_standard_deviation (const Vector< Statistics< double > > &) |
void | unscale_targets_minimum_maximum (const Vector< Statistics< double > > &) |
void | unscale_targets_mean_standard_deviation (const Vector< Statistics< double > > &) |
Vector< size_t > | calculate_target_class_distribution (void) const |
Vector< double > | calculate_distances (void) const |
void | balance_data (const double &) |
void | balance_target_class_distribution (void) |
std::string | to_string (void) const |
void | print (void) const |
void | print_summary (void) const |
tinyxml2::XMLDocument * | to_XML (void) const |
void | from_XML (const tinyxml2::XMLDocument &) |
void | save (const std::string &) const |
void | load (const std::string &) |
void | print_data (void) const |
void | print_data_preview (void) const |
void | save_data (void) const |
bool | has_data (void) const |
void | load_data (void) |
Vector< std::string > | arrange_time_series_names (const Vector< std::string > &) const |
Vector< std::string > | arrange_autoassociation_names (const Vector< std::string > &) const |
void | convert_time_series (void) |
void | convert_autoassociation (void) |
void | convert_angular_variable_degrees (const size_t &) |
void | convert_angular_variable_radians (const size_t &) |
void | convert_angular_variables_degrees (const Vector< size_t > &) |
void | convert_angular_variables_radians (const Vector< size_t > &) |
void | convert_angular_variables (void) |
void | scrub_missing_values_unuse (void) |
void | scrub_missing_values_mean (void) |
void | scrub_missing_values (void) |
size_t | count_tokens (std::string &) const |
Vector< std::string > | get_tokens (const std::string &) const |
bool | is_numeric (const std::string &) const |
void | trim (std::string &) const |
std::string | get_trimmed (const std::string &) const |
std::string | prepend (const std::string &, const std::string &) const |
bool | is_numeric (const Vector< std::string > &) const |
bool | is_not_numeric (const Vector< std::string > &) const |
bool | is_mixed (const Vector< std::string > &) const |
Static Public Member Functions | |
static ScalingUnscalingMethod | get_scaling_unscaling_method (const std::string &) |
Private Member Functions | |
size_t | get_column_index (const Vector< Vector< std::string > > &, const size_t) const |
void | check_separator (const std::string &) const |
size_t | count_data_file_columns_number (void) const |
void | check_header_line (void) |
Vector< std::string > | read_header_line (void) const |
void | read_instance (const std::string &, const Vector< Vector< std::string > > &, const size_t &) |
Vector< Vector< std::string > > | set_from_data_file (void) |
void | read_from_data_file (const Vector< Vector< std::string > > &) |
Private Attributes | |
std::string | data_file_name |
bool | header_line |
Separator | separator |
std::string | missing_values_label |
size_t | lags_number |
bool | autoassociation |
Vector< size_t > | angular_variables |
AngularUnits | angular_units |
Matrix< double > | data |
Variables | variables |
Instances | instances |
MissingValues | missing_values |
bool | display |
This class represents the concept of a data set for data modelling problems, such as function regression, pattern recognition and time series prediction. It basically consists of a data matrix plus a variables object and an instances object.
Definition at line 50 of file data_set.h.
|
explicit |
Default constructor. It creates a data set object with zero instances and zero inputs and target variables. It also initializes the rest of class members to their default values.
Definition at line 27 of file data_set.cpp.
|
explicit |
Data constructor. It creates a data set object from a data matrix. It also initializes the rest of class members to their default values.
data | Data matrix. |
Definition at line 41 of file data_set.cpp.
|
explicit |
Instances and variables number constructor. It creates a data set object with given instances and variables numbers. All the variables are set as inputs. It also initializes the rest of class members to their default values.
new_variables_number | Number of variables. |
new_instances_number | Number of instances in the data set. |
Definition at line 58 of file data_set.cpp.
|
explicit |
Instances number, input variables number and target variables number constructor. It creates a data set object with given instances and inputs and target variables numbers. It also initializes the rest of class members to their default values.
new_inputs_number | Number of input variables. |
new_targets_number | Number of target variables. |
new_instances_number | Number of instances in the data set. |
Definition at line 75 of file data_set.cpp.
|
explicit |
Sets the data set members from an XML document.
data_set_document | TinyXML document containing the member data. |
Definition at line 89 of file data_set.cpp.
|
explicit |
File constructor. It creates a data set object by loading the object members from an XML-type file. Please mind the file format, which is specified in the User's Guide.
file_name | Data set file name. |
Definition at line 103 of file data_set.cpp.
OpenNN::DataSet::DataSet | ( | const DataSet & | other_data_set | ) |
Copy constructor. It creates a copy of an existing data set object.
other_data_set | Data set object to be copied. |
Definition at line 120 of file data_set.cpp.
void OpenNN::DataSet::add_instance | ( | const Vector< double > & | instance | ) |
Adds a new instance to the data matrix from a vector of real numbers. The size of that vector must be equal to the number of variables. Note that resizing is necessary here, and this method is therefore computationally expensive. All instances are also set for training.
instance | Input and target values of the instance to be added. |
Definition at line 1307 of file data_set.cpp.
void OpenNN::DataSet::append_variable | ( | const Vector< double > & | variable | ) |
Appends a variable with given values to the data matrix.
variable | Vector of values. The size must be equal to the number of instances. |
Definition at line 1377 of file data_set.cpp.
Vector< std::string > OpenNN::DataSet::arrange_autoassociation_names | ( | const Vector< std::string > & | ) | const |
Returns a vector with the names arranged for autoassociation.
Definition at line 3594 of file data_set.cpp.
Matrix< double > OpenNN::DataSet::arrange_generalization_data | ( | void | ) | const |
Returns a matrix with the generalization instances in the data set. The number of rows is the number of generalization instances. The number of columns is the number of variables.
Definition at line 520 of file data_set.cpp.
Matrix< double > OpenNN::DataSet::arrange_input_data | ( | void | ) | const |
Returns a matrix with the input variables in the data set. The number of rows is the number of instances. The number of columns is the number of input variables.
Definition at line 555 of file data_set.cpp.
Matrix< double > OpenNN::DataSet::arrange_target_data | ( | void | ) | const |
Returns a matrix with the target variables in the data set. The number of rows is the number of instances. The number of columns is the number of target variables.
Definition at line 572 of file data_set.cpp.
Matrix< double > OpenNN::DataSet::arrange_testing_data | ( | void | ) | const |
Returns a matrix with the testing instances in the data set. The number of rows is the number of testing instances. The number of columns is the number of variables.
Definition at line 538 of file data_set.cpp.
Matrix< double > OpenNN::DataSet::arrange_testing_input_data | ( | void | ) | const |
Returns a matrix with testing instances and input variables. The number of rows is the number of testing instances. The number of columns is the number of input variables.
Definition at line 653 of file data_set.cpp.
Matrix< double > OpenNN::DataSet::arrange_testing_target_data | ( | void | ) | const |
Returns a matrix with testing instances and target variables. The number of rows is the number of testing instances. The number of columns is the number of target variables.
Definition at line 669 of file data_set.cpp.
Vector< std::string > OpenNN::DataSet::arrange_time_series_names | ( | const Vector< std::string > & | ) | const |
Returns a vector with the names arranged for time series prediction, according to the number of lags.
Definition at line 3563 of file data_set.cpp.
Matrix< double > OpenNN::DataSet::arrange_training_data | ( | void | ) | const |
Returns a matrix with the training instances in the data set. The number of rows is the number of training instances. The number of columns is the number of variables.
Definition at line 502 of file data_set.cpp.
Matrix< double > OpenNN::DataSet::arrange_training_input_data | ( | void | ) | const |
Returns a matrix with training instances and input variables. The number of rows is the number of training instances. The number of columns is the number of input variables.
Definition at line 589 of file data_set.cpp.
Matrix< double > OpenNN::DataSet::arrange_training_target_data | ( | void | ) | const |
Returns a matrix with training instances and target variables. The number of rows is the number of training instances. The number of columns is the number of target variables.
Definition at line 605 of file data_set.cpp.
void OpenNN::DataSet::balance_data | ( | const double & | ) |
Definition at line 3896 of file data_set.cpp.
void OpenNN::DataSet::balance_target_class_distribution | ( | void | ) |
Definition at line 3998 of file data_set.cpp.
Vector< Histogram< double > > OpenNN::DataSet::calculate_data_histograms | ( | const size_t & | bins_number = 10 | ) | const |
Returns a histogram for each variable with a given number of bins. The default number of bins is 10. The format is a vector of subvectors of subsubvectors. The size of the vector is the number of variables. The size of the subvectors is 2 (centers and frequencies). The size of the subsubvectors is the number of bins.
bins_number | Number of bins. |
Definition at line 1558 of file data_set.cpp.
Vector< Statistics< double > > OpenNN::DataSet::calculate_data_statistics | ( | void | ) | const |
Returns a vector of vectors containing some basic statistics of all the variables in the data set. The size of this vector is four. The subvectors are:
Definition at line 1577 of file data_set.cpp.
Matrix< double > OpenNN::DataSet::calculate_data_statistics_matrix | ( | void | ) | const |
Returns all the variables statistics from a single matrix. The number of rows is the number of variables. The number of columns is four (minimum, maximum, mean and standard deviation).
Definition at line 1591 of file data_set.cpp.
Vector< double > OpenNN::DataSet::calculate_distances | ( | void | ) | const |
Returns a normalized distance between each instance and the mean instance. The size of this vector is the number of instances.
Definition at line 3864 of file data_set.cpp.
Vector< Statistics< double > > OpenNN::DataSet::calculate_generalization_instances_statistics | ( | void | ) | const |
Returns a vector of vectors containing some basic statistics of all variables on the generalization instances. The size of this vector is four. The subvectors are:
Definition at line 1642 of file data_set.cpp.
Vector< Statistics< double > > OpenNN::DataSet::calculate_inputs_statistics | ( | void | ) | const |
Returns a vector of vectors with some basic statistics of the input variables on all instances. The size of this vector is four. The subvectors are:
Definition at line 1684 of file data_set.cpp.
Matrix< double > OpenNN::DataSet::calculate_linear_correlations | ( | void | ) | const |
Calculates the linear correlations between all targets and all inputs. It returns a matrix whose number of rows is the number of targets and whose number of columns is the number of inputs. Each element contains the linear correlation between a single target and a single input.
Definition at line 1769 of file data_set.cpp.
Vector< size_t > OpenNN::DataSet::calculate_target_class_distribution | ( | void | ) | const |
Returns a vector containing the number of instances of each class in the data set. If the number of target variables is one then the number of classes is two. If the number of target variables is greater than one then the number of classes is equal to the number of target variables.
Definition at line 3789 of file data_set.cpp.
Vector< Statistics< double > > OpenNN::DataSet::calculate_targets_statistics | ( | void | ) | const |
Returns a vector of vectors with some basic statistics of the target variables on all instances. The size of this vector is four. The subvectors are:
Definition at line 1705 of file data_set.cpp.
Vector< Statistics< double > > OpenNN::DataSet::calculate_testing_instances_statistics | ( | void | ) | const |
Returns a vector of vectors containing some basic statistics of all variables on the testing instances. The size of this vector is four. The subvectors are:
Definition at line 1663 of file data_set.cpp.
Vector< Statistics< double > > OpenNN::DataSet::calculate_training_instances_statistics | ( | void | ) | const |
Returns a vector of vectors containing some basic statistics of all variables on the training instances. The size of this vector is four. The subvectors are:
Definition at line 1621 of file data_set.cpp.
|
private |
Verifies that the data file has a header line. All elements in a header line must be strings. This method can change the value of the header line member. It throws an exception if some inconsistencies are found.
Definition at line 3088 of file data_set.cpp.
|
private |
Verifies that a given line in the data file contains the separator character. If the line does not contain the separator, this method throws an exception.
line | Data file line. |
Definition at line 3018 of file data_set.cpp.
void OpenNN::DataSet::convert_angular_variable_degrees | ( | const size_t & | variable_index | ) |
Replaces a given angular variable expressed in degrees by the sine and cosine of that variable. This solves the discontinuity associated with angular variables. Note that this method modifies the number of variables.
variable_index | Index of angular variable. |
Definition at line 4142 of file data_set.cpp.
void OpenNN::DataSet::convert_angular_variable_radians | ( | const size_t & | variable_index | ) |
Replaces a given angular variable expressed in radians by the sine and cosine of that variable. This solves the discontinuity associated with angular variables. Note that this method modifies the number of variables.
variable_index | Index of angular variable. |
Definition at line 4188 of file data_set.cpp.
void OpenNN::DataSet::convert_angular_variables | ( | void | ) |
Replaces a given set of angular variables by the sine and cosine of each variable, according to the angular units used. This solves the discontinuity associated with angular variables. Note that this method modifies the number of variables.
Definition at line 4329 of file data_set.cpp.
void OpenNN::DataSet::convert_angular_variables_degrees | ( | const Vector< size_t > & | indices | ) |
Replaces a given set of angular variables expressed in degrees by the sine and cosine of each variable. This solves the discontinuity associated with angular variables. Note that this method modifies the number of variables.
indices | Indices of angular variables. |
Definition at line 4234 of file data_set.cpp.
void OpenNN::DataSet::convert_angular_variables_radians | ( | const Vector< size_t > & | indices | ) |
Replaces a given set of angular variables expressed in radians by the sine and cosine of each variable. This solves the discontinuity associated with angular variables. Note that this method modifies the number of variables.
indices | Indices of angular variables. |
Definition at line 4282 of file data_set.cpp.
void OpenNN::DataSet::convert_autoassociation | ( | void | ) |
void OpenNN::DataSet::convert_time_series | ( | void | ) |
Arranges an input-target matrix from a time series matrix, according to the number of lags.
Definition at line 3607 of file data_set.cpp.
|
private |
Returns the number of tokens in the first line of the data file. That will be interpreted as the number of columns in the data file.
Definition at line 3045 of file data_set.cpp.
size_t OpenNN::DataSet::count_tokens | ( | std::string & | str | ) | const |
Returns the number of strings delimited by separator. If separator does not match anywhere in the string, this method returns 0.
str | String to be tokenized. |
Definition at line 4446 of file data_set.cpp.
Vector< size_t > OpenNN::DataSet::filter_data | ( | const Vector< double > & | minimums, |
const Vector< double > & | maximums | ||
) |
Unuses those instances with values outside a defined range.
minimums | Vector of minimum values in the range. The size must be equal to the number of variables. |
maximums | Vector of maximum values in the range. The size must be equal to the number of variables. |
Definition at line 4072 of file data_set.cpp.
void OpenNN::DataSet::from_XML | ( | const tinyxml2::XMLDocument & | data_set_document | ) |
Deserializes a TinyXML document into this data set object.
data_set_document | XML document containing the member data. |
Definition at line 2601 of file data_set.cpp.
const Vector< size_t > & OpenNN::DataSet::get_angular_variables | ( | void | ) | const |
Returns the indices of the angular variables in the data set. When loading a data set with angular variables, a transformation of the data will be performed in order to avoid discontinuities (from 359 degrees to 1 degree).
Definition at line 452 of file data_set.cpp.
const bool & OpenNN::DataSet::get_autoassociation | ( | void | ) | const |
Returns true if the data set will be used for an autoassociation application, and false otherwise. In an autoassociation problem the target data is equal to the input data.
Definition at line 440 of file data_set.cpp.
|
private |
Returns the index of a variable when reading the data file.
nominal_labels | Values of all nominal variables in the data file. |
column_index | Index of column. |
Definition at line 2992 of file data_set.cpp.
const Matrix< double > & OpenNN::DataSet::get_data | ( | void | ) | const |
Returns a reference to the data matrix in the data set. The number of rows is equal to the number of instances. The number of columns is equal to the number of variables.
Definition at line 265 of file data_set.cpp.
const bool & OpenNN::DataSet::get_display | ( | void | ) | const |
Returns true if messages from this class can be displayed on the screen, or false if messages from this class can't be displayed on the screen.
Definition at line 243 of file data_set.cpp.
Matrix< double > OpenNN::DataSet::get_generalization_input_data | ( | void | ) | const |
Returns a matrix with generalization instances and input variables. The number of rows is the number of generalization instances. The number of columns is the number of input variables.
Definition at line 621 of file data_set.cpp.
Matrix< double > OpenNN::DataSet::get_generalization_target_data | ( | void | ) | const |
Returns a matrix with generalization instances and target variables. The number of rows is the number of generalization instances. The number of columns is the number of target variables.
Definition at line 637 of file data_set.cpp.
Vector< double > OpenNN::DataSet::get_instance | ( | const size_t & | i | ) | const |
Returns the inputs and target values of a single instance in the data set.
i | Index of the instance. |
Definition at line 684 of file data_set.cpp.
Vector< double > OpenNN::DataSet::get_instance | ( | const size_t & | instance_index, |
const Vector< size_t > & | variables_indices | ||
) | const |
Returns the inputs and target values of a single instance in the data set.
instance_index | Index of the instance. |
variables_indices | Indices of the variables. |
Definition at line 717 of file data_set.cpp.
|
static |
Returns a value of the scaling-unscaling method enumeration from a string containing the name of that method.
scaling_unscaling_method | String with the name of the scaling and unscaling method. |
Definition at line 473 of file data_set.cpp.
Vector< std::string > OpenNN::DataSet::get_tokens | ( | const std::string & | str | ) | const |
Splits the string into substrings (tokens) wherever separator occurs, and returns a vector with those strings. If separator does not match anywhere in the string, this method returns a single-element list containing this string.
str | String to be tokenized. |
Definition at line 4497 of file data_set.cpp.
std::string OpenNN::DataSet::get_trimmed | ( | const std::string & | str | ) | const |
Returns a string that has whitespace removed from the start and the end. This includes the ASCII characters "\t", "\n", "\v", "\f", "\r", and " ".
str | String to be checked. |
Definition at line 4585 of file data_set.cpp.
Vector< double > OpenNN::DataSet::get_variable | ( | const size_t & | i | ) | const |
Returns all the instances of a single variable in the data set.
i | Index of the variable. |
Definition at line 749 of file data_set.cpp.
Vector< double > OpenNN::DataSet::get_variable | ( | const size_t & | variable_index, |
const Vector< size_t > & | instances_indices | ||
) | const |
Returns a given set of instances of a single variable in the data set.
variable_index | Index of the variable. |
instances_indices | Indices of the instances. |
Definition at line 782 of file data_set.cpp.
bool OpenNN::DataSet::has_data | ( | void | ) | const |
Returns true if the data matrix is not empty (it has been loaded), and false otherwise.
Definition at line 4051 of file data_set.cpp.
void OpenNN::DataSet::initialize_data | ( | const double & | new_value | ) |
Initializes the data matrix with a given value.
new_value | Initialization value. |
Definition at line 2447 of file data_set.cpp.
bool OpenNN::DataSet::is_mixed | ( | const Vector< std::string > & | v | ) | const |
Returns true if some of the elements in a string list are numeric and some others are not numeric.
v | String list to be checked. |
Definition at line 4660 of file data_set.cpp.
bool OpenNN::DataSet::is_not_numeric | ( | const Vector< std::string > & | v | ) | const |
Returns true if no element in a string list is numeric, and false otherwise.
v | String list to be checked. |
Definition at line 4641 of file data_set.cpp.
bool OpenNN::DataSet::is_numeric | ( | const std::string & | str | ) | const |
Returns true if the string passed as argument represents a number, and false otherwise.
str | String to be checked. |
Definition at line 4542 of file data_set.cpp.
bool OpenNN::DataSet::is_numeric | ( | const Vector< std::string > & | v | ) | const |
Returns true if all the elements in a string list are numeric, and false otherwise.
v | String list to be checked. |
Definition at line 4622 of file data_set.cpp.
void OpenNN::DataSet::load | ( | const std::string & | file_name | ) |
Loads the members of a data set object from an XML-type file.
Please mind the file format, which is specified in the User's Guide.
file_name | Name of data set XML-type file. |
Definition at line 2880 of file data_set.cpp.
Assignment operator. It assigns to the current object the members of an existing data set object.
other_data_set | Data set object to be assigned. |
Definition at line 143 of file data_set.cpp.
bool OpenNN::DataSet::operator== | ( | const DataSet & | other_data_set | ) | const |
Equal to operator. It compares this object with another object of the same class. It returns true if the members of the two objects have the same values, and false otherwise.
other_data_set | Data set object to be compared with. |
Definition at line 179 of file data_set.cpp.
std::string OpenNN::DataSet::prepend | ( | const std::string & | pre, |
const std::string & | str | ||
) | const |
Prepends the string pre to the beginning of the string str and returns the whole string.
pre | String to be prepended. |
str | original string. |
Definition at line 4607 of file data_set.cpp.
void OpenNN::DataSet::print_data_preview | ( | void | ) | const |
Prints to the screen a preview of the data matrix, i.e., the first, second and last instances.
Definition at line 2917 of file data_set.cpp.
void OpenNN::DataSet::randomize_data_normal | ( | const double & | mean = 0.0 , |
const double & | standard_deviation = 1.0 |
||
) |
Initializes the data matrix with random values chosen from a normal distribution with given mean and standard deviation.
Definition at line 2469 of file data_set.cpp.
void OpenNN::DataSet::randomize_data_uniform | ( | const double & | minimum = -1.0 , |
const double & | maximum = 1.0 |
||
) |
Initializes the data matrix with random values chosen from a uniform distribution with given minimum and maximum.
Definition at line 2458 of file data_set.cpp.
|
private |
Sets the values of a single instance in the data matrix from a line in the data file.
line | Data file line. |
nominal_labels | Values of all nominal variables in the data file. |
instance_index | Index of instance. |
Definition at line 3210 of file data_set.cpp.
void OpenNN::DataSet::save | ( | const std::string & | file_name | ) | const |
Saves the members of a data set object to an XML-type file in an XML-type format.
file_name | Name of data set XML-type file. |
Definition at line 2845 of file data_set.cpp.
void OpenNN::DataSet::scale_data | ( | const std::string & | scaling_unscaling_method_string, |
const Vector< Statistics< double > > & | data_statistics | ||
) |
Scales the data matrix. The method to be used is that in the scaling and unscaling method variable.
scaling_unscaling_method_string | String with the name of the scaling-unscaling method (MinimumMaximum or MeanStandardDeviation). |
data_statistics | Vector of statistics structures for all the variables in the data set. The size of that vector must be equal to the number of variables. |
Definition at line 1944 of file data_set.cpp.
Vector< Statistics< double > > OpenNN::DataSet::scale_data | ( | const std::string & | scaling_unscaling_method | ) |
Calculates the data statistics, scales the data with those values and returns the statistics. The method to be used is that in the scaling and unscaling method variable.
Definition at line 1980 of file data_set.cpp.
void OpenNN::DataSet::scale_data_mean_standard_deviation | ( | const Vector< Statistics< double > > & | data_statistics | ) |
Scales the data matrix with given mean and standard deviation values. It updates the data matrix.
data_statistics | Vector of statistics structures for all the variables in the data set. The size of that vector must be equal to the number of variables. |
Definition at line 1817 of file data_set.cpp.
Vector< Statistics< double > > OpenNN::DataSet::scale_data_mean_standard_deviation | ( | void | ) |
Scales the data using the mean and standard deviation method, and the mean and standard deviation values calculated from the data matrix. It also returns the statistics from all columns.
Definition at line 1879 of file data_set.cpp.
void OpenNN::DataSet::scale_data_minimum_maximum | ( | const Vector< Statistics< double > > & | data_statistics | ) |
Scales the data matrix with given minimum and maximum values. It updates the data matrix.
data_statistics | Vector of statistics structures for all the variables in the data set. The size of that vector must be equal to the number of variables. |
Definition at line 1896 of file data_set.cpp.
Vector< Statistics< double > > OpenNN::DataSet::scale_data_minimum_maximum | ( | void | ) |
Scales the data using the minimum and maximum method, and the minimum and maximum values calculated from the data matrix. It also returns the statistics from all columns.
Definition at line 1863 of file data_set.cpp.
Vector< Statistics< double > > OpenNN::DataSet::scale_inputs | ( | const std::string & | scaling_unscaling_method | ) |
Calculates the input and target variables statistics. Then it scales the input variables with those values. The method to be used is that in the scaling and unscaling method variable. Finally, it returns the statistics.
Definition at line 2118 of file data_set.cpp.
void OpenNN::DataSet::scale_inputs | ( | const std::string & | scaling_unscaling_method, |
const Vector< Statistics< double > > & | inputs_statistics | ||
) |
Calculates the input and target variables statistics. Then it scales the input variables with those values. The method to be used is that in the scaling and unscaling method variable.
Definition at line 2155 of file data_set.cpp.
void OpenNN::DataSet::scale_inputs_mean_standard_deviation | ( | const Vector< Statistics< double > > & | inputs_statistics | ) |
Scales the input variables with given mean and standard deviation values. It updates the input variables of the data matrix.
inputs_statistics | Vector of statistics structures for the input variables. The size of that vector must be equal to the number of inputs. |
Definition at line 2022 of file data_set.cpp.
Vector< Statistics< double > > OpenNN::DataSet::scale_inputs_mean_standard_deviation | ( | void | ) |
Scales the input variables with the calculated mean and standard deviation values from the data matrix. It updates the input variables of the data matrix. It also returns a vector of vectors with the variables statistics.
Definition at line 2036 of file data_set.cpp.
void OpenNN::DataSet::scale_inputs_minimum_maximum | ( | const Vector< Statistics< double > > & | inputs_statistics | ) |
Scales the input variables with given minimum and maximum values. It updates the input variables of the data matrix.
inputs_statistics | Vector of statistics structures for all the inputs in the data set. The size of that vector must be equal to the number of input variables. |
Definition at line 2070 of file data_set.cpp.
Vector< Statistics< double > > OpenNN::DataSet::scale_inputs_minimum_maximum | ( | void | ) |
Scales the input variables with the calculated minimum and maximum values from the data matrix. It updates the input variables of the data matrix. It also returns a vector of vectors with the minimum and maximum values of the input variables.
Definition at line 2084 of file data_set.cpp.
Vector< Statistics< double > > OpenNN::DataSet::scale_targets | ( | const std::string & | scaling_unscaling_method | ) |
Calculates the input and target variables statistics. Then it scales the target variables with those values. The method to be used is that in the scaling and unscaling method variable. Finally, it returns the statistics.
Definition at line 2289 of file data_set.cpp.
void OpenNN::DataSet::scale_targets | ( | const std::string & | scaling_unscaling_method, |
const Vector< Statistics< double > > & | targets_statistics | ||
) |
It scales the target variables with those values. The method to be used is that in the scaling and unscaling method variable.
Definition at line 2325 of file data_set.cpp.
void OpenNN::DataSet::scale_targets_mean_standard_deviation | ( | const Vector< Statistics< double > > & | targets_statistics | ) |
Scales the target variables with given mean and standard deviation values. It updates the target variables of the data matrix.
targets_statistics | Vector of statistics structures for all the targets in the data set. The size of that vector must be equal to the number of target variables. |
Definition at line 2193 of file data_set.cpp.
Vector< Statistics< double > > OpenNN::DataSet::scale_targets_mean_standard_deviation | ( | void | ) |
Scales the target variables with the calculated mean and standard deviation values from the data matrix. It updates the target variables of the data matrix. It also returns a vector of statistics structures with the basic statistics of all the variables.
Definition at line 2207 of file data_set.cpp.
void OpenNN::DataSet::scale_targets_minimum_maximum | ( | const Vector< Statistics< double > > & | targets_statistics | ) |
Scales the target variables with given minimum and maximum values. It updates the target variables of the data matrix.
targets_statistics | Vector of statistics structures for all the targets in the data set. The size of that vector must be equal to the number of target variables. |
Definition at line 2241 of file data_set.cpp.
Vector< Statistics< double > > OpenNN::DataSet::scale_targets_minimum_maximum | ( | void | ) |
Scales the target variables with the calculated minimum and maximum values from the data matrix. It updates the target variables of the data matrix. It also returns a vector of vectors with the statistics of the target variables.
Definition at line 2272 of file data_set.cpp.
void OpenNN::DataSet::scrub_missing_values | ( | void | ) |
General method for dealing with missing values. It switches among the different scrubbing methods available, according to the corresponding value in the missing values object.
Definition at line 4407 of file data_set.cpp.
void OpenNN::DataSet::set | ( | const Matrix< double > & | new_data | ) |
Sets all variables from a data matrix.
new_data | Data matrix. |
Definition at line 833 of file data_set.cpp.
void OpenNN::DataSet::set | ( | const size_t & | new_variables_number, |
const size_t & | new_instances_number | ||
) |
Sets new numbers of instances and variables in the inputs targets data set. All the instances are set for training. All the variables are set as inputs.
new_variables_number | Number of variables. |
new_instances_number | Number of instances. |
Definition at line 859 of file data_set.cpp.
void OpenNN::DataSet::set | ( | const size_t & | new_inputs_number, |
const size_t & | new_targets_number, | ||
const size_t & | new_instances_number | ||
) |
Sets new numbers of instances and inputs and target variables in the data set. The variables in the data set are the number of inputs plus the number of targets.
new_inputs_number | Number of input variables. |
new_targets_number | Number of target variables. |
new_instances_number | Number of instances. |
Definition at line 910 of file data_set.cpp.
void OpenNN::DataSet::set | ( | const DataSet & | other_data_set | ) |
Sets the members of this data set object with those from another data set object.
other_data_set | Data set object to be copied. |
Definition at line 933 of file data_set.cpp.
void OpenNN::DataSet::set | ( | const tinyxml2::XMLDocument & | data_set_document | ) |
Sets the data set members from an XML document.
data_set_document | TinyXML document containing the member data. |
Definition at line 960 of file data_set.cpp.
void OpenNN::DataSet::set | ( | const std::string & | file_name | ) |
Sets the data set members by loading them from an XML file.
file_name | Data set XML file_name. |
Definition at line 973 of file data_set.cpp.
void OpenNN::DataSet::set_angular_variables | ( | const Vector< size_t > & | new_angular_variables | ) |
Sets the indices of those variables which represent angles.
new_angular_variables | Indices of angular variables. |
Definition at line 1201 of file data_set.cpp.
void OpenNN::DataSet::set_autoassociation | ( | const bool & | new_autoassociation | ) |
Sets a new autoassociation flag. If the new value is true, the data will be processed for autoassociation when loading. That is, the data file will contain the input data. The target data will be created as being equal to the input data. If the autoassociation value is set to false, the data from the file will not be processed.
new_autoassociation | Autoassociation value. |
Definition at line 1190 of file data_set.cpp.
void OpenNN::DataSet::set_data | ( | const Matrix< double > & | new_data | ) |
Sets a new data matrix. The number of rows must be equal to the number of instances. The number of columns must be equal to the number of variables. Indices of all training, generalization and testing instances and inputs and target variables do not change.
new_data | Data matrix. |
Definition at line 1025 of file data_set.cpp.
void OpenNN::DataSet::set_data_file_name | ( | const std::string & | new_data_file_name | ) |
Sets the name of the data file. It also loads the data from that file. Moreover, it sets the variables and instances objects.
new_data_file_name | Name of the file containing the data. |
Definition at line 1078 of file data_set.cpp.
void OpenNN::DataSet::set_default | ( | void | ) |
void OpenNN::DataSet::set_display | ( | const bool & | new_display | ) |
Sets a new display value. If it is set to true messages from this class are to be displayed on the screen; if it is set to false messages from this class are not to be displayed on the screen.
new_display | Display value. |
Definition at line 986 of file data_set.cpp.
Performs a first data file read in which the format is checked, and the numbers of variables, instances and missing values are set.
Definition at line 3344 of file data_set.cpp.
void OpenNN::DataSet::set_instance | ( | const size_t & | instance_index, |
const Vector< double > & | instance | ||
) |
Sets new inputs and target values of a single instance in the data set.
instance_index | Index of the instance. |
instance | New inputs and target values of the instance. |
Definition at line 1258 of file data_set.cpp.
void OpenNN::DataSet::set_instances_number | ( | const size_t & | new_instances_number | ) |
Sets a new number of instances in the data set. All instances are also set for training. The indices of the inputs and target variables do not change.
new_instances_number | Number of instances. |
Definition at line 1225 of file data_set.cpp.
void OpenNN::DataSet::set_lags_number | ( | const size_t & | new_lags_number | ) |
Sets a new number of lags to be defined for a time series prediction application. When loading the data file, the time series data will be modified according to this number.
new_lags_number | Number of lags (x-1, ..., x-l) to be used. |
Definition at line 1176 of file data_set.cpp.
void OpenNN::DataSet::set_missing_values_label | ( | const std::string & | new_missing_values_label | ) |
Sets a new label for the missing values.
new_missing_values_label | Label for the missing values. |
Definition at line 1146 of file data_set.cpp.
void OpenNN::DataSet::set_separator | ( | const Separator & | new_separator | ) |
Sets a new separator.
new_separator | Separator value. |
Definition at line 1099 of file data_set.cpp.
void OpenNN::DataSet::set_separator | ( | const std::string & | new_separator | ) |
Sets a new separator from a string.
new_separator | String with the separator value. |
Definition at line 1110 of file data_set.cpp.
void OpenNN::DataSet::set_variables_number | ( | const size_t & | new_variables_number | ) |
Sets a new number of input variables in the data set. The indices of the training, generalization and testing instances do not change. All variables are set as inputs.
new_variables_number | Number of variables. |
Definition at line 1242 of file data_set.cpp.
void OpenNN::DataSet::subtract_instance | ( | const size_t & | instance_index | ) |
Subtracts the inputs-targets instance with a given index from the data set. All instances are also set for training. Note that resizing is here necessary and therefore computationally expensive.
instance_index | Index of instance to be removed. |
Definition at line 1344 of file data_set.cpp.
void OpenNN::DataSet::subtract_variable | ( | const size_t & | variable_index | ) |
Removes a variable with given index from the data matrix.
variable_index | Index of variable to be subtracted. |
Definition at line 1418 of file data_set.cpp.
void OpenNN::DataSet::trim | ( | std::string & | str | ) | const |
Removes whitespaces from the start and the end of the string passed as argument. This includes the ASCII characters "\t", "\n", "\v", "\f", "\r", and " ".
str | String to be checked. |
Definition at line 4569 of file data_set.cpp.
void OpenNN::DataSet::unscale_data_mean_standard_deviation | ( | const Vector< Statistics< double > > & | data_statistics | ) |
Unscales the data matrix with given mean and standard deviation values. It updates the data matrix.
data_statistics | Vector of statistics structures for all the variables in the data set. The size of that vector must be equal to the number of variables. |
Definition at line 2363 of file data_set.cpp.
void OpenNN::DataSet::unscale_data_minimum_maximum | ( | const Vector< Statistics< double > > & | data_statistics | ) |
Unscales the data matrix with given minimum and maximum values. It updates the data matrix.
data_statistics | Vector of statistics structures for all the variables in the data set. The size of that vector must be equal to the number of variables. |
Definition at line 2376 of file data_set.cpp.
void OpenNN::DataSet::unscale_inputs_mean_standard_deviation | ( | const Vector< Statistics< double > > & | data_statistics | ) |
Unscales the input variables with given mean and standard deviation values. It updates the input variables of the data matrix.
data_statistics | Vector of statistics structures for all the variables in the data set. The size of that vector must be equal to the number of variables. |
Definition at line 2389 of file data_set.cpp.
void OpenNN::DataSet::unscale_inputs_minimum_maximum | ( | const Vector< Statistics< double > > & | data_statistics | ) |
Unscales the input variables with given minimum and maximum values. It updates the input variables of the data matrix.
data_statistics | Vector of statistics structures for all the data in the data set. The size of that vector must be equal to the number of variables. |
Definition at line 2404 of file data_set.cpp.
void OpenNN::DataSet::unscale_targets_mean_standard_deviation | ( | const Vector< Statistics< double > > & | data_statistics | ) |
Unscales the target variables with given mean and standard deviation values. It updates the target variables of the data matrix.
data_statistics | Vector of statistics structures for all the variables in the data set. The size of that vector must be equal to the number of variables. |
Definition at line 2419 of file data_set.cpp.
void OpenNN::DataSet::unscale_targets_minimum_maximum | ( | const Vector< Statistics< double > > & | data_statistics | ) |
Unscales the target variables with given minimum and maximum values. It updates the target variables of the data matrix.
data_statistics | Vector of statistics structures for all the variables. The size of that vector must be equal to the number of variables. |
Definition at line 2434 of file data_set.cpp.
Vector< size_t > OpenNN::DataSet::unuse_constant_variables | ( | void | ) |
Removes the input or target indices of those variables with zero standard deviation. It might change the size of the vectors containing the inputs and targets indices.
Definition at line 1456 of file data_set.cpp.
Vector< size_t > OpenNN::DataSet::unuse_repeated_instances | ( | void | ) |
Removes the training, generalization and testing indices of those instances which are repeated in the data matrix. It might change the size of the vectors containing the training, generalization and testing indices.
Definition at line 1497 of file data_set.cpp.
|
private |
Data Matrix. The number of rows is the number of instances. The number of columns is the number of variables.
Definition at line 426 of file data_set.h.