16 #include "missing_values.h"
43 set(new_instances_number, new_variables_number);
57 set(missing_values_document);
97 if(
this != &other_missing_values)
167 size_t variable_index;
169 for(
size_t i = 0; i < missing_values_number; i++)
171 variable_index =
items[i].variable_index;
173 missing_values_numbers[variable_index]++;
176 return(missing_values_numbers);
203 if(index >= missing_values_number)
205 std::ostringstream buffer;
207 buffer <<
"OpenNN Exception: MissingValues class.\n"
208 <<
"const Item& get_item(const size_t&) const method.\n"
209 <<
"Index (" << index <<
") must be less than number of missing values (" << missing_values_number <<
").\n";
211 throw std::logic_error(buffer.str());
216 return(
items[index]);
246 return(
"NoScrubbing");
250 std::ostringstream buffer;
252 buffer <<
"OpenNN Exception: MissingValues class.\n"
253 <<
"std::string write_scrubbing_method(void) const method.\n"
254 <<
"Unknown scrubbing method.\n";
256 throw std::logic_error(buffer.str());
277 return(
"no scrubbing");
281 std::ostringstream buffer;
283 buffer <<
"OpenNN Exception: MissingValues class.\n"
284 <<
"std::string write_scrubbing_method_text(void) const method.\n"
285 <<
"Unknown scrubbing method.\n";
287 throw std::logic_error(buffer.str());
423 if(index >= missing_values_number)
425 std::ostringstream buffer;
427 buffer <<
"OpenNN Exception: MissingValues class.\n"
428 <<
"void set_item(const size_t&, const size_t&, const size_t&) method.\n"
429 <<
"Index (" << index <<
") must be less than number of missing values (" << missing_values_number <<
").\n";
431 throw std::logic_error(buffer.str());
436 std::ostringstream buffer;
438 buffer <<
"OpenNN Exception: MissingValues class.\n"
439 <<
"void set_item(const size_t&, const size_t&, const size_t&) method.\n"
440 <<
"Index of instance (" << instance_index <<
") must be less than number of instances (" <<
instances_number <<
").\n";
442 throw std::logic_error(buffer.str());
447 std::ostringstream buffer;
449 buffer <<
"OpenNN Exception: MissingValues class.\n"
450 <<
"void set_item(const size_t&, const size_t&, const size_t&) method.\n"
451 <<
"Index of variable (" << variable_index <<
") must be less than number of variables (" <<
variables_number <<
").\n";
453 throw std::logic_error(buffer.str());
458 items[index].instance_index = instance_index;
459 items[index].variable_index = variable_index;
475 std::ostringstream buffer;
477 buffer <<
"OpenNN Exception: MissingValues class.\n"
478 <<
"void append(const size_t&, const size_t&) method.\n"
479 <<
"Index of instance (" << instance_index <<
") must be less than number of instances (" <<
instances_number <<
").\n";
481 throw std::logic_error(buffer.str());
486 std::ostringstream buffer;
488 buffer <<
"OpenNN Exception: MissingValues class.\n"
489 <<
"void append(const size_t&, const size_t&) method.\n"
490 <<
"Index of variable (" << variable_index <<
") must be less than number of instances (" <<
variables_number <<
").\n";
492 throw std::logic_error(buffer.str());
497 Item item(instance_index, variable_index);
499 items.push_back(item);
510 items.set(new_missing_values_number);
532 if(new_scrubbing_method ==
"Unuse")
536 else if(new_scrubbing_method ==
"Mean")
542 std::ostringstream buffer;
544 buffer <<
"OpenNN Exception: MissingValues class.\n"
545 <<
"void new_scrubbing_method(const std::string&) method.\n"
546 <<
"Unknown scrubbing method: " << new_scrubbing_method <<
".\n";
548 throw std::logic_error(buffer.str());
585 for(
size_t i = 0; i < missing_values_number; i++)
587 if(
items[i].instance_index == instance_index)
610 for(
size_t i = 0; i < missing_values_number; i++)
612 if(
items[i].instance_index == instance_index)
616 if(
items[i].variable_index == variables_indices[j])
643 for(
size_t i = 0; i < missing_values_number; i++)
645 if(
items[i].instance_index == instance_index &&
items[i].variable_index == variable_index)
665 for(
size_t i = 0; i < missing_values_number; i++)
669 missing_instances.push_back(
items[i].instance_index);
673 return(missing_instances);
697 for(
size_t i = 0; i < missing_values_number; i++)
701 missing_variables.push_back(
items[i].variable_index);
705 return(missing_variables);
759 Vector<Item> autoassociation_items(missing_values_number);
776 size_t variable_index;
777 size_t instance_index;
779 for(
size_t i = 0; i < missing_values_number; i++)
781 variable_index =
items[i].variable_index;
782 instance_index =
items[i].instance_index;
784 missing_indices[variable_index].push_back(instance_index);
787 return(missing_indices);
799 tinyxml2::XMLDocument* document =
new tinyxml2::XMLDocument;
801 std::ostringstream buffer;
805 tinyxml2::XMLElement* missing_values_element = document->NewElement(
"MissingValues");
807 document->InsertFirstChild(missing_values_element);
809 tinyxml2::XMLElement* element = NULL;
810 tinyxml2::XMLText* text = NULL;
816 element = document->NewElement(
"InstancesNumber");
817 missing_values_element->LinkEndChild(element);
822 text = document->NewText(buffer.str().c_str());
823 element->LinkEndChild(text);
828 element = document->NewElement(
"VariablesNumber");
829 missing_values_element->LinkEndChild(element);
834 text = document->NewText(buffer.str().c_str());
835 element->LinkEndChild(text);
840 element = document->NewElement(
"ScrubbingMethod");
841 missing_values_element->LinkEndChild(element);
844 element->LinkEndChild(text);
849 element = document->NewElement(
"MissingValuesNumber");
850 missing_values_element->LinkEndChild(element);
853 buffer << missing_values_number;
855 text = document->NewText(buffer.str().c_str());
856 element->LinkEndChild(text);
859 for(
size_t i = 0; i < missing_values_number; i++)
861 element = document->NewElement(
"Item");
862 element->SetAttribute(
"Index", (
unsigned)i+1);
863 missing_values_element->LinkEndChild(element);
867 tinyxml2::XMLElement* instance_index_element = document->NewElement(
"InstanceIndex");
868 element->LinkEndChild(instance_index_element);
871 buffer <<
items[i].instance_index;
873 text = document->NewText(buffer.str().c_str());
874 instance_index_element->LinkEndChild(text);
878 tinyxml2::XMLElement* variable_index_element = document->NewElement(
"VariableIndex");
879 element->LinkEndChild(variable_index_element);
882 buffer <<
items[i].variable_index;
884 text = document->NewText(buffer.str().c_str());
885 variable_index_element->LinkEndChild(text);
890 element = document->NewElement(
"Display");
891 missing_values_element->LinkEndChild(element);
896 text = document->NewText(buffer.str().c_str());
897 element->LinkEndChild(text);
912 std::ostringstream buffer;
916 const tinyxml2::XMLElement* missing_values_element = document.FirstChildElement(
"MissingValues");
918 if(!missing_values_element)
920 buffer <<
"OpenNN Exception: MissingValues class.\n"
921 <<
"void from_XML(const tinyxml2::XMLDocument&) method.\n"
922 <<
"Pointer to MissingValues element is NULL.\n";
924 throw std::logic_error(buffer.str());
929 const tinyxml2::XMLElement* instances_number_element = missing_values_element->FirstChildElement(
"InstancesNumber");
931 if(!instances_number_element)
933 buffer <<
"OpenNN Exception: MissingValues class.\n"
934 <<
"void from_XML(const tinyxml2::XMLDocument&) method.\n"
935 <<
"Pointer to instances number is NULL.\n";
937 throw std::logic_error(buffer.str());
944 const tinyxml2::XMLElement* variables_number_element = missing_values_element->FirstChildElement(
"VariablesNumber");
946 if(!variables_number_element)
948 buffer <<
"OpenNN Exception: MissingValues class.\n"
949 <<
"void from_XML(const tinyxml2::XMLDocument&) method.\n"
950 <<
"Pointer to variables number is NULL.\n";
952 throw std::logic_error(buffer.str());
959 const tinyxml2::XMLElement* scrubbing_method_element = missing_values_element->FirstChildElement(
"ScrubbingMethod");
961 if(!scrubbing_method_element)
963 buffer <<
"OpenNN Exception: MissingValues class.\n"
964 <<
"void from_XML(const tinyxml2::XMLDocument&) method.\n"
965 <<
"Pointer to scrubbing method element is NULL.\n";
967 throw std::logic_error(buffer.str());
970 const std::string scrubbing_method_string = scrubbing_method_element->GetText();
976 const tinyxml2::XMLElement* missing_values_number_element = missing_values_element->FirstChildElement(
"MissingValuesNumber");
978 if(!missing_values_number_element)
980 buffer <<
"OpenNN Exception: MissingValues class.\n"
981 <<
"void from_XML(const tinyxml2::XMLDocument&) method.\n"
982 <<
"Pointer to missing values number is NULL.\n";
984 throw std::logic_error(buffer.str());
987 const size_t missing_values_number = atoi(missing_values_number_element->GetText());
991 if(missing_values_number <= 0)
1000 const tinyxml2::XMLElement* start_element = missing_values_number_element;
1002 for(
size_t i = 0; i < missing_values_number; i++)
1004 const tinyxml2::XMLElement* item_element = start_element->NextSiblingElement(
"Item");
1005 start_element = item_element;
1009 buffer <<
"OpenNN Exception: MissingValues class.\n"
1010 <<
"void from_XML(const tinyxml2::XMLElement*) method.\n"
1011 <<
"Item " << i+1 <<
" is NULL.\n";
1013 throw std::logic_error(buffer.str());
1016 item_element->QueryUnsignedAttribute(
"Index", &index);
1020 buffer <<
"OpenNN Exception: MissingValues class.\n"
1021 <<
"void from_XML(const tinyxml2::XMLElement*) method.\n"
1022 <<
"Index " << index <<
" is not correct.\n";
1024 throw std::logic_error(buffer.str());
1029 const tinyxml2::XMLElement* instance_index_element = item_element->FirstChildElement(
"InstanceIndex");
1031 if(!instance_index_element)
1033 buffer <<
"OpenNN Exception: MissingValues class.\n"
1034 <<
"void from_XML(const tinyxml2::XMLElement*) method.\n"
1035 <<
"Pointer to instance index element is NULL.\n";
1037 throw std::logic_error(buffer.str());
1040 const size_t instance_index = atoi(instance_index_element->GetText());
1042 items[i].instance_index = instance_index;
1046 const tinyxml2::XMLElement* variable_index_element = item_element->FirstChildElement(
"VariableIndex");
1048 if(!instance_index_element)
1050 buffer <<
"OpenNN Exception: MissingValues class.\n"
1051 <<
"void from_XML(const tinyxml2::XMLElement*) method.\n"
1052 <<
"Pointer to variable index element is NULL.\n";
1054 throw std::logic_error(buffer.str());
1057 const size_t variable_index = atoi(variable_index_element->GetText());
1059 items[i].variable_index = variable_index;
1070 std::ostringstream buffer;
1074 buffer <<
"Missing values object\n"
1077 <<
"Missing values number: " << missing_values_number <<
"\n";
1079 for(
size_t i = 0; i < missing_values_number; i++)
1081 buffer <<
"Missing value " << i+1 <<
":\n"
1082 <<
"Instance index: " <<
items[i].instance_index <<
"\n"
1083 <<
"Variable index: " <<
items[i].variable_index <<
"\n";
1088 buffer <<
"Display: " <<
display <<
"\n";
1090 return(buffer.str());
std::string write_scrubbing_method_text(void) const
Returns a string with the name of the method used for scrubbing, as paragraph text.
void convert_time_series(const size_t &)
ScrubbingMethod get_scrubbing_method(void) const
Returns the method to be used for dealing with the missing values.
bool display
Display messages to screen.
MissingValues & operator=(const MissingValues &)
ScrubbingMethod
Enumeration of the available methods for scrubbing (handling) missing values.
void set_instances_number(const size_t &)
Sets the number of data set instances in this object.
void set_item(const size_t &, const size_t &, const size_t &)
Sets the instance index and the variable index of the missing value item at the given position.
Vector< size_t > arrange_missing_instances(void) const
Returns a vector with the indices of those instances with missing values.
bool operator==(const MissingValues &) const
const Vector< Item > & get_items(void) const
Returns a constant reference to the vector of missing value items.
void convert_autoassociation(void)
bool has_missing_values(void) const
size_t variables_number
Number of variables.
tinyxml2::XMLDocument * to_XML(void) const
Vector< size_t > get_missing_values_numbers(void) const
bool contains(const T &) const
Returns true if the vector contains a certain value, and false otherwise.
Vector< Item > items
Missing values.
void print(void) const
Prints to the screen information about the missing values object.
Vector< size_t > arrange_missing_variables(void) const
Returns a vector with the indices of those variables with missing values.
void set_display(const bool &)
void append(const size_t &, const size_t &)
void set_items(const Vector< Item > &)
Sets a new vector of missing value items.
const Item & get_item(const size_t &) const
const bool & get_display(void) const
void set_missing_values_number(const size_t &)
void set(void)
Sets a missing values object with zero instances, variables and missing values.
bool is_missing_value(const size_t &, const size_t &) const
ScrubbingMethod scrubbing_method
Method for handling missing values.
std::string to_string(void) const
Returns a string representation of the current MissingValues object.
size_t get_instances_number(void) const
Returns the number of instances in the data set.
void set_variables_number(const size_t &)
Sets the number of data set variables in this object.
void from_XML(const tinyxml2::XMLDocument &)
virtual ~MissingValues(void)
Destructor.
size_t count_missing_instances(void) const
Returns the number of instances with missing values.
size_t get_variables_number(void) const
Returns the number of variables in the data set.
std::string write_scrubbing_method(void) const
Returns a string with the name of the method used for scrubbing.
size_t instances_number
Number of instances.
void set_scrubbing_method(const ScrubbingMethod &)
size_t get_missing_values_number(void) const
Returns the number of missing values in the data set.
Vector< Vector< size_t > > arrange_missing_indices(void) const