Signature | Description |
---|---|
enum class fill_policy : unsigned char { value = 1, fill_forward = 2, fill_backward = 3, linear_interpolate = 4, /* Using the index as X coordinate */ linear_extrapolate = 5, /* Using the index as X coordinate */ mid_point = 6, /* Mid-point of x and y */ }; |
This policy determines how to fill missing values in the DataFrame. value: Fill all the missing values, in a given column, with the given value. fill_forward: Fill the missing values, in a given column, with the last valid value before the missing value. fill_backward: Fill the missing values, in a given column, with the first valid value after the missing value. linear_interpolate, linear_extrapolate: Use the index column as the X coordinate and the given column as the Y coordinate, and do interpolation/extrapolation as follows: $Y = Y_1 + \frac{X - X_1}{X_2 - X_1} \cdot (Y_2 - Y_1)$ |
Signature | Description | Parameters |
---|---|---|
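template<typename T, size_t N> void fill_missing(const std::array<const char *, N> col_names, fill_policy policy, const std::array<T, N> values = { }, int limit = -1); |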
It fills all the "missing values" with the given values and/or using the given method (see fill_policy above). Missing is determined by being NaN for types that have NaN. For types without NaN (e.g. string), the default value is considered the missing value. |
T: Type of the column(s) in the col_names array. N: Size of the col_names and values arrays. col_names: An array of names specifying the columns to fill. policy: Specifies the method to use to fill the missing values, for example forward fill, values, etc. values: If the policy is "value", use these values to fill the missing holes. Each value corresponds to the same index in the col_names array. limit: Specifies how many values to fill. Default is -1, meaning fill all missing values. |
static void test_fill_missing_values() { std::cout << "\nTesting fill_missing(values) ..." << std::endl; std::vector<unsigned long> idx = { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466 }; std::vector<double> d1 = { 1, 2, 3, 4, std::numeric_limits<double>::quiet_NaN(), 6, 7, std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), 11, 12, 13, 14 }; std::vector<double> d2 = { 8, 9, std::numeric_limits<double>::quiet_NaN(), 11, 12, std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), 20, 22, 23, 30, 31, std::numeric_limits<double>::quiet_NaN(), 1.89 }; std::vector<double> d3 = { std::numeric_limits<double>::quiet_NaN(), 16, std::numeric_limits<double>::quiet_NaN(), 18, 19, 16, std::numeric_limits<double>::quiet_NaN(), 0.34, 1.56, 0.34, 2.3, 0.34, std::numeric_limits<double>::quiet_NaN() }; std::vector<int> i1 = { 22, std::numeric_limits<int>::quiet_NaN(), std::numeric_limits<int>::quiet_NaN(), 25, std::numeric_limits<int>::quiet_NaN() }; MyDataFrame df; df.load_data(std::move(idx), std::make_pair("col_1", d1), std::make_pair("col_2", d2), std::make_pair("col_3", d3), std::make_pair("col_4", i1)); std::vector<std::string> s1 = { "qqqq", "wwww", "eeee", "rrrr", "tttt", "yyyy", "iiii", "oooo" }; df.load_column("col_str", std::move(s1), nan_policy::dont_pad_with_nans); std::cout << "Original DF:" << std::endl; df.write<std::ostream, int, double, std::string>(std::cout); df.fill_missing<double, 3>({ "col_1", "col_2", "col_3" }, fill_policy::value, { 1001, 1002, 1003 }, 3); df.fill_missing<std::string, 1>({ "col_str" }, fill_policy::value, { "XXXXXX" }); std::cout << "After fill missing with values DF:" << std::endl; df.write<std::ostream, int, double, std::string>(std::cout); } // ----------------------------------------------------------------------------- static void test_fill_missing_fill_forward() { 
std::cout << "\nTesting fill_missing(fill_forward) ..." << std::endl; std::vector<unsigned long> idx = { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466 }; std::vector<double> d1 = { 1, 2, 3, 4, std::numeric_limits<double>::quiet_NaN(), 6, 7, std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), 11, 12, 13, 14 }; std::vector<double> d2 = { 8, 9, std::numeric_limits<double>::quiet_NaN(), 11, 12, std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), 20, 22, 23, 30, 31, std::numeric_limits<double>::quiet_NaN(), 1.89 }; std::vector<double> d3 = { std::numeric_limits<double>::quiet_NaN(), 16, std::numeric_limits<double>::quiet_NaN(), 18, 19, 16, std::numeric_limits<double>::quiet_NaN(), 0.34, 1.56, 0.34, 2.3, 0.34, std::numeric_limits<double>::quiet_NaN() }; std::vector<int> i1 = { 22, std::numeric_limits<int>::quiet_NaN(), std::numeric_limits<int>::quiet_NaN(), 25, std::numeric_limits<int>::quiet_NaN() }; MyDataFrame df; df.load_data(std::move(idx), std::make_pair("col_1", d1), std::make_pair("col_2", d2), std::make_pair("col_3", d3), std::make_pair("col_4", i1)); std::vector<std::string> s1 = { "qqqq", "wwww", "eeee", "rrrr", "tttt", "yyyy", "iiii", "oooo" }; df.load_column("col_str", std::move(s1), nan_policy::dont_pad_with_nans); std::cout << "Original DF:" << std::endl; df.write<std::ostream, int, double, std::string>(std::cout); df.fill_missing<double, 3>({ "col_1", "col_2", "col_3" }, fill_policy::fill_forward, { }, 3); df.fill_missing<std::string, 1>({ "col_str" }, fill_policy::fill_forward, { }, 3); std::cout << "After fill missing with values DF:" << std::endl; df.write<std::ostream, int, double, std::string>(std::cout); } // ----------------------------------------------------------------------------- static void test_fill_missing_fill_backward() { std::cout << "\nTesting 
fill_missing(fill_backward) ..." << std::endl; std::vector<unsigned long> idx = { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466 }; std::vector<double> d1 = { 1, 2, 3, 4, std::numeric_limits<double>::quiet_NaN(), 6, 7, std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), 11, 12, 13, 14 }; std::vector<double> d2 = { 8, 9, std::numeric_limits<double>::quiet_NaN(), 11, 12, std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), 20, 22, 23, 30, 31, std::numeric_limits<double>::quiet_NaN(), 1.89 }; std::vector<double> d3 = { std::numeric_limits<double>::quiet_NaN(), 16, std::numeric_limits<double>::quiet_NaN(), 18, 19, 16, std::numeric_limits<double>::quiet_NaN(), 0.34, 1.56, 0.34, 2.3, 0.34, std::numeric_limits<double>::quiet_NaN() }; std::vector<int> i1 = { 22, std::numeric_limits<int>::quiet_NaN(), std::numeric_limits<int>::quiet_NaN(), 25, std::numeric_limits<int>::quiet_NaN() }; std::vector<std::string> s1 = { "qqqq", "wwww", "eeee", "rrrr", "tttt", "yyyy", "iiii", "oooo", "pppp", "2222", "aaaa", "dddd" }; MyDataFrame df; df.load_data(std::move(idx), std::make_pair("col_1", d1), std::make_pair("col_2", d2), std::make_pair("col_3", d3), std::make_pair("col_str", s1), std::make_pair("col_4", i1)); std::cout << "Original DF:" << std::endl; df.write<std::ostream, int, double, std::string>(std::cout); df.fill_missing<double, 3>({ "col_1", "col_2", "col_3" }, fill_policy::fill_backward); df.fill_missing<std::string, 1>({ "col_str" }, fill_policy::fill_backward); std::cout << "After fill missing with values DF:" << std::endl; df.write<std::ostream, int, double, std::string>(std::cout); } // ----------------------------------------------------------------------------- static void test_fill_missing_fill_linear_interpolation() { std::cout << "\nTesting fill_missing(linear_interpolate) ..." 
<< std::endl; std::vector<unsigned long> idx = { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466 }; std::vector<double> d1 = { 1, 2, 3, 4, std::numeric_limits<double>::quiet_NaN(), 6, 7, std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), 11, 12, 13, 14 }; std::vector<double> d2 = { 8, 9, std::numeric_limits<double>::quiet_NaN(), 11, 12, std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), 20, 22, 23, 30, 31, std::numeric_limits<double>::quiet_NaN(), 1.89 }; std::vector<double> d3 = { std::numeric_limits<double>::quiet_NaN(), 16, std::numeric_limits<double>::quiet_NaN(), 18, 19, 16, std::numeric_limits<double>::quiet_NaN(), 0.34, 1.56, 0.34, 2.3, 0.34, std::numeric_limits<double>::quiet_NaN() }; std::vector<int> i1 = { 22, std::numeric_limits<int>::quiet_NaN(), std::numeric_limits<int>::quiet_NaN(), 25, std::numeric_limits<int>::quiet_NaN() }; MyDataFrame df; df.load_data(std::move(idx), std::make_pair("col_1", d1), std::make_pair("col_2", d2), std::make_pair("col_3", d3), std::make_pair("col_4", i1)); std::cout << "Original DF:" << std::endl; df.write<std::ostream, int, double>(std::cout); df.fill_missing<double, 3>({ "col_1", "col_2", "col_3" }, fill_policy::linear_interpolate); std::cout << "After fill missing with values DF:" << std::endl; df.write<std::ostream, int, double>(std::cout); } // ----------------------------------------------------------------------------- static void test_fill_missing_mid_point() { std::cout << "\nTesting fill_missing(mid_point) ..." 
<< std::endl; std::vector<unsigned long> idx = { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466 }; std::vector<double> d1 = { 1, 2, 3, 4, std::numeric_limits<double>::quiet_NaN(), 6, 7, std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), 11, 12, 13, 14 }; std::vector<double> d2 = { 8, 9, std::numeric_limits<double>::quiet_NaN(), 11, 12, std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), 20, 22, 23, 30, 31, std::numeric_limits<double>::quiet_NaN(), 1.89 }; std::vector<double> d3 = { std::numeric_limits<double>::quiet_NaN(), 16, std::numeric_limits<double>::quiet_NaN(), 18, 19, 16, std::numeric_limits<double>::quiet_NaN(), 0.34, 1.56, 0.34, 2.3, 0.34, std::numeric_limits<double>::quiet_NaN() }; std::vector<int> i1 = { 22, std::numeric_limits<int>::quiet_NaN(), std::numeric_limits<int>::quiet_NaN(), 25, std::numeric_limits<int>::quiet_NaN() }; MyDataFrame df; df.load_data(std::move(idx), std::make_pair("col_1", d1), std::make_pair("col_2", d2), std::make_pair("col_3", d3), std::make_pair("col_4", i1)); std::vector<std::string> s1 = { "qqqq", "wwww", "eeee", "rrrr", "tttt", "yyyy", "iiii", "oooo" }; df.load_column("col_str", std::move(s1), nan_policy::dont_pad_with_nans); // std::cout << "Original DF:" << std::endl; // df.write<std::ostream, int, double, std::string>(std::cout); df.fill_missing<double, 3>({ "col_1", "col_2", "col_3" }, fill_policy::mid_point); std::cout << "After fill missing with values DF:" << std::endl; df.write<std::ostream, int, double, std::string>(std::cout); }