Signature Description

enum class fill_policy : unsigned char  {
    // Fill all missing values with the given substitute
    value = 1,

    // Fill the missing values with the previous value
    fill_forward = 2,

    // Fill the missing values with the next value
    fill_backward = 3,

    //           X - X1
    // Y = Y1 + --------- * (Y2 - Y1)
    //           X2 - X1
    // Use the index column as X coordinate and the given column as Y coordinate
    linear_interpolate = 4,
    linear_extrapolate = 5,

    // Fill missing values with mid-point of surrounding values
    mid_point = 6,
};
This policy determines how to fill missing values in the DataFrame

Signature Description Parameters

template<typename T, size_t N>
void
fill_missing(const std::array col_names,
             fill_policy policy,
             const std::array values = { },
             int limit = -1);
        
It fills all the "missing values" with the given values, and/or using the given method (See fill_policy above). Missing is determined by being NaN for types that have NaN. For types without NaN (e.g. string), default value is considered missing value T: Type of the column(s) in col_names array
N: Size of col_names and values array col_names: An array of names specifying the columns to fill.
policy: Specifies the method to use to fill the missing values. For example; forward fill, values, etc.
values: If the policy is "values", use these values to fill the missing holes. Each value corresponds to the same index in the col_names array.
limit: Specifies how many values to fill. Default is -1 meaning fill all missing values.
static void test_fill_missing_values()  {

    std::cout << "\nTesting fill_missing(values) ..." << std::endl;

    std::vector<unsigned long>  idx =
        { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466 };
    std::vector<double> d1 = { 1, 2, 3, 4,
                               std::numeric_limits<double>::quiet_NaN(),
                               6, 7,
                               std::numeric_limits<double>::quiet_NaN(),
                               std::numeric_limits<double>::quiet_NaN(),
                               std::numeric_limits<double>::quiet_NaN(),
                               11, 12, 13, 14 };
    std::vector<double> d2 = { 8, 9,
                               std::numeric_limits<double>::quiet_NaN(),
                               11, 12,
                               std::numeric_limits<double>::quiet_NaN(),
                               std::numeric_limits<double>::quiet_NaN(),
                               20, 22, 23, 30, 31,
                               std::numeric_limits<double>::quiet_NaN(),
                               1.89 };
    std::vector<double> d3 = { std::numeric_limits<double>::quiet_NaN(),
                               16,
                               std::numeric_limits<double>::quiet_NaN(),
                               18, 19, 16,
                               std::numeric_limits<double>::quiet_NaN(),
                               0.34, 1.56, 0.34, 2.3, 0.34,
                               std::numeric_limits<double>::quiet_NaN() };
    std::vector<int>    i1 = { 22,
                               std::numeric_limits<int>::quiet_NaN(),
                               std::numeric_limits<int>::quiet_NaN(),
                               25,
                               std::numeric_limits<int>::quiet_NaN() };
    MyDataFrame         df;

    df.load_data(std::move(idx),
                 std::make_pair("col_1", d1),
                 std::make_pair("col_2", d2),
                 std::make_pair("col_3", d3),
                 std::make_pair("col_4", i1));

    std::vector<std::string>    s1 = { "qqqq", "wwww", "eeee", "rrrr", "tttt", "yyyy", "iiii", "oooo" };

    df.load_column("col_str", std::move(s1), nan_policy::dont_pad_with_nans);

    std::cout << "Original DF:" << std::endl;
    df.write<std::ostream, int, double, std::string>(std::cout);

    df.fill_missing<double, 3>({ "col_1", "col_2", "col_3" }, fill_policy::value, { 1001, 1002, 1003 }, 3);
    df.fill_missing<std::string, 1>({ "col_str" }, fill_policy::value, { "XXXXXX" });

    std::cout << "After fill missing with values DF:" << std::endl;
    df.write<std::ostream, int, double, std::string>(std::cout);
}

// -----------------------------------------------------------------------------

static void test_fill_missing_fill_forward()  {

    std::cout << "\nTesting fill_missing(fill_forward) ..." << std::endl;

    std::vector<unsigned long>  idx =
        { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466 };
    std::vector<double> d1 = { 1, 2, 3, 4,
                               std::numeric_limits<double>::quiet_NaN(),
                               6, 7,
                               std::numeric_limits<double>::quiet_NaN(),
                               std::numeric_limits<double>::quiet_NaN(),
                               std::numeric_limits<double>::quiet_NaN(),
                               11, 12, 13, 14 };
    std::vector<double> d2 = { 8, 9,
                               std::numeric_limits<double>::quiet_NaN(),
                               11, 12,
                               std::numeric_limits<double>::quiet_NaN(),
                               std::numeric_limits<double>::quiet_NaN(),
                               20, 22, 23, 30, 31,
                               std::numeric_limits<double>::quiet_NaN(),
                               1.89 };
    std::vector<double> d3 = { std::numeric_limits<double>::quiet_NaN(),
                               16,
                               std::numeric_limits<double>::quiet_NaN(),
                               18, 19, 16,
                               std::numeric_limits<double>::quiet_NaN(),
                               0.34, 1.56, 0.34, 2.3, 0.34,
                               std::numeric_limits<double>::quiet_NaN() };
    std::vector<int>    i1 = { 22,
                               std::numeric_limits<int>::quiet_NaN(),
                               std::numeric_limits<int>::quiet_NaN(),
                               25,
                               std::numeric_limits<int>::quiet_NaN() };
    MyDataFrame         df;

    df.load_data(std::move(idx),
                 std::make_pair("col_1", d1),
                 std::make_pair("col_2", d2),
                 std::make_pair("col_3", d3),
                 std::make_pair("col_4", i1));

    std::vector<std::string>    s1 = { "qqqq", "wwww", "eeee", "rrrr", "tttt", "yyyy", "iiii", "oooo" };

    df.load_column("col_str", std::move(s1), nan_policy::dont_pad_with_nans);

    std::cout << "Original DF:" << std::endl;
    df.write<std::ostream, int, double, std::string>(std::cout);

    df.fill_missing<double, 3>({ "col_1", "col_2", "col_3" }, fill_policy::fill_forward, { }, 3);
    df.fill_missing<std::string, 1>({ "col_str" }, fill_policy::fill_forward, {  }, 3);

    std::cout << "After fill missing with values DF:" << std::endl;
    df.write<std::ostream, int, double, std::string>(std::cout);
}

// -----------------------------------------------------------------------------

static void test_fill_missing_fill_backward()  {

    std::cout << "\nTesting fill_missing(fill_backward) ..." << std::endl;

    std::vector<unsigned long>  idx =
        { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466 };
    std::vector<double> d1 = { 1, 2, 3, 4,
                               std::numeric_limits<double>::quiet_NaN(),
                               6, 7,
                               std::numeric_limits<double>::quiet_NaN(),
                               std::numeric_limits<double>::quiet_NaN(),
                               std::numeric_limits<double>::quiet_NaN(),
                               11, 12, 13, 14 };
    std::vector<double> d2 = { 8, 9,
                               std::numeric_limits<double>::quiet_NaN(),
                               11, 12,
                               std::numeric_limits<double>::quiet_NaN(),
                               std::numeric_limits<double>::quiet_NaN(),
                               20, 22, 23, 30, 31,
                               std::numeric_limits<double>::quiet_NaN(),
                               1.89 };
    std::vector<double> d3 = { std::numeric_limits<double>::quiet_NaN(),
                               16,
                               std::numeric_limits<double>::quiet_NaN(),
                               18, 19, 16,
                               std::numeric_limits<double>::quiet_NaN(),
                               0.34, 1.56, 0.34, 2.3, 0.34,
                               std::numeric_limits<double>::quiet_NaN() };
    std::vector<int>    i1 = { 22,
                               std::numeric_limits<int>::quiet_NaN(),
                               std::numeric_limits<int>::quiet_NaN(),
                               25,
                               std::numeric_limits<int>::quiet_NaN() };
    std::vector<std::string>    s1 = { "qqqq", "wwww", "eeee", "rrrr", "tttt", "yyyy", "iiii", "oooo", "pppp", "2222", "aaaa", "dddd" };
    MyDataFrame         df;

    df.load_data(std::move(idx),
                 std::make_pair("col_1", d1),
                 std::make_pair("col_2", d2),
                 std::make_pair("col_3", d3),
                 std::make_pair("col_str", s1),
                 std::make_pair("col_4", i1));

    std::cout << "Original DF:" << std::endl;
    df.write<std::ostream, int, double, std::string>(std::cout);

    df.fill_missing<double, 3>({ "col_1", "col_2", "col_3" }, fill_policy::fill_backward);
    df.fill_missing<std::string, 1>({ "col_str" }, fill_policy::fill_backward);

    std::cout << "After fill missing with values DF:" << std::endl;
    df.write<std::ostream, int, double, std::string>(std::cout);
}

// -----------------------------------------------------------------------------

static void test_fill_missing_fill_linear_interpolation()  {

    std::cout << "\nTesting fill_missing(linear_interpolate) ..." << std::endl;

    std::vector<unsigned long>  idx =
        { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466 };
    std::vector<double> d1 = { 1, 2, 3, 4,
                               std::numeric_limits<double>::quiet_NaN(),
                               6, 7,
                               std::numeric_limits<double>::quiet_NaN(),
                               std::numeric_limits<double>::quiet_NaN(),
                               std::numeric_limits<double>::quiet_NaN(),
                               11, 12, 13, 14 };
    std::vector<double> d2 = { 8, 9,
                               std::numeric_limits<double>::quiet_NaN(),
                               11, 12,
                               std::numeric_limits<double>::quiet_NaN(),
                               std::numeric_limits<double>::quiet_NaN(),
                               20, 22, 23, 30, 31,
                               std::numeric_limits<double>::quiet_NaN(),
                               1.89 };
    std::vector<double> d3 = { std::numeric_limits<double>::quiet_NaN(),
                               16,
                               std::numeric_limits<double>::quiet_NaN(),
                               18, 19, 16,
                               std::numeric_limits<double>::quiet_NaN(),
                               0.34, 1.56, 0.34, 2.3, 0.34,
                               std::numeric_limits<double>::quiet_NaN() };
    std::vector<int>    i1 = { 22,
                               std::numeric_limits<int>::quiet_NaN(),
                               std::numeric_limits<int>::quiet_NaN(),
                               25,
                               std::numeric_limits<int>::quiet_NaN() };
    MyDataFrame         df;

    df.load_data(std::move(idx),
                 std::make_pair("col_1", d1),
                 std::make_pair("col_2", d2),
                 std::make_pair("col_3", d3),
                 std::make_pair("col_4", i1));

    std::cout << "Original DF:" << std::endl;
    df.write<std::ostream, int, double>(std::cout);

    df.fill_missing<double, 3>({ "col_1", "col_2", "col_3" }, fill_policy::linear_interpolate);

    std::cout << "After fill missing with values DF:" << std::endl;
    df.write<std::ostream, int, double>(std::cout);
}

// -----------------------------------------------------------------------------

static void test_fill_missing_mid_point()  {

    std::cout << "\nTesting fill_missing(mid_point) ..." << std::endl;

    std::vector<unsigned long>  idx =
        { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466 };
    std::vector<double> d1 = { 1, 2, 3, 4,
                               std::numeric_limits<double>::quiet_NaN(),
                               6, 7,
                               std::numeric_limits<double>::quiet_NaN(),
                               std::numeric_limits<double>::quiet_NaN(),
                               std::numeric_limits<double>::quiet_NaN(),
                               11, 12, 13, 14 };
    std::vector<double> d2 = { 8, 9,
                               std::numeric_limits<double>::quiet_NaN(),
                               11, 12,
                               std::numeric_limits<double>::quiet_NaN(),
                               std::numeric_limits<double>::quiet_NaN(),
                               20, 22, 23, 30, 31,
                               std::numeric_limits<double>::quiet_NaN(),
                               1.89 };
    std::vector<double> d3 = { std::numeric_limits<double>::quiet_NaN(),
                               16,
                               std::numeric_limits<double>::quiet_NaN(),
                               18, 19, 16,
                               std::numeric_limits<double>::quiet_NaN(),
                               0.34, 1.56, 0.34, 2.3, 0.34,
                               std::numeric_limits<double>::quiet_NaN() };
    std::vector<int>    i1 = { 22,
                               std::numeric_limits<int>::quiet_NaN(),
                               std::numeric_limits<int>::quiet_NaN(),
                               25,
                               std::numeric_limits<int>::quiet_NaN() };
    MyDataFrame         df;

    df.load_data(std::move(idx),
                 std::make_pair("col_1", d1),
                 std::make_pair("col_2", d2),
                 std::make_pair("col_3", d3),
                 std::make_pair("col_4", i1));

    std::vector<std::string>    s1 = { "qqqq", "wwww", "eeee", "rrrr", "tttt", "yyyy", "iiii", "oooo" };

    df.load_column("col_str", std::move(s1), nan_policy::dont_pad_with_nans);

    // std::cout << "Original DF:" << std::endl;
    // df.write<std::ostream, int, double, std::string>(std::cout);

    df.fill_missing<double, 3>({ "col_1", "col_2", "col_3" }, fill_policy::mid_point);

    std::cout << "After fill missing with values DF:" << std::endl;
    df.write<std::ostream, int, double, std::string>(std::cout);
}