Signature | Description | Parameters |
---|---|---|
template<typename F, typename T, typename ... Ts> DataFrame groupby (F &&func, const char *gb_col_name = nullptr, sort_state already_sorted = sort_state::not_sorted) const; |
groupby() copies the DataFrame into a temporary DataFrame and sorts that copy by gb_col_name before performing the group-by operation. If gb_col_name is nullptr, it groups by the index. |
F: Type of the functor to be applied to the columns when grouping. T: Type of the groupby column; when grouping by index, it is the type of the index. Ts: List of the types of all data columns. Each type should be specified in the list only once. func: The functor that performs the groupby. The specs for the functor are in a separate document. gb_col_name: Name of the column to group by; if it is nullptr, the DataFrame is grouped by index. already_sorted: If the DataFrame is already sorted by gb_col_name, specifying this saves the expensive sort operation. |
template<typename F, typename T, typename ... Ts> std::future<DataFrame> groupby_async (F &&func, const char *gb_col_name = nullptr, sort_state already_sorted = sort_state::not_sorted) const; |
Same as groupby() above, but executed asynchronously; the resulting DataFrame is delivered through the returned std::future<DataFrame>. |
std::vector<unsigned long> ulgvec2 = { 123450, 123451, 123452, 123450, 123455, 123450, 123449, 123448, 123451, 123452, 123452, 123450, 123455, 123450, 123454, 123453, 123456, 123457, 123458, 123459, 123460, 123441, 123442, 123432, 123433, 123434, 123435, 123436 }; std::vector<unsigned long> xulgvec2 = ulgvec2; std::vector<int> intvec2 = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 22, 23, 24, 25, 30, 33, 34, 35, 36, 40, 45, 46 }; std::vector<double> xdblvec2 = { 1.2345, 2.2345, 3.2345, 4.2345, 5.2345, 3.0, 0.9999, 10.0, 4.25, 0.009, 1.111, 8.0, 2.2222, 3.3333, 11.0, 5.25, 1.009, 2.111, 9.0, 3.2222, 4.3333, 12.0, 6.25, 2.009, 3.111, 10.0, 4.2222, 5.3333 }; std::vector<double> dblvec22 = { 0.998, 0.3456, 0.056, 0.15678, 0.00345, 0.923, 0.06743, 0.1, 0.0056, 0.07865, -0.9999, 0.0111, 0.1002, -0.8888, 0.14, 0.0456, 0.078654, -0.8999, 0.01119, 0.8002, -0.9888, 0.2, 0.1056, 0.87865, -0.6999, 0.4111, 0.1902, -0.4888 }; std::vector<std::string> strvec2 = { "4% of something", "Description 4/5", "This is bad", "3.4% of GDP", "Market drops", "Market pulls back", "$15 increase", "Running fast", "C++14 development", "Some explanation", "More strings", "Bonds vs. 
Equities", "Almost done", "Here comes the sun", "XXXX1", "XXXX04", "XXXX2", "XXXX3", "XXXX4", "XXXX4", "XXXX5", "XXXX6", "XXXX7", "XXXX10", "XXXX11", "XXXX01", "XXXX02", "XXXX03" }; MyDataFrame dfx; dfx.load_data(std::move(ulgvec2), std::make_pair("xint_col", intvec2), std::make_pair("dbl_col", xdblvec2), std::make_pair("dbl_col_2", dblvec22), std::make_pair("str_col", strvec2), std::make_pair("ul_col", xulgvec2)); dfx.write<std::ostream, int, unsigned long, double, std::string>(std::cout); const MyDataFrame dfxx = dfx.groupby<GroupbySum, unsigned long, int, unsigned long, std::string, double>(GroupbySum()); dfxx.write<std::ostream, int, unsigned long, double, std::string>(std::cout); const MyDataFrame dfxx2 = dfx.groupby<GroupbySum, std::string, int, unsigned long, std::string, double>(GroupbySum(), "str_col"); dfxx2.write<std::ostream, int, unsigned long, double, std::string>(std::cout); std::future<MyDataFrame> gb_fut = dfx.groupby_async<GroupbySum, double, int, unsigned long, std::string, double>(GroupbySum(), "dbl_col_2"); const MyDataFrame dfxx3 = gb_fut.get(); dfxx3.write<std::ostream, int, unsigned long, double, std::string>(std::cout); std::cout << "\nTesting Bucketize() ..." << std::endl; const MyDataFrame::IndexType interval = 4; std::future<MyDataFrame> b_fut = dfx.bucketize_async<GroupbySum, int, unsigned long, std::string, double>(GroupbySum(), interval); const MyDataFrame buck_df = b_fut.get(); buck_df.write<std::ostream, int, unsigned long, double, std::string>(std::cout, true);