#include "CommonHelp.h" void groupby_help(void) { cerr << "\nTool: bedtools groupby " << endl; cerr << "Version: " << VERSION << "\n"; cerr << "Summary: Summarizes a dataset column based upon" << endl; cerr << "\t common column groupings. Akin to the SQL \"group by\" command." << endl << endl; cerr << "Usage:\t " << "bedtools groupby" << " -g [group_column(s)] -c [op_column(s)] -o [ops] " << endl; cerr << "\t " << "cat [FILE] | " << "bedtools groupby" << " -g [group_column(s)] -c [op_column(s)] -o [ops] " << endl << endl; cerr << "Options: " << endl; cerr << "\t-i\t\t" << "Input file. Assumes \"stdin\" if omitted." << endl << endl; cerr << "\t-g -grp\t\t" << "Specify the columns (1-based) for the grouping." << endl; cerr << "\t\t\tThe columns must be comma separated." << endl; cerr << "\t\t\t- Default: 1,2,3" << endl << endl; cerr << "\t-c -opCols\t" << "Specify the column (1-based) that should be summarized." << endl; cerr << "\t\t\t- Required." << endl << endl; cerr << "\t-o -ops\t\t" << "Specify the operation that should be applied to opCol." << endl; cerr << "\t\t\tValid operations:" << endl; cerr << "\t\t\t sum, count, count_distinct, min, max," << endl; cerr << "\t\t\t mean, median, mode, antimode," << endl; cerr << "\t\t\t stdev, sstdev (sample standard dev.)," << endl; cerr << "\t\t\t collapse (i.e., print a comma separated list (duplicates allowed)), " << endl; cerr << "\t\t\t distinct (i.e., print a comma separated list (NO duplicates allowed)), " << endl; cerr << "\t\t\t distinct_sort_num (as distinct, but sorted numerically, ascending), " << endl; cerr << "\t\t\t distinct_sort_num_desc (as distinct, but sorted numerically, descending), " << endl; cerr << "\t\t\t concat (i.e., merge values into a single, non-delimited string), " << endl; cerr << "\t\t\t freqdesc (i.e., print desc. list of values:freq)" << endl; cerr << "\t\t\t freqasc (i.e., print asc. list of values:freq)" << endl; cerr << "\t\t\t first (i.e., print first value)" << endl; cerr << "\t\t\t last (i.e., print last value)" << endl; cerr << "\t\t\t- Default: sum" << endl << endl; cerr << "\t\tIf there is only column, but multiple operations, all operations will be" << endl; cerr << "\t\tapplied on that column. Likewise, if there is only one operation, but" << endl; cerr << "\t\tmultiple columns, that operation will be applied to all columns." << endl; cerr << "\t\tOtherwise, the number of columns must match the the number of operations," << endl; cerr << "\t\tand will be applied in respective order." << endl; cerr << "\t\tE.g., \"-c 5,4,6 -o sum,mean,count\" will give the sum of column 5," << endl; cerr << "\t\tthe mean of column 4, and the count of column 6." << endl; cerr << "\t\tThe order of output columns will match the ordering given in the command." << endl << endl<