/* * Copyright (c) 1997-1999 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ /* * planner.c -- find the optimal plan */ /* $Id: rplanner.c,v 1.23 1999/10/26 04:54:29 stevenj Exp $ */ #ifdef FFTW_USING_CILK #include #include #endif #include #include #include #include extern fftw_codelet_desc *rfftw_config[]; /* global from rconfig.c */ extern fftw_rgeneric_codelet fftw_hc2hc_forward_generic; extern fftw_rgeneric_codelet fftw_hc2hc_backward_generic; fftw_plan_hook_ptr rfftw_plan_hook = (fftw_plan_hook_ptr) NULL; /* timing rfftw plans: */ static double rfftw_measure_runtime(fftw_plan plan, fftw_real *in, int istride, fftw_real *out, int ostride) { fftw_time begin, end, start; double t, tmin; int i, iter; int n; int repeat; int howmany = plan->vector_size; n = plan->n; iter = 1; for (;;) { tmin = 1.0E10; for (i = 0; i < n * howmany; ++i) in[istride * i] = 0.0; start = fftw_get_time(); /* repeat the measurement FFTW_TIME_REPEAT times */ for (repeat = 0; repeat < FFTW_TIME_REPEAT; ++repeat) { begin = fftw_get_time(); for (i = 0; i < iter; ++i) rfftw(plan, howmany, in, istride, istride, out, ostride, ostride); end = fftw_get_time(); t = fftw_time_to_sec(fftw_time_diff(end, begin)); if (t < tmin) tmin = t; /* do not run for too long */ t = fftw_time_to_sec(fftw_time_diff(end, start)); if (t > FFTW_TIME_LIMIT) break; } if (tmin >= FFTW_TIME_MIN) break; iter *= 2; } tmin /= (double) iter; return tmin; } /* auxiliary functions */ static void rcompute_cost(fftw_plan plan, fftw_real *in, int istride, fftw_real *out, int ostride) { if (plan->flags & FFTW_MEASURE) plan->cost = rfftw_measure_runtime(plan, in, istride, out, ostride); else { double c; c = plan->n * fftw_estimate_node(plan->root) * plan->vector_size; plan->cost = c; } } static void run_plan_hooks(fftw_plan p) { if (rfftw_plan_hook && p) { fftw_complete_twiddle(p->root, p->n); rfftw_plan_hook(p); } } /* macrology */ #define FOR_ALL_RCODELETS(p) \ fftw_codelet_desc **__q, *p; \ for (__q = &rfftw_config[0]; (p = (*__q)); ++__q) /****************************************** * Recursive planner * ******************************************/ static fftw_plan rplanner(fftw_plan *table, int n, fftw_direction dir, int flags, int vector_size, fftw_real *, int, fftw_real *, int); /* * the planner consists of two parts: one that tries to * use accumulated wisdom, and one that does not. * A small driver invokes both parts in sequence */ /* planner with wisdom: look up the codelet suggested by the wisdom */ static fftw_plan rplanner_wisdom(fftw_plan *table, int n, fftw_direction dir, int flags, int vector_size, fftw_real *in, int istride, fftw_real *out, int ostride) { fftw_plan best = (fftw_plan) 0; fftw_plan_node *node; int have_wisdom; enum fftw_node_type wisdom_type; int wisdom_signature; fftw_recurse_kind wisdom_recurse_kind; /* see if we remember any wisdom for this case */ have_wisdom = fftw_wisdom_lookup(n, flags, dir, RFFTW_WISDOM, istride, ostride, &wisdom_type, &wisdom_signature, &wisdom_recurse_kind, 0); if (!have_wisdom) return best; if (wisdom_type == FFTW_REAL2HC || wisdom_type == FFTW_HC2REAL) { FOR_ALL_RCODELETS(p) { if (p->dir == dir && p->type == wisdom_type) { /* see if wisdom applies */ if (wisdom_signature == p->signature && p->size == n) { if (wisdom_type == FFTW_REAL2HC) node = fftw_make_node_real2hc(n, p); else node = fftw_make_node_hc2real(n, p); best = fftw_make_plan(n, dir, node, flags, p->type, p->signature, FFTW_NORMAL_RECURSE, vector_size); fftw_use_plan(best); run_plan_hooks(best); return best; } } } } if (wisdom_type == FFTW_HC2HC) { FOR_ALL_RCODELETS(p) { if (p->dir == dir && p->type == wisdom_type) { /* see if wisdom applies */ if (wisdom_signature == p->signature && p->size > 1 && (n % p->size) == 0) { fftw_plan r = rplanner(table, n / p->size, dir, flags | FFTW_NO_VECTOR_RECURSE, wisdom_recurse_kind == FFTW_VECTOR_RECURSE ? p->size : vector_size, in, istride, out, ostride); if (!r) continue; node = fftw_make_node_hc2hc(n, dir, p, r->root, flags); best = fftw_make_plan(n, dir, node, flags, p->type, p->signature, wisdom_recurse_kind, vector_size); fftw_use_plan(best); run_plan_hooks(best); fftw_destroy_plan_internal(r); return best; } } } } /* * BUG (or: TODO) Can we have generic wisdom? This is probably * an academic question */ return best; } /* * planner with no wisdom: try all combinations and pick * the best */ static fftw_plan rplanner_normal(fftw_plan *table, int n, fftw_direction dir, int flags, int vector_size, fftw_real *in, int istride, fftw_real *out, int ostride) { fftw_plan best = (fftw_plan) 0; fftw_plan newplan; fftw_plan_node *node; /* see if we have any codelet that solves the problem */ { FOR_ALL_RCODELETS(p) { if (p->dir == dir && (p->type == FFTW_REAL2HC || p->type == FFTW_HC2REAL)) { if (p->size == n) { if (p->type == FFTW_REAL2HC) node = fftw_make_node_real2hc(n, p); else node = fftw_make_node_hc2real(n, p); newplan = fftw_make_plan(n, dir, node, flags, p->type, p->signature, FFTW_NORMAL_RECURSE, vector_size); fftw_use_plan(newplan); run_plan_hooks(newplan); rcompute_cost(newplan, in, istride, out, ostride); best = fftw_pick_better(newplan, best); } } } } /* Then, try all available twiddle codelets */ { FOR_ALL_RCODELETS(p) { if (p->dir == dir && p->type == FFTW_HC2HC) { if ((n % p->size) == 0 && p->size > 1 && (!best || n != p->size)) { fftw_plan r = rplanner(table, n / p->size, dir, flags | FFTW_NO_VECTOR_RECURSE, vector_size, in, istride, out, ostride); if (!r) continue; node = fftw_make_node_hc2hc(n, dir, p, r->root, flags); newplan = fftw_make_plan(n, dir, node, flags, p->type, p->signature, FFTW_NORMAL_RECURSE, vector_size); fftw_use_plan(newplan); run_plan_hooks(newplan); fftw_destroy_plan_internal(r); rcompute_cost(newplan, in, istride, out, ostride); best = fftw_pick_better(newplan, best); } } } } /* try vector recursion unless prohibited by the flags: */ if (! (flags & FFTW_NO_VECTOR_RECURSE)) { FOR_ALL_RCODELETS(p) { if (p->dir == dir && p->type == FFTW_HC2HC) { if ((n % p->size) == 0 && p->size > 1 && (!best || n != p->size)) { fftw_plan r = rplanner(table, n / p->size, dir, flags | FFTW_NO_VECTOR_RECURSE, p->size, in, istride, out, ostride); if (!r) continue; node = fftw_make_node_hc2hc(n, dir, p, r->root, flags); newplan = fftw_make_plan(n, dir, node, flags, p->type, p->signature, FFTW_VECTOR_RECURSE, vector_size); fftw_use_plan(newplan); run_plan_hooks(newplan); fftw_destroy_plan_internal(r); rcompute_cost(newplan, in, istride, out, ostride); best = fftw_pick_better(newplan, best); } } } } /* * Resort to generic codelets for unknown factors, but only if * n is odd--the rgeneric codelets can't handle even n's. */ if (n % 2 != 0) { fftw_rgeneric_codelet *codelet = (dir == FFTW_FORWARD ? fftw_hc2hc_forward_generic : fftw_hc2hc_backward_generic); int size, prev_size = 0, remaining_factors = n; fftw_plan r; while (remaining_factors > 1) { size = fftw_factor(remaining_factors); remaining_factors /= size; /* don't try the same factor more than once */ if (size == prev_size) continue; prev_size = size; /* Look for codelets corresponding to this factor. */ { FOR_ALL_RCODELETS(p) { if (p->dir == dir && p->type == FFTW_HC2HC && p->size == size) { size = 0; break; } } } /* * only try a generic/rader codelet if there were no * twiddle codelets for this factor */ if (!size) continue; r = rplanner(table, n / size, dir, flags | FFTW_NO_VECTOR_RECURSE, vector_size, in, istride, out, ostride); node = fftw_make_node_rgeneric(n, size, dir, codelet, r->root, flags); newplan = fftw_make_plan(n, dir, node, flags, FFTW_RGENERIC, 0, FFTW_NORMAL_RECURSE, vector_size); fftw_use_plan(newplan); run_plan_hooks(newplan); fftw_destroy_plan_internal(r); rcompute_cost(newplan, in, istride, out, ostride); best = fftw_pick_better(newplan, best); } } return best; } static fftw_plan rplanner(fftw_plan *table, int n, fftw_direction dir, int flags, int vector_size, fftw_real *in, int istride, fftw_real *out, int ostride) { fftw_plan best = (fftw_plan) 0; if (vector_size > 1) flags |= FFTW_NO_VECTOR_RECURSE; /* see if plan has already been computed */ best = fftw_lookup(table, n, flags, vector_size); if (best) { fftw_use_plan(best); return best; } /* try a wise plan */ best = rplanner_wisdom(table, n, dir, flags, vector_size, in, istride, out, ostride); if (!best) { /* No wisdom. Plan normally. */ best = rplanner_normal(table, n, dir, flags, vector_size, in, istride, out, ostride); } if (best) { fftw_insert(table, best); /* remember the wisdom */ fftw_wisdom_add(n, flags, dir, RFFTW_WISDOM, istride, ostride, best->wisdom_type, best->wisdom_signature, best->recurse_kind); } return best; } fftw_plan rfftw_create_plan_specific(int n, fftw_direction dir, int flags, fftw_real *in, int istride, fftw_real *out, int ostride) { fftw_plan table; fftw_plan p1; /* validate parameters */ if (n <= 0) return (fftw_plan) 0; #ifndef FFTW_ENABLE_VECTOR_RECURSE /* TEMPORARY: disable vector recursion until it is more tested. */ flags |= FFTW_NO_VECTOR_RECURSE; #endif if ((dir != FFTW_FORWARD) && (dir != FFTW_BACKWARD)) return (fftw_plan) 0; fftw_make_empty_table(&table); p1 = rplanner(&table, n, dir, flags, 1, in, istride, out, ostride); fftw_destroy_table(&table); if (p1) fftw_complete_twiddle(p1->root, n); return p1; } fftw_plan rfftw_create_plan(int n, fftw_direction dir, int flags) { fftw_real *tmp_in; fftw_real *tmp_out; fftw_plan p; if (flags & FFTW_MEASURE) { tmp_in = (fftw_real *) fftw_malloc(2 * n * sizeof(fftw_real)); if (!tmp_in) return 0; tmp_out = tmp_in + n; p = rfftw_create_plan_specific(n, dir, flags, tmp_in, 1, tmp_out, 1); fftw_free(tmp_in); } else p = rfftw_create_plan_specific(n, dir, flags, (fftw_real *) 0, 1, (fftw_real *) 0, 1); return p; } void rfftw_destroy_plan(fftw_plan plan) { fftw_destroy_plan_internal(plan); } void rfftw_fprint_plan(FILE *f, fftw_plan p) { fftw_fprint_plan(f, p); } void rfftw_print_plan(fftw_plan p) { rfftw_fprint_plan(stdout, p); }