/*
 * Copyright (c) 1997-1999 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */

/*
 * NOTE(review): the names of the four angle-bracket headers were missing
 * from the copy of this file under review (only bare "#include" tokens
 * survived).  They are restored here from what the code visibly uses:
 * printf/sprintf/fflush, exit, log, and the Cilk_* runtime API.  Confirm
 * the exact list (in particular the Cilk runtime header name) against
 * the pristine source and the Cilk release in use.
 */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cilk.h>

#include "fftw_cilk.cilkh"

/* Iteration budget; each test runs roughly NUM_ITER / (N * log(2N)) times. */
#define NUM_ITER 5000000L

#define N_TESTS_1D 16	/* number of entries in n1[] below */
#define N_TESTS_3D 9	/* number of entries in n3[][] below */

extern void initialize_fft_data(fftw_complex * arr, long n);

/*
 * Benchmark driver.
 *
 * For each 1-D size in n1[] and each 3-D size in n3[][] it times the
 * serial transform (fftw / fftwnd) against the Cilk transform
 * (fftw_cilk / fftwnd_cilk) and prints one table row per size:
 * the size, both timings and the ratio serial/Cilk.
 *
 * Every timed loop re-initializes the input before each transform, so
 * the cost of the same number of bare initializations (init_t) is
 * measured first and subtracted.  The printed timings are wall-clock
 * seconds multiplied by time_scale = 1e6 / (max_iter * N * log2(N)).
 *
 * argc and argv are not used.  Returns 0 on success; calls exit(1) if
 * the work buffers cannot be allocated.
 */
cilk int main(int argc, char **argv)
{
    int n1[N_TESTS_1D] = {
	16, 32, 64, 128, 256, 512, 1024, 2048,
	4096, 8192, 16384, 32768, 65536, 131072, 262144, 524288
    };
    int n3[N_TESTS_3D][3] = {
	{ 16, 16, 16 },
	{ 24, 24, 24 },
	{ 32, 32, 32 },
	{ 49, 49, 49 },
	{ 64, 64, 64 },
	{ 80, 80, 80 },
	{ 100, 100, 100 },
	{ 128, 128, 128 },
	{ 256, 256, 256 },
    };
    int i, test, iter, max_iter;
    Cilk_time start_t, end_t, init_t;
    fftw_complex *cin, *out;
    double time_scale, time1, time2;
    int max_size;
    fftw_plan plan;
    fftwnd_plan plan_nd;
    Cilk_time begin_cp, end_cp;
    Cilk_time begin_work, end_work;

    /*************** Benchmark fftw_cilk ****************/

    /* One pair of buffers, sized for the largest 1-D test, is reused. */
    max_size = 0;
    for (i = 0; i < N_TESTS_1D; ++i)
	if (n1[i] > max_size)
	    max_size = n1[i];

    cin = Cilk_malloc(max_size * sizeof(fftw_complex));
    out = Cilk_malloc(max_size * sizeof(fftw_complex));

    if (!cin || !out) {
	/* The byte count is a size_t; print it as unsigned long, not %d. */
	printf("Not enough memory! At least %lu bytes needed.\n",
	       (unsigned long) (max_size * sizeof(fftw_complex) * 2));
	exit(1);
    }

    printf("%15s%20s%20s%20s\n", "Array Size", "FFTW", "FFTW_CILK",
	   "Speedup Factor");

    for (test = 0; test < N_TESTS_1D; ++test) {
	printf("%15d", n1[test]);
	fflush(stdout);

	plan = fftw_create_plan(n1[test], FFTW_FORWARD, FFTW_MEASURE);

	/* Fewer repetitions for larger sizes, but always at least one. */
	max_iter = NUM_ITER / (n1[test] * log(2.0 * n1[test]));
	if (max_iter < 1)
	    max_iter = 1;

	time_scale = 1.0e6 / (max_iter * (log(n1[test]) / log(2.0) * n1[test]));

	/* Measure the initialization overhead alone (after one warm-up). */
	initialize_fft_data(cin, n1[test]);
	start_t = Cilk_get_wall_time();
	for (iter = 0; iter < max_iter; ++iter)
	    initialize_fft_data(cin, n1[test]);
	end_t = Cilk_get_wall_time();
	init_t = end_t - start_t;

	/* Time FFTW: */

	/* One untimed call first, then the timed loop. */
	initialize_fft_data(cin, n1[test]);
	fftw(plan, 1, cin, 1, 0, out, 1, 0);
	start_t = Cilk_get_wall_time();
	for (iter = 0; iter < max_iter; ++iter) {
	    initialize_fft_data(cin, n1[test]);
	    fftw(plan, 1, cin, 1, 0, out, 1, 0);
	}
	end_t = Cilk_get_wall_time();
	time1 = Cilk_wall_time_to_sec(end_t - start_t - init_t) * time_scale;
	printf("%20g", time1);
	fflush(stdout);

	/* Time Cilk FFTW: */

	initialize_fft_data(cin, n1[test]);
	spawn fftw_cilk(plan, 1, cin, 1, 0, out, 1, 0);
	sync;
	start_t = Cilk_get_wall_time();
	for (iter = 0; iter < max_iter; ++iter) {
	    initialize_fft_data(cin, n1[test]);
	    spawn fftw_cilk(plan, 1, cin, 1, 0, out, 1, 0);
	    sync;
	}
	end_t = Cilk_get_wall_time();
	time2 = Cilk_wall_time_to_sec(end_t - start_t - init_t) * time_scale;
	printf("%20g", time2);
	printf("%20g\n", time1 / time2);
	fflush(stdout);

#if CILK_CRITICAL_PATH > 0	/* if critical path is measured */
	/* measure work and CP of a single fftw_cilk call */
	sync;
	begin_cp = Cilk_user_critical_path;
	begin_work = Cilk_user_work;
	spawn fftw_cilk(plan, 1, cin, 1, 0, out, 1, 0);
	sync;
	end_cp = Cilk_user_critical_path;
	end_work = Cilk_user_work;

	printf("Work = %f s\n", Cilk_time_to_sec(end_work - begin_work));
	printf("Critical path = %f s\n", Cilk_time_to_sec(end_cp - begin_cp));
	printf("Average parallelism = %f\n",
	       Cilk_time_to_sec(end_work - begin_work) /
	       Cilk_time_to_sec(end_cp - begin_cp));
#endif

	/* Done. */

	fftw_destroy_plan(plan);
    }

    Cilk_free(cin);
    Cilk_free(out);
    /* The 3-D plans below are in-place; make sure the stale pointer is
       never handed to the library again. */
    out = NULL;

    /*************** Benchmark fftwnd_cilk ****************/

    printf("\n");

    /* A single in-place buffer, sized for the largest 3-D test. */
    max_size = 0;
    for (i = 0; i < N_TESTS_3D; ++i)
	if (n3[i][0] * n3[i][1] * n3[i][2] > max_size)
	    max_size = n3[i][0] * n3[i][1] * n3[i][2];

    cin = Cilk_malloc(max_size * sizeof(fftw_complex));

    if (!cin) {
	/* The byte count is a size_t; print it as unsigned long, not %d. */
	printf("Not enough memory! At least %lu bytes needed.\n",
	       (unsigned long) (max_size * sizeof(fftw_complex)));
	exit(1);
    }

    printf("%15s%20s%20s%20s\n", "Array Size", "FFTWND", "FFTWND_CILK",
	   "Speedup Factor");

    for (test = 0; test < N_TESTS_3D; ++test) {
	int N;

	{
	    /* Room for three full-width ints, two 'x' and the NUL, so the
	       label cannot overflow if the size table is edited. */
	    char s[40];

	    sprintf(s, "%dx%dx%d", n3[test][0], n3[test][1], n3[test][2]);
	    printf("%15s", s);
	}
	fflush(stdout);

	plan_nd = fftwnd_create_plan(3, n3[test], FFTW_FORWARD,
				     FFTW_IN_PLACE | FFTW_MEASURE);

	N = n3[test][0] * n3[test][1] * n3[test][2];

	/* Fewer repetitions for larger sizes, but always at least one. */
	max_iter = NUM_ITER / (N * log(2.0 * N));
	if (max_iter < 1)
	    max_iter = 1;

	time_scale = 1.0e6 / (max_iter * (log(N) / log(2.0) * N));

	/* Measure the initialization overhead alone (after one warm-up). */
	initialize_fft_data(cin, N);
	start_t = Cilk_get_wall_time();
	for (iter = 0; iter < max_iter; ++iter)
	    initialize_fft_data(cin, N);
	end_t = Cilk_get_wall_time();
	init_t = end_t - start_t;

	/* Time FFTW: */

	/* The plan is in-place and the 1-D output buffer has already been
	   freed, so no output array is supplied. */
	initialize_fft_data(cin, N);
	fftwnd(plan_nd, 1, cin, 1, 0, NULL, 1, 0);
	start_t = Cilk_get_wall_time();
	for (iter = 0; iter < max_iter; ++iter) {
	    initialize_fft_data(cin, N);
	    fftwnd(plan_nd, 1, cin, 1, 0, NULL, 1, 0);
	}
	end_t = Cilk_get_wall_time();
	time1 = Cilk_wall_time_to_sec(end_t - start_t - init_t) * time_scale;
	printf("%20g", time1);
	fflush(stdout);

	/* Time Cilk FFTW: */

	initialize_fft_data(cin, N);
	spawn fftwnd_cilk(plan_nd, 1, cin, 1, 0, 0, 0, 0);
	sync;
	start_t = Cilk_get_wall_time();
	for (iter = 0; iter < max_iter; ++iter) {
	    initialize_fft_data(cin, N);
	    spawn fftwnd_cilk(plan_nd, 1, cin, 1, 0, 0, 0, 0);
	    sync;
	}
	end_t = Cilk_get_wall_time();
	time2 = Cilk_wall_time_to_sec(end_t - start_t - init_t) * time_scale;
	printf("%20g", time2);

	/* Done. */

	printf("%20g\n", time1 / time2);
	fflush(stdout);

	fftwnd_destroy_plan(plan_nd);
    }

    Cilk_free(cin);

    return 0;
}

/*
 * Fill the first n elements of arr with one fixed complex value
 * (real part 0.56923456, imaginary part 0.23858572).
 *
 * arr must have room for at least n elements; nothing is written
 * when n <= 0.
 */
void initialize_fft_data(fftw_complex * arr, long n)
{
    long i;

    for (i = 0; i < n; i++) {
	/* initialize to some arbitrary values: */
	c_re(arr[i]) = 0.56923456;
	c_im(arr[i]) = 0.23858572;
    }
}