Hi everyone,
I'm trying to run the MKL FFTW wrapper on MIC, but it doesn't work. The code compiles and I can run the binary, but since I don't get an offload report, I guess the code was not really executed on the MIC. The code is this:
#include <stdlib.h> #include <stdio.h> #include <fftw3.h> // MAX 480 #define N1 1000 #define N2 1000 #define N3 1000 double getTimeElapsed(struct timeval end, struct timeval start) { return (end.tv_sec - start.tv_sec) + (end.tv_usec - start.tv_usec) / 1000000.00; } int compute3DFFT(fftw_complex *dest, fftw_complex *src, int dim1, int dim2, int dim3, int type, unsigned int flags ) { if( !dest || !src || dim1<1 || dim2<1 || dim3<1 ) return 0; //fftw_plan_with_nthreads(244); fftw_plan p = fftw_plan_dft_3d(dim1, dim2, dim3, src, dest, type, flags); #pragma offload target(mic:0) nocopy(p) { fftw_execute(p); } fftw_destroy_plan(p); return 1; } void print(fftw_complex *f, int n){ int i; for(i=0; i<n; i++) printf("%d: (%g,%gi)\n", i, f[i][0], f[i][1]); } int main(void) { int i; fftw_complex *src, *dest; double time_elapsed; struct timeval start, end; gettimeofday(&start, NULL); //fftw_init_threads(); src = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N1 * N2 * N3); dest = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N1 * N2 * N3); for(i=0; i<N1*N2*N3; i++) { src[i][0] = i + 1.0; src[i][1] = i + 2.0; } //print(src, N1*N2*N3); compute3DFFT(dest, src, N1, N2, N3, FFTW_FORWARD, FFTW_ESTIMATE); /*printf("\n"); print(dest, N1*N2*N3);*/ fftw_free(src); fftw_free(dest); gettimeofday(&end, NULL); time_elapsed = getTimeElapsed(end, start); printf("Time: %.3f seconds\n", time_elapsed); return 0; }
I already tried changing nocopy(p) to in(p); I get some warnings, but the code runs the same way.
I compiled with this command:
icc -o Xeon fftw_libmic.c -L$MKLROOT/lib/intel64 -lmkl_intel_lp64 -lmkl_core -lmkl_intel_thread -lpthread -lm -openmp -I$MKLROOT/include/fftw -offload-attribute-target=mic -offload-option,mic,compiler," -L$MKLROOT/lib/mic -lmkl_intel_lp64 -lmkl_core -lmkl_intel_thread"
I also ran export MKL_MIC_ENABLE=1 and export OFFLOAD_REPORT=2.
Like I said, this code runs but only on CPU.
Can you help me ?
Best Regards