Quantcast
Channel: Intel® Many Integrated Core Architecture
Viewing all articles
Browse latest Browse all 1347

Dynamic allocation problems on Xeon Phi

$
0
0

I am creating a simple matrix multiplication procedure, operating on the Intel Xeon Phi architecture.

I am using, aligned data. However, if the matrices are allocated using dynamic memory (posix_memalign), the computation incurs in a severe slow down, i.e. for TYPE=float and 512x512 matrices takes ~0.55s in the dynamic case while in the other case ~0.07s.

On a different architecture (Intel Xeon E5-2650 @ 2.00GHz), the problem changes because the static allocated case doesn't calculate the matrix (it gives me all zeros when i print a random position of C, I think because the #pragma simd. Anyway, the dynamic allocating case takes about 0.08s.

Here is the code, i also attached the optimization reports of static & dynamic cases:

#define ROW 512
#define COLWIDTH 512
#define REPEATNTIMES 512

#include <sys/time.h>
#include <stdio.h>
#include <math.h>
#include <stdlib.h>

#define FTYPE float
#define ALIGNMENT 128
double clock_it(void)
{
        double duration = 0.0;
        struct timeval start;

        gettimeofday(&start, NULL);
        duration = (double)(start.tv_sec + start.tv_usec/1000000.0);
        return duration;
}

int main()
{
        double execTime = 0.0;
        double startTime, endTime;

    int k, size1, size2, i, j;

#ifdef STACK
	printf("Using Stack!\n");
        FTYPE a[ROW][COLWIDTH];
        FTYPE b[ROW][COLWIDTH];
        FTYPE c[ROW][COLWIDTH];
        for(i=0; i<ROW; i++){
                for(j=0; j<COLWIDTH; j++){
                        a[i][j] = 1.0f;
                        b[i][j] = 1.0f;
                        c[i][j] = 0.0f;
                }
        }
#else
     	printf("Using Heap!\n");
        FTYPE **a;
        posix_memalign((void **) &a, ALIGNMENT, sizeof(FTYPE*)*ROW);
        FTYPE **b;
        posix_memalign((void **) &b, ALIGNMENT, sizeof(FTYPE*)*ROW);
        FTYPE **c;
        posix_memalign((void **) &c, ALIGNMENT, sizeof(FTYPE*)*ROW);
        for(i=0; i<ROW; i++){
                posix_memalign((void **) &a[i], ALIGNMENT, sizeof(FTYPE)*COLWIDTH);
                posix_memalign((void **) &b[i], ALIGNMENT, sizeof(FTYPE)*COLWIDTH);
                posix_memalign((void **) &c[i], ALIGNMENT, sizeof(FTYPE)*COLWIDTH);
                for(j=0; j<COLWIDTH; j++){
                        a[i][j] = 1.0f;
                        b[i][j] = 1.0f;
                        c[i][j] = 0.0f;
                }
        }
#endif
      	size1 = ROW;
        size2 = COLWIDTH;
        printf("\nROW:%d COL: %d\n",ROW,COLWIDTH);

        //start timing the matrix multiply code
        startTime = clock_it();
        #ifndef STACK
        __assume_aligned(a, ALIGNMENT);
        __assume_aligned(b, ALIGNMENT);
        __assume_aligned(c, ALIGNMENT);
        #endif
	#pragma vector aligned
        for (i = 0; i < REPEATNTIMES; i++) {
                #pragma vector aligned
                for (k = 0; k < size1; k++) {
                        #pragma simd
                        #pragma vector aligned
                        for (j = 0;j < size2; j++) {
                                #ifndef STACK
                                        __assume_aligned(a[i], ALIGNMENT);
                                        __assume_aligned(b[k], ALIGNMENT);
                                        __assume_aligned(c[i], ALIGNMENT);
                                #endif
                                c[i][j] += a[i][k] * b[k][j];
                        }
                }
        }

	endTime = clock_it();
        execTime = endTime - startTime;

        printf("Execution time is %2.3f seconds\n", execTime);
        printf("GigaFlops = %f\n", (((double)REPEATNTIMES * (double)COLWIDTH * (double)ROW * 2.0) / (double)(execTime))/1000000000.0);
        printf("Random c_i,j %f\n", c[rand()%512][rand()%512]);
	return 0;
}

Any help is appreciated!

AllegatoDimensione
ScaricaSta.txt2.79 KB
ScaricaDyn.txt1.06 KB

Viewing all articles
Browse latest Browse all 1347

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>