9 void MatrixOut( float *M, int MatrixSize )
13 void MatrixInitial( float *M, int MatrixSize )
17 for ( i=0; i<MatrixSize; i++ )
18 for ( j=0; j<MatrixSize; j++ )
20 M[i*MatrixSize+j] = (double)rand()/(double)RAND_MAX;
21 M[i*MatrixSize+j] = i + j;
26 void MatrixMultOnCPU( float *M, float *N, float *P, int MatrixSize )
31 for ( i=0; i<MatrixSize; i++ )
32 for ( j=0; j<MatrixSize; j++ )
35 for ( k=0; k<MatrixSize; k++ )
36 temp = temp + M[i*MatrixSize+k] * N[k*MatrixSize+j];
37 P[i*MatrixSize+j] = temp;
43 // use shared memory for efficiency by avoiding
44 // high-latency global memory
45 __global__ void MatrixMultKernel( float *Md, float *Nd, float *Pd,
46 int num_block, int MatrixSize)
48 int bx, by, tx, ty, Row, Col, k, m;
50 __shared__ float Mds[TILE_SIZE][TILE_SIZE];
51 __shared__ float Nds[TILE_SIZE][TILE_SIZE];
57 void MatrixMultOnDevice( float *M, float *N, float *P,
62 int main ( int argc, char *argv[] )
66 float *M, *N, *P, *PCheck;
67 struct timeval t1, t2, t3, t4;
68 float time_CPU, time_GPU;
73 MatrixSize = atoi( argv[1] );
74 printf("\n Matrix dimension: %d\n", MatrixSize );
76 // memory space for the matrices
77 size = MatrixSize * MatrixSize * sizeof(float);
78 M = (float *)malloc( size );
79 N = (float *)malloc( size );
80 P = (float *)malloc( size );
81 PCheck = (float *)malloc( size );