/* Some more advanced techniques with MPI. Our goal will be to take a large 2D matrix of values and successively Average them until a stable state is reached. To do so efficiently we will learn the following techniques: 1. Creating a 2D mesh of processes to reduce communication cost 2. User-defined MPI Datatypes for faster communication of non- contiguous data 3. Reducing communication startup cost using a technique similar to thread pools. 4. Work effectively with a sub-matrix structure using precise pointer arithemetics. */ #include #include #include #include /* size of NxN matrix */ #define N 100 // MAKE SURE THESE ARE STATIC: double A[N][N]; // these should really be dynamically allocated double B[N][N]; static double Q[N/2][N/2]; /* one quadrant */ /* macro to compute start address of quadrant in mesh */ #define QUADRANT(M,y,x) (M[y*N/2]+(x*N/2)) int mycoord[2]; /* stores mesh coordinates */ #define YCORD mycoord[0] #define XCORD mycoord[1] MPI_Comm MESH; MPI_Datatype QUAD; // quadrant submatrix MPI_Datatype COLUMN; // one column in quadrant int main(int argc, char** argv) { int rank, size, interval, remainder, i, j; double time1, time2, time3; // for timing measurements MPI_Init(&argc,&argv); MPI_Comm_rank(MPI_COMM_WORLD,&rank); MPI_Comm_size(MPI_COMM_WORLD,&size); MPI_Request ireq[128]; // , asynch request, assume size<128 MPI_Status stat; // status of asynch communication /*** Create 2x2 Mesh ***/ int mrank; // my rank within MESH communicator int dimensions[2] = {2,2}; // dimensions of mesh int wraparound[2] = {0,0}; // no wraparound int wrank, erank, srank, nrank; // rank of west,east,south,north neighbors MPI_Cart_create(MPI_COMM_WORLD,2,dimensions,wraparound,1,&MESH); MPI_Comm_rank(MESH,&mrank); // my rank within MESH communicator MPI_Cart_coords(MESH,mrank,2,mycoord); // my y,x coordinates // printf("rank %d in COMM_WORLD is rank %d in MESH\n",rank,mrank); printf("mesh rank %d has y,x coordinates %d,%d\n",mrank,YCORD,XCORD); // find rank of neighbors, -1 means neighbor doesn't exist MPI_Cart_shift(MESH,0,1,&nrank,&srank); MPI_Cart_shift(MESH,1,1,&wrank,&erank); printf("%d's north,south,west,east neighbors have ranks %d,%d,%d,%d\n", mrank,nrank,srank,wrank,erank); fflush(stdout); int locate[2][2]; // stores ranks of processes in mesh by coordinates locate[0][0] = 0; // this is hard-coded here, but really should be locate[0][1] = 1; // dynamically determined. locate[1][0] = 2; locate[1][1] = 3; /*** Define MPI_Datatype for sub-matrix and sub-matrix column ***/ MPI_Type_vector(N/2,N/2,N,MPI_DOUBLE,&QUAD); MPI_Type_vector(N/2,1,N,MPI_DOUBLE,&COLUMN); MPI_Type_commit(&QUAD); MPI_Type_commit(&COLUMN); /* rank-0 process divides A into quads and sends them to other processes */ MPI_Request req[3]; if (rank==0) { /* MPI_Send(QUADRANT(A,0,1),1,QUAD,locate[0][1],0,MESH); MPI_Send(QUADRANT(A,1,0),1,QUAD,locate[1][0],0,MESH); MPI_Send(QUADRANT(A,1,1),1,QUAD,locate[1][1],0,MESH); */ MPI_Isend(QUADRANT(A,0,1),1,QUAD,locate[0][1],0,MESH,req); MPI_Isend(QUADRANT(A,1,0),1,QUAD,locate[1][0],0,MESH,req+1); MPI_Isend(QUADRANT(A,1,1),1,QUAD,locate[1][1],0,MESH,req+2); MPI_Waitany(3,req,&j,&stat); // join on sends printf("0: sent to %d\n",j); fflush(stdout); MPI_Waitany(3,req,&j,&stat); // join on sends printf("0: sent to %d\n",j); fflush(stdout); MPI_Waitany(3,req,&j,&stat); // join on sends printf("0: sent to %d\n",j); fflush(stdout); // copy first quadrant locally: for (i=0;i