What is the source code for Strassen's matrix multiplication program?
#include #include #include #include "2DArray.h"#define GRAIN 1024 /* product size below which matmultleaf is used */void seqMatMult(int m, int n, int p, double** A, double** B, double** C){for (int i = 0; i < m; i++)for (int j = 0; j < n; j++){C[i][j] = 0.0;for (int k = 0; k < p; k++)C[i][j] += A[i][k]*B[k][j];}}void matmultleaf(int mf, int ml, int nf, int nl, int pf, int pl, double **A, double **B, double **C)/*subroutine that uses the simple triple loop to multiplya submatrix from A with a submatrix from B and store theresult in a submatrix of C.*/// mf, ml; /* first and last+1 i index */// nf, nl; /* first and last+1 j index */// pf, pl; /* first and last+1 k index */{for (int i = mf; i < ml; i++)for (int j = nf; j < nl; j++)for (int k = pf; k < pl; k++)C[i][j] += A[i][k]*B[k][j];}void copyQtrMatrix(double **X, int m, double **Y, int mf, int nf){for (int i = 0; i < m; i++)X[i] = &Y[mf+i][nf];}void AddMatBlocks(double **T, int m, int n, double **X, double **Y){for (int i = 0; i < m; i++)for (int j = 0; j < n; j++)T[i][j] = X[i][j] + Y[i][j];}void SubMatBlocks(double **T, int m, int n, double **X, double **Y){for (int i = 0; i < m; i++)for (int j = 0; j < n; j++)T[i][j] = X[i][j] - Y[i][j];}void strassenMMult(int mf, int ml, int nf, int nl, int pf, int pl, double **A, double **B, double **C){if ((ml-mf)*(nl-nf)*(pl-pf) < GRAIN)matmultleaf(mf, ml, nf, nl, pf, pl, A, B, C);else {int m2 = (ml-mf)/2;int n2 = (nl-nf)/2;int p2 = (pl-pf)/2;double **M1 = Allocate2DArray< double >(m2, n2);double **M2 = Allocate2DArray< double >(m2, n2);double **M3 = Allocate2DArray< double >(m2, n2);double **M4 = Allocate2DArray< double >(m2, n2);double **M5 = Allocate2DArray< double >(m2, n2);double **M6 = Allocate2DArray< double >(m2, n2);double **M7 = Allocate2DArray< double >(m2, n2);double **A11 = new double*[m2];double **A12 = new double*[m2];double **A21 = new double*[m2];double **A22 = new double*[m2];double **B11 = new double*[p2];double **B12 = new double*[p2];double **B21 = new double*[p2];double **B22 = new double*[p2];double **C11 = new double*[m2];double **C12 = new double*[m2];double **C21 = new double*[m2];double **C22 = new double*[m2];double **tAM1 = Allocate2DArray< double >(m2, p2);double **tBM1 = Allocate2DArray< double >(p2, n2);double **tAM2 = Allocate2DArray< double >(m2, p2);double **tBM3 = Allocate2DArray< double >(p2, n2);double **tBM4 = Allocate2DArray< double >(p2, n2);double **tAM5 = Allocate2DArray< double >(m2, p2);double **tAM6 = Allocate2DArray< double >(m2, p2);double **tBM6 = Allocate2DArray< double >(p2, n2);double **tAM7 = Allocate2DArray< double >(m2, p2);double **tBM7 = Allocate2DArray< double >(p2, n2);copyQtrMatrix(A11, m2, A, mf, pf);copyQtrMatrix(A12, m2, A, mf, p2);copyQtrMatrix(A21, m2, A, m2, pf);copyQtrMatrix(A22, m2, A, m2, p2);copyQtrMatrix(B11, p2, B, pf, nf);copyQtrMatrix(B12, p2, B, pf, n2);copyQtrMatrix(B21, p2, B, p2, nf);copyQtrMatrix(B22, p2, B, p2, n2);copyQtrMatrix(C11, m2, C, mf, nf);copyQtrMatrix(C12, m2, C, mf, n2);copyQtrMatrix(C21, m2, C, m2, nf);copyQtrMatrix(C22, m2, C, m2, n2);// M1 = (A11 + A22)*(B11 + B22)AddMatBlocks(tAM1, m2, p2, A11, A22);AddMatBlocks(tBM1, p2, n2, B11, B22);strassenMMult(0, m2, 0, n2, 0, p2, tAM1, tBM1, M1);//M2 = (A21 + A22)*B11AddMatBlocks(tAM2, m2, p2, A21, A22);strassenMMult(0, m2, 0, n2, 0, p2, tAM2, B11, M2);//M3 = A11*(B12 - B22)SubMatBlocks(tBM3, p2, n2, B12, B22);strassenMMult(0, m2, 0, n2, 0, p2, A11, tBM3, M3);//M4 = A22*(B21 - B11)SubMatBlocks(tBM4, p2, n2, B21, B11);strassenMMult(0, m2, 0, n2, 0, p2, A22, tBM4, M4);//M5 = (A11 + A12)*B22AddMatBlocks(tAM5, m2, p2, A11, A12);strassenMMult(0, m2, 0, n2, 0, p2, tAM5, B22, M5);//M6 = (A21 - A11)*(B11 + B12)SubMatBlocks(tAM6, m2, p2, A21, A11);AddMatBlocks(tBM6, p2, n2, B11, B12);strassenMMult(0, m2, 0, n2, 0, p2, tAM6, tBM6, M6);//M7 = (A12 - A22)*(B21 + B22)SubMatBlocks(tAM7, m2, p2, A12, A22);AddMatBlocks(tBM7, p2, n2, B21, B22);strassenMMult(0, m2, 0, n2, 0, p2, tAM7, tBM7, M7);for (int i = 0; i < m2; i++)for (int j = 0; j < n2; j++) {C11[i][j] = M1[i][j] + M4[i][j] - M5[i][j] + M7[i][j];C12[i][j] = M3[i][j] + M5[i][j];C21[i][j] = M2[i][j] + M4[i][j];C22[i][j] = M1[i][j] - M2[i][j] + M3[i][j] + M6[i][j];}Free2DArray< double >(M1);Free2DArray< double >(M2);Free2DArray< double >(M3);Free2DArray< double >(M4);Free2DArray< double >(M5);Free2DArray< double >(M6);Free2DArray< double >(M7);delete[] A11; delete[] A12; delete[] A21; delete[] A22;delete[] B11; delete[] B12; delete[] B21; delete[] B22;delete[] C11; delete[] C12; delete[] C21; delete[] C22;Free2DArray< double >(tAM1);Free2DArray< double >(tBM1);Free2DArray< double >(tAM2);Free2DArray< double >(tBM3);Free2DArray< double >(tBM4);Free2DArray< double >(tAM5);Free2DArray< double >(tAM6);Free2DArray< double >(tBM6);Free2DArray< double >(tAM7);Free2DArray< double >(tBM7);}}void matmultS(int m, int n, int p, double **A, double **B, double **C){int i,j;for (i=0; i < m; i++)for (j=0; j < n; j++)C[i][j] = 0;strassenMMult(0, m, 0, n, 0, p, A, B, C);}int CheckResults(int m, int n, double **C, double **C1){#define THRESHOLD 0.001//// May need to take into consideration the floating point roundoff error// due to parallel execution//for (int i = 0; i < m; i++) {for (int j = 0; j < n; j++) {if (abs(C[i][j] - C1[i][j]) > THRESHOLD ) {printf("%f %f\n", C[i][j], C1[i][j]);return 1;}}}return 0;}int main(int argc, char* argv[]){clock_t before, after;int M = atoi(argv[1]);int N = atoi(argv[2]);int P = atoi(argv[3]);double **A = Allocate2DArray< double >(M, P);double **B = Allocate2DArray< double >(P, N);double **C = Allocate2DArray< double >(M, N);double **C4 = Allocate2DArray< double >(M, N);int i, j;for (i = 0; i < M; i++) {for (j = 0; j < P; j++) {A[i][j] = 5.0 - ((double)(rand()%100) / 10.0);}}for (i = 0; i < P; i++) {for (j = 0; j < N; j++) {B[i][j] = 5.0 - ((double)(rand()%100) / 10.0);}}printf("Execute Standard matmult\n\n");before = clock();seqMatMult(M, N, P, A, B, C);after = clock();printf("Standard matrix function done in %7.2f secs\n\n\n",(float)(after - before)/ CLOCKS_PER_SEC);before = clock();matmultS(M, N, P, A, B, C4);after = clock();printf("Strassen matrix function done in %7.2f secs\n\n\n",(float)(after - before)/ CLOCKS_PER_SEC);if (CheckResults(M, N, C, C4))printf("Error in matmultS\n\n");elseprintf("OKAY\n\n");Free2DArray(A);Free2DArray(B);Free2DArray(C);Free2DArray(C4);return 0;}