answersLogoWhite

0

#include
#include
#include
#include "2DArray.h"

#define GRAIN 1024 /* product size below which matmultleaf is used */

void seqMatMult(int m, int n, int p, double** A, double** B, double** C)
{
for (int i = 0; i < m; i++)
for (int j = 0; j < n; j++)
{
C[i][j] = 0.0;
for (int k = 0; k < p; k++)
C[i][j] += A[i][k]*B[k][j];
}
}


void matmultleaf(int mf, int ml, int nf, int nl, int pf, int pl, double **A, double **B, double **C)
/*
subroutine that uses the simple triple loop to multiply
a submatrix from A with a submatrix from B and store the
result in a submatrix of C.
*/
// mf, ml; /* first and last+1 i index */
// nf, nl; /* first and last+1 j index */
// pf, pl; /* first and last+1 k index */
{
for (int i = mf; i < ml; i++)
for (int j = nf; j < nl; j++)
for (int k = pf; k < pl; k++)
C[i][j] += A[i][k]*B[k][j];
}


void copyQtrMatrix(double **X, int m, double **Y, int mf, int nf)
{
for (int i = 0; i < m; i++)
X[i] = &Y[mf+i][nf];
}

void AddMatBlocks(double **T, int m, int n, double **X, double **Y)
{
for (int i = 0; i < m; i++)
for (int j = 0; j < n; j++)
T[i][j] = X[i][j] + Y[i][j];
}

void SubMatBlocks(double **T, int m, int n, double **X, double **Y)
{
for (int i = 0; i < m; i++)
for (int j = 0; j < n; j++)
T[i][j] = X[i][j] - Y[i][j];
}


void strassenMMult(int mf, int ml, int nf, int nl, int pf, int pl, double **A, double **B, double **C)
{
if ((ml-mf)*(nl-nf)*(pl-pf) < GRAIN)
matmultleaf(mf, ml, nf, nl, pf, pl, A, B, C);

else {
int m2 = (ml-mf)/2;
int n2 = (nl-nf)/2;
int p2 = (pl-pf)/2;

double **M1 = Allocate2DArray< double >(m2, n2);
double **M2 = Allocate2DArray< double >(m2, n2);
double **M3 = Allocate2DArray< double >(m2, n2);
double **M4 = Allocate2DArray< double >(m2, n2);
double **M5 = Allocate2DArray< double >(m2, n2);
double **M6 = Allocate2DArray< double >(m2, n2);
double **M7 = Allocate2DArray< double >(m2, n2);

double **A11 = new double*[m2];
double **A12 = new double*[m2];
double **A21 = new double*[m2];
double **A22 = new double*[m2];

double **B11 = new double*[p2];
double **B12 = new double*[p2];
double **B21 = new double*[p2];
double **B22 = new double*[p2];

double **C11 = new double*[m2];
double **C12 = new double*[m2];
double **C21 = new double*[m2];
double **C22 = new double*[m2];

double **tAM1 = Allocate2DArray< double >(m2, p2);
double **tBM1 = Allocate2DArray< double >(p2, n2);
double **tAM2 = Allocate2DArray< double >(m2, p2);
double **tBM3 = Allocate2DArray< double >(p2, n2);
double **tBM4 = Allocate2DArray< double >(p2, n2);
double **tAM5 = Allocate2DArray< double >(m2, p2);
double **tAM6 = Allocate2DArray< double >(m2, p2);
double **tBM6 = Allocate2DArray< double >(p2, n2);
double **tAM7 = Allocate2DArray< double >(m2, p2);
double **tBM7 = Allocate2DArray< double >(p2, n2);

copyQtrMatrix(A11, m2, A, mf, pf);
copyQtrMatrix(A12, m2, A, mf, p2);
copyQtrMatrix(A21, m2, A, m2, pf);
copyQtrMatrix(A22, m2, A, m2, p2);

copyQtrMatrix(B11, p2, B, pf, nf);
copyQtrMatrix(B12, p2, B, pf, n2);
copyQtrMatrix(B21, p2, B, p2, nf);
copyQtrMatrix(B22, p2, B, p2, n2);

copyQtrMatrix(C11, m2, C, mf, nf);
copyQtrMatrix(C12, m2, C, mf, n2);
copyQtrMatrix(C21, m2, C, m2, nf);
copyQtrMatrix(C22, m2, C, m2, n2);

// M1 = (A11 + A22)*(B11 + B22)
AddMatBlocks(tAM1, m2, p2, A11, A22);
AddMatBlocks(tBM1, p2, n2, B11, B22);
strassenMMult(0, m2, 0, n2, 0, p2, tAM1, tBM1, M1);

//M2 = (A21 + A22)*B11
AddMatBlocks(tAM2, m2, p2, A21, A22);
strassenMMult(0, m2, 0, n2, 0, p2, tAM2, B11, M2);

//M3 = A11*(B12 - B22)
SubMatBlocks(tBM3, p2, n2, B12, B22);
strassenMMult(0, m2, 0, n2, 0, p2, A11, tBM3, M3);

//M4 = A22*(B21 - B11)
SubMatBlocks(tBM4, p2, n2, B21, B11);
strassenMMult(0, m2, 0, n2, 0, p2, A22, tBM4, M4);

//M5 = (A11 + A12)*B22
AddMatBlocks(tAM5, m2, p2, A11, A12);
strassenMMult(0, m2, 0, n2, 0, p2, tAM5, B22, M5);

//M6 = (A21 - A11)*(B11 + B12)
SubMatBlocks(tAM6, m2, p2, A21, A11);
AddMatBlocks(tBM6, p2, n2, B11, B12);
strassenMMult(0, m2, 0, n2, 0, p2, tAM6, tBM6, M6);

//M7 = (A12 - A22)*(B21 + B22)
SubMatBlocks(tAM7, m2, p2, A12, A22);
AddMatBlocks(tBM7, p2, n2, B21, B22);
strassenMMult(0, m2, 0, n2, 0, p2, tAM7, tBM7, M7);

for (int i = 0; i < m2; i++)
for (int j = 0; j < n2; j++) {
C11[i][j] = M1[i][j] + M4[i][j] - M5[i][j] + M7[i][j];
C12[i][j] = M3[i][j] + M5[i][j];
C21[i][j] = M2[i][j] + M4[i][j];
C22[i][j] = M1[i][j] - M2[i][j] + M3[i][j] + M6[i][j];
}

Free2DArray< double >(M1);
Free2DArray< double >(M2);
Free2DArray< double >(M3);
Free2DArray< double >(M4);
Free2DArray< double >(M5);
Free2DArray< double >(M6);
Free2DArray< double >(M7);

delete[] A11; delete[] A12; delete[] A21; delete[] A22;
delete[] B11; delete[] B12; delete[] B21; delete[] B22;
delete[] C11; delete[] C12; delete[] C21; delete[] C22;

Free2DArray< double >(tAM1);
Free2DArray< double >(tBM1);
Free2DArray< double >(tAM2);
Free2DArray< double >(tBM3);
Free2DArray< double >(tBM4);
Free2DArray< double >(tAM5);
Free2DArray< double >(tAM6);
Free2DArray< double >(tBM6);
Free2DArray< double >(tAM7);
Free2DArray< double >(tBM7);
}
}

void matmultS(int m, int n, int p, double **A, double **B, double **C)
{
int i,j;
for (i=0; i < m; i++)
for (j=0; j < n; j++)
C[i][j] = 0;
strassenMMult(0, m, 0, n, 0, p, A, B, C);
}


int CheckResults(int m, int n, double **C, double **C1)
{
#define THRESHOLD 0.001
//
// May need to take into consideration the floating point roundoff error
// due to parallel execution
//
for (int i = 0; i < m; i++) {
for (int j = 0; j < n; j++) {
if (abs(C[i][j] - C1[i][j]) > THRESHOLD ) {
printf("%f %f\n", C[i][j], C1[i][j]);
return 1;
}
}
}
return 0;
}



int main(int argc, char* argv[])
{
clock_t before, after;

int M = atoi(argv[1]);
int N = atoi(argv[2]);
int P = atoi(argv[3]);

double **A = Allocate2DArray< double >(M, P);
double **B = Allocate2DArray< double >(P, N);
double **C = Allocate2DArray< double >(M, N);
double **C4 = Allocate2DArray< double >(M, N);

int i, j;

for (i = 0; i < M; i++) {
for (j = 0; j < P; j++) {
A[i][j] = 5.0 - ((double)(rand()%100) / 10.0);
}
}

for (i = 0; i < P; i++) {
for (j = 0; j < N; j++) {
B[i][j] = 5.0 - ((double)(rand()%100) / 10.0);
}
}

printf("Execute Standard matmult\n\n");
before = clock();
seqMatMult(M, N, P, A, B, C);
after = clock();
printf("Standard matrix function done in %7.2f secs\n\n\n",(float)(after - before)/ CLOCKS_PER_SEC);

before = clock();
matmultS(M, N, P, A, B, C4);
after = clock();
printf("Strassen matrix function done in %7.2f secs\n\n\n",(float)(after - before)/ CLOCKS_PER_SEC);

if (CheckResults(M, N, C, C4))
printf("Error in matmultS\n\n");
else
printf("OKAY\n\n");

Free2DArray(A);
Free2DArray(B);
Free2DArray(C);
Free2DArray(C4);

return 0;
}

User Avatar

Wiki User

15y ago

What else can I help you with?

Related Questions

What is a program written in high level language called?

the program written in high level language is called "source program"


In c language what is source program?

Source program or source code in any language is the code you write to make the program do what you want. Things like: #include &lt;stdio.h&gt; void main (); and so on are all pieces of source-code or source program


Differences between source program and object program?

SOURCE PROGRAM=A set of instructions of the high level language used to code problems to find its solution on a computer is referred as source program. OBJECT PROGRAM=The computer translates the source program into machine language program called object program by using an interpreter or compiler is called object program.


A source program is the program written in English language?

No.


Are source program and source code the same?

Yes.


Source code for n dimensional matrix operations?

See Numpy (a Python library for general N-dimensional matrix operations): http://numpy.org/


Function to translate source program to object program?

computer


Where do we write main function in a c program?

Into the source program.


Does interpreter generate an object program from the source program?

No, that's what the compiler does.


When you compile a program do you have to give a command to print the source program?

No. You can compile without printing the source. Indeed, I know of no compiler that would allow a program's source to be printed while it is being compiled. They are completely separate and unrelated tasks.


What are the two parts of compilation?

1) source program to object program 2)object program to object program output


What is the compiler that is used in C?

A program that translates source program into object code.