#include
#include
#include
#include "2DArray.h"
#define GRAIN 1024 /* product size below which matmultleaf is used */
void seqMatMult(int m, int n, int p, double** A, double** B, double** C)
{
for (int i = 0; i < m; i++)
for (int j = 0; j < n; j++)
{
C[i][j] = 0.0;
for (int k = 0; k < p; k++)
C[i][j] += A[i][k]*B[k][j];
}
}
void matmultleaf(int mf, int ml, int nf, int nl, int pf, int pl, double **A, double **B, double **C)
/*
subroutine that uses the simple triple loop to multiply
a submatrix from A with a submatrix from B and store the
result in a submatrix of C.
*/
// mf, ml; /* first and last+1 i index */
// nf, nl; /* first and last+1 j index */
// pf, pl; /* first and last+1 k index */
{
for (int i = mf; i < ml; i++)
for (int j = nf; j < nl; j++)
for (int k = pf; k < pl; k++)
C[i][j] += A[i][k]*B[k][j];
}
void copyQtrMatrix(double **X, int m, double **Y, int mf, int nf)
{
for (int i = 0; i < m; i++)
X[i] = &Y[mf+i][nf];
}
void AddMatBlocks(double **T, int m, int n, double **X, double **Y)
{
for (int i = 0; i < m; i++)
for (int j = 0; j < n; j++)
T[i][j] = X[i][j] + Y[i][j];
}
void SubMatBlocks(double **T, int m, int n, double **X, double **Y)
{
for (int i = 0; i < m; i++)
for (int j = 0; j < n; j++)
T[i][j] = X[i][j] - Y[i][j];
}
void strassenMMult(int mf, int ml, int nf, int nl, int pf, int pl, double **A, double **B, double **C)
{
if ((ml-mf)*(nl-nf)*(pl-pf) < GRAIN)
matmultleaf(mf, ml, nf, nl, pf, pl, A, B, C);
else {
int m2 = (ml-mf)/2;
int n2 = (nl-nf)/2;
int p2 = (pl-pf)/2;
double **M1 = Allocate2DArray< double >(m2, n2);
double **M2 = Allocate2DArray< double >(m2, n2);
double **M3 = Allocate2DArray< double >(m2, n2);
double **M4 = Allocate2DArray< double >(m2, n2);
double **M5 = Allocate2DArray< double >(m2, n2);
double **M6 = Allocate2DArray< double >(m2, n2);
double **M7 = Allocate2DArray< double >(m2, n2);
double **A11 = new double*[m2];
double **A12 = new double*[m2];
double **A21 = new double*[m2];
double **A22 = new double*[m2];
double **B11 = new double*[p2];
double **B12 = new double*[p2];
double **B21 = new double*[p2];
double **B22 = new double*[p2];
double **C11 = new double*[m2];
double **C12 = new double*[m2];
double **C21 = new double*[m2];
double **C22 = new double*[m2];
double **tAM1 = Allocate2DArray< double >(m2, p2);
double **tBM1 = Allocate2DArray< double >(p2, n2);
double **tAM2 = Allocate2DArray< double >(m2, p2);
double **tBM3 = Allocate2DArray< double >(p2, n2);
double **tBM4 = Allocate2DArray< double >(p2, n2);
double **tAM5 = Allocate2DArray< double >(m2, p2);
double **tAM6 = Allocate2DArray< double >(m2, p2);
double **tBM6 = Allocate2DArray< double >(p2, n2);
double **tAM7 = Allocate2DArray< double >(m2, p2);
double **tBM7 = Allocate2DArray< double >(p2, n2);
copyQtrMatrix(A11, m2, A, mf, pf);
copyQtrMatrix(A12, m2, A, mf, p2);
copyQtrMatrix(A21, m2, A, m2, pf);
copyQtrMatrix(A22, m2, A, m2, p2);
copyQtrMatrix(B11, p2, B, pf, nf);
copyQtrMatrix(B12, p2, B, pf, n2);
copyQtrMatrix(B21, p2, B, p2, nf);
copyQtrMatrix(B22, p2, B, p2, n2);
copyQtrMatrix(C11, m2, C, mf, nf);
copyQtrMatrix(C12, m2, C, mf, n2);
copyQtrMatrix(C21, m2, C, m2, nf);
copyQtrMatrix(C22, m2, C, m2, n2);
// M1 = (A11 + A22)*(B11 + B22)
AddMatBlocks(tAM1, m2, p2, A11, A22);
AddMatBlocks(tBM1, p2, n2, B11, B22);
strassenMMult(0, m2, 0, n2, 0, p2, tAM1, tBM1, M1);
//M2 = (A21 + A22)*B11
AddMatBlocks(tAM2, m2, p2, A21, A22);
strassenMMult(0, m2, 0, n2, 0, p2, tAM2, B11, M2);
//M3 = A11*(B12 - B22)
SubMatBlocks(tBM3, p2, n2, B12, B22);
strassenMMult(0, m2, 0, n2, 0, p2, A11, tBM3, M3);
//M4 = A22*(B21 - B11)
SubMatBlocks(tBM4, p2, n2, B21, B11);
strassenMMult(0, m2, 0, n2, 0, p2, A22, tBM4, M4);
//M5 = (A11 + A12)*B22
AddMatBlocks(tAM5, m2, p2, A11, A12);
strassenMMult(0, m2, 0, n2, 0, p2, tAM5, B22, M5);
//M6 = (A21 - A11)*(B11 + B12)
SubMatBlocks(tAM6, m2, p2, A21, A11);
AddMatBlocks(tBM6, p2, n2, B11, B12);
strassenMMult(0, m2, 0, n2, 0, p2, tAM6, tBM6, M6);
//M7 = (A12 - A22)*(B21 + B22)
SubMatBlocks(tAM7, m2, p2, A12, A22);
AddMatBlocks(tBM7, p2, n2, B21, B22);
strassenMMult(0, m2, 0, n2, 0, p2, tAM7, tBM7, M7);
for (int i = 0; i < m2; i++)
for (int j = 0; j < n2; j++) {
C11[i][j] = M1[i][j] + M4[i][j] - M5[i][j] + M7[i][j];
C12[i][j] = M3[i][j] + M5[i][j];
C21[i][j] = M2[i][j] + M4[i][j];
C22[i][j] = M1[i][j] - M2[i][j] + M3[i][j] + M6[i][j];
}
Free2DArray< double >(M1);
Free2DArray< double >(M2);
Free2DArray< double >(M3);
Free2DArray< double >(M4);
Free2DArray< double >(M5);
Free2DArray< double >(M6);
Free2DArray< double >(M7);
delete[] A11; delete[] A12; delete[] A21; delete[] A22;
delete[] B11; delete[] B12; delete[] B21; delete[] B22;
delete[] C11; delete[] C12; delete[] C21; delete[] C22;
Free2DArray< double >(tAM1);
Free2DArray< double >(tBM1);
Free2DArray< double >(tAM2);
Free2DArray< double >(tBM3);
Free2DArray< double >(tBM4);
Free2DArray< double >(tAM5);
Free2DArray< double >(tAM6);
Free2DArray< double >(tBM6);
Free2DArray< double >(tAM7);
Free2DArray< double >(tBM7);
}
}
void matmultS(int m, int n, int p, double **A, double **B, double **C)
{
int i,j;
for (i=0; i < m; i++)
for (j=0; j < n; j++)
C[i][j] = 0;
strassenMMult(0, m, 0, n, 0, p, A, B, C);
}
int CheckResults(int m, int n, double **C, double **C1)
{
#define THRESHOLD 0.001
//
// May need to take into consideration the floating point roundoff error
// due to parallel execution
//
for (int i = 0; i < m; i++) {
for (int j = 0; j < n; j++) {
if (abs(C[i][j] - C1[i][j]) > THRESHOLD ) {
printf("%f %f\n", C[i][j], C1[i][j]);
return 1;
}
}
}
return 0;
}
int main(int argc, char* argv[])
{
clock_t before, after;
int M = atoi(argv[1]);
int N = atoi(argv[2]);
int P = atoi(argv[3]);
double **A = Allocate2DArray< double >(M, P);
double **B = Allocate2DArray< double >(P, N);
double **C = Allocate2DArray< double >(M, N);
double **C4 = Allocate2DArray< double >(M, N);
int i, j;
for (i = 0; i < M; i++) {
for (j = 0; j < P; j++) {
A[i][j] = 5.0 - ((double)(rand()%100) / 10.0);
}
}
for (i = 0; i < P; i++) {
for (j = 0; j < N; j++) {
B[i][j] = 5.0 - ((double)(rand()%100) / 10.0);
}
}
printf("Execute Standard matmult\n\n");
before = clock();
seqMatMult(M, N, P, A, B, C);
after = clock();
printf("Standard matrix function done in %7.2f secs\n\n\n",(float)(after - before)/ CLOCKS_PER_SEC);
before = clock();
matmultS(M, N, P, A, B, C4);
after = clock();
printf("Strassen matrix function done in %7.2f secs\n\n\n",(float)(after - before)/ CLOCKS_PER_SEC);
if (CheckResults(M, N, C, C4))
printf("Error in matmultS\n\n");
else
printf("OKAY\n\n");
Free2DArray(A);
Free2DArray(B);
Free2DArray(C);
Free2DArray(C4);
return 0;
}
the program written in high level language is called "source program"
computer
1) source program to object program 2)object program to object program output
There are so many programming languages that it is impossible to tell without actually seeing the source program in question.
What type of a program is used in order to enter C source code
the program written in high level language is called "source program"
Source program or source code in any language is the code you write to make the program do what you want. Things like: #include <stdio.h> void main (); and so on are all pieces of source-code or source program
SOURCE PROGRAM=A set of instructions of the high level language used to code problems to find its solution on a computer is referred as source program. OBJECT PROGRAM=The computer translates the source program into machine language program called object program by using an interpreter or compiler is called object program.
No.
Yes.
See Numpy (a Python library for general N-dimensional matrix operations): http://numpy.org/
computer
Into the source program.
No, that's what the compiler does.
No. You can compile without printing the source. Indeed, I know of no compiler that would allow a program's source to be printed while it is being compiled. They are completely separate and unrelated tasks.
1) source program to object program 2)object program to object program output
A program that translates source program into object code.