#include "nsbbsc.h" void BSC_MatMult_CaABbC_double( const int mb, const int n, const int kb, const double alpha, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const int lb, const double *b, const int ldb, const double beta, double *c, const int ldc, const int ind_base) { double t; const double *pb; double *pc=c; const double *pval; const double *ptmp; /* VECDEL */ int i,j,jb,je,block; int cs,bs,br; int index; int l; /* VECDEL */ int ii,jj; int m=mb*lb; int k=kb*lb; int mm=lb*lb; bindx-=ind_base; for (l=0;l!=n;l++) /* VECDEL MMBETADEL */ for (i=0;i!=m;i++) *pc++ *= beta; /* MMBETADEL */ for (i=0;i!=kb;i++) { /* For each block column i */ jb = bpntrb[i]; /* beginning block in column */ je = bpntre[i]; /* ending block in column */ bs = i*lb; /* start of b for this block operation */ for (j=jb;j!=je;j++) { /* For each block j in this column */ index = bindx[j]; cs=(index-1)*lb; /* start of c */ pval = &val[(j-1)*mm]; /* pointer to start of block */ ptmp = pval; /* copy for later use VECDEL */ /* GEMM call inlined here: */ pb = &b[bs]; /* pointer to start of b */ pc = &c[cs]; /* pointer to start of c */ for (l=0;l!=n;l++) { /* For each column of c VECDEL */ for (jj=0;jj!=lb;jj++) { /* For each column of block */ if( pb[jj] != 0.0 ) { /* If non-zero multiplier */ t = alpha * pb[jj]; for (ii=0;ii!=lb;ii++) { /* For each element in column */ pc[ii] += t* (*pval++); /* update element of c */ } } else { pval+=lb; /* Skip this column of block */ } } pb+=ldb; pc+=ldc; pval=ptmp; /* to next column of c;VECDEL */ } /* VECDEL */ } } } void BSC_MatMult_CaATBbC_double( const int mb, const int n, const int kb, const double alpha, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const int lb, const double *b, const int ldb, const double beta, double *c, const int ldc, const int ind_base) { double t; const double *pb; double *pc=c; const double *pval; const double *ptmp; /* VECDEL */ int i,j,jb,je,block; int cs,bs,cr; int index; int l; /* VECDEL */ int ii,jj; int m=mb*lb; int k=kb*lb; int mm=lb*lb; bindx-=ind_base; for (l=0;l!=n;l++) /* VECDEL MMBETADEL */ for (i=0;i!=k;i++) *pc++ *= beta; /* MMBETADEL */ for (i=0;i!=kb;i++) { /* For each block row i */ jb = bpntrb[i]; /* beginning block in row */ je = bpntre[i]; /* ending block in row */ cs = i*lb; /* start of c for this block operation */ for (j=jb;j!=je;j++) { /* For each block j in this row */ index = bindx[j]; bs=(index-1)*lb; /* start of b */ pval = &val[(j-1)*mm]; /* pointer to start of block */ ptmp = pval; /* copy for later use VECDEL */ /* GEMM call inlined here: */ pb = &b[bs]; /* pointer to start of b */ pc = &c[cs]; /* pointer to start of c */ for (l=0;l!=n;l++) { /* For each column of c VECDEL */ for (jj=0;jj!=lb;jj++) { /* For each row of block */ t = 0; for (ii=0;ii!=lb;ii++) { /* accummulate dot with b */ t += alpha * pb[ii] * (*pval++); } pc[jj] += t; /* update element of c */ } pb+=ldb; pc+=ldc; pval=ptmp; /* to next column of c;VECDEL */ } /* VECDEL */ } } } void BSCsymm_MatMult_CaABbC_double( const int mb, const int n, const int kb, const double alpha, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const int lb, const double *b, const int ldb, const double beta, double *c, const int ldc, const int ind_base) { double t; const double *pb; double *pc=c; const double *pval; const double *ptmp; int i,j,jb,je,block; int cs,css,cr,bs,br; int index; int l; /* VECDEL */ int ii,jj; int m=mb*lb; int k=kb*lb; int mm=lb*lb; bindx-=ind_base; for (l=0;l!=n;l++) /* VECDEL MMBETADEL */ for (i=0;i!=m;i++) *pc++ *= beta; /* MMBETADEL */ for (i=0;i!=kb;i++) { /* For each block column i */ jb = bpntrb[i]; /* beginning block in column */ je = bpntre[i]; /* ending block in column */ bs = i*lb; /* start of b for this block operation */ for (j=jb;j!=je;j++) { /* For each block j in this row */ index = bindx[j]; cs=(index-1)*lb; /* start of c */ pval = &val[(j-1)*mm]; /* pointer to start of block */ ptmp = pval; /* copy for later use */ /* GEMM call inlined here: */ pb = &b[bs]; /* pointer to start of b */ pc = &c[cs]; /* pointer to start of c */ for (l=0;l!=n;l++) { /* For each column of c VECDEL */ for (jj=0;jj!=lb;jj++) { /* For each column of block */ if( pb[jj] != 0.0 ) { /* If non-zero multiplier */ t = alpha * pb[jj]; for (ii=0;ii!=lb;ii++) { /* For each element in column */ pc[ii] += t* (*pval++); /* update element of c */ } } else { pval+=lb; /* Skip this column of block */ } } pb+=ldb; pc+=ldc; pval=ptmp; /* to next column of c;VECDEL */ } /* VECDEL */ /* If on diagonal, done with block. Go to next. */ if ( index == i + ind_base ) { continue; } /* Else, perform mult with transpose of this block */ pval = ptmp; /* pointer to start of block */ /* GEMM call inlined here: */ pb = &b[cs]; /* pointer to start of b */ pc = &c[bs]; /* pointer to start of c */ for (l=0;l!=n;l++) { /* For each column of c VECDEL */ for (jj=0;jj!=lb;jj++) { /* For each row of block */ t = 0; for (ii=0;ii!=lb;ii++) { /* accummulate dot with b */ t += alpha * pb[ii] * (*pval++); } pc[jj] += t; /* update element of c */ } pb+=ldb; pc+=ldc; pval=ptmp; /* to next column of c;VECDEL */ } /* VECDEL */ } } } void BSCskew_MatMult_CaABbC_double( const int mb, const int n, const int kb, const double alpha, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const int lb, const double *b, const int ldb, const double beta, double *c, const int ldc, const int ind_base) { double t; const double *pb; double *pc=c; const double *pval; const double *ptmp; int i,j,jb,je,block; int cs,css,cr,bs,br; int index; int l; /* VECDEL */ int ii,jj; int m=mb*lb; int k=kb*lb; int mm=lb*lb; bindx-=ind_base; for (l=0;l!=n;l++) /* VECDEL MMBETADEL */ for (i=0;i!=m;i++) *pc++ *= beta; /* MMBETADEL */ for (i=0;i!=kb;i++) { /* For each block column i */ jb = bpntrb[i]; /* beginning block in column */ je = bpntre[i]; /* ending block in column */ bs = i*lb; /* start of b for this block operation */ for (j=jb;j!=je;j++) { /* For each block j in this row */ index = bindx[j]; /* If on diagonal, assume zero. Go to next. */ if ( index == i + ind_base ) { continue; } cs=(index-1)*lb; /* start of c */ pval = &val[(j-1)*mm]; /* pointer to start of block */ ptmp = pval; /* copy for later use */ /* GEMM call inlined here: */ pb = &b[bs]; /* pointer to start of b */ pc = &c[cs]; /* pointer to start of c */ for (l=0;l!=n;l++) { /* For each column of c VECDEL */ for (jj=0;jj!=lb;jj++) { /* For each column of block */ if( pb[jj] != 0.0 ) { /* If non-zero multiplier */ t = alpha * pb[jj]; for (ii=0;ii!=lb;ii++) { /* For each element in column */ pc[ii] += t* (*pval++); /* update element of c */ } } else { pval+=lb; /* Skip this column of block */ } } pb+=ldb; pc+=ldc; pval=ptmp; /* to next column of c;VECDEL */ } /* VECDEL */ /* Else, perform mult with transpose of this block */ pval = ptmp; /* pointer to start of block */ /* GEMM call inlined here: */ pb = &b[cs]; /* pointer to start of b */ pc = &c[bs]; /* pointer to start of c */ for (l=0;l!=n;l++) { /* For each column of c VECDEL */ for (jj=0;jj!=lb;jj++) { /* For each row of block */ t = 0; for (ii=0;ii!=lb;ii++) { /* accummulate dot with b */ t -= alpha * pb[ii] * (*pval++); } pc[jj] += t; /* update element of c */ } pb+=ldb; pc+=ldc; pval=ptmp; /* to next column of c;VECDEL */ } /* VECDEL */ } } } void BSCskew_MatMult_CaATBbC_double( const int mb, const int n, const int kb, const double alpha, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const int lb, const double *b, const int ldb, const double beta, double *c, const int ldc, const int ind_base) { double t; const double *pb; double *pc=c; const double *pval; int i,j,jb,je,block; int cs,css,cr,bs,br; int index; int ii,jj; int m=mb*lb; int k=kb*lb; int mm=lb*lb; const double *ptmp; int l; /* VECDEL */ bindx-=ind_base; for (l=0;l!=n;l++) /* VECDEL MMBETADEL */ for (i=0;i!=k;i++) *pc++ *= beta; /* MMBETADEL */ for (i=0;i!=kb;i++) { /* For each block column i */ jb = bpntrb[i]; /* beginning block in column */ je = bpntre[i]; /* ending block in column */ bs = i*lb; /* start of b for this block operation */ for (j=jb;j!=je;j++) { /* For each block j in this row */ index = bindx[j]; /* If on diagonal, done with block. Go to next. */ if ( index == i + ind_base ) { continue; } cs=(index-1)*lb; /* start of c */ pval = &val[(j-1)*mm]; /* pointer to start of block */ ptmp = pval; /* copy for later use */ /* GEMM call inlined here: */ pb = &b[bs]; /* pointer to start of b */ pc = &c[cs]; /* pointer to start of c */ for (l=0;l!=n;l++) { /* For each column of c VECDEL */ for (jj=0;jj!=lb;jj++) { /* For each column of block */ if( pb[jj] != 0.0 ) { /* If non-zero multiplier */ t = alpha * pb[jj]; for (ii=0;ii!=lb;ii++) { /* For each element in column */ pc[ii] -= t* (*pval++); /* update element of c */ } } else { pval+=lb; /* Skip this column of block */ } } pb+=ldb; pc+=ldc; pval=ptmp; /* to next column of c;VECDEL */ } /* VECDEL */ /* Else, perform mult with transpose of this block */ pval = ptmp; /* pointer to start of block */ /* GEMM call inlined here: */ pb = &b[cs]; /* pointer to start of b */ pc = &c[bs]; /* pointer to start of c */ for (l=0;l!=n;l++) { /* For each column of c VECDEL */ for (jj=0;jj!=lb;jj++) { /* For each row of block */ t = 0; for (ii=0;ii!=lb;ii++) { /* accummulate dot with b */ t += alpha * pb[ii] * (*pval++); } pc[jj] += t; /* update element of c */ } pb+=ldb; pc+=ldc; pval=ptmp; /* to next column of c;VECDEL */ } /* VECDEL */ } } }