[Scipy-svn] r3784 - trunk/scipy/sparse/sparsetools
scipy-svn@scip...
scipy-svn@scip...
Sat Jan 5 00:05:59 CST 2008
Author: wnbell
Date: 2008-01-05 00:05:54 -0600 (Sat, 05 Jan 2008)
New Revision: 3784
Modified:
trunk/scipy/sparse/sparsetools/fixed_size.h
trunk/scipy/sparse/sparsetools/sparsetools.h
Log:
added fixed size matrix matrix products for fast BSR * BSR
Modified: trunk/scipy/sparse/sparsetools/fixed_size.h
===================================================================
--- trunk/scipy/sparse/sparsetools/fixed_size.h 2008-01-04 21:33:02 UTC (rev 3783)
+++ trunk/scipy/sparse/sparsetools/fixed_size.h 2008-01-05 06:05:54 UTC (rev 3784)
@@ -2,7 +2,7 @@
#define FIXED_SIZE_H
/*
- * templates for fixed size array and vector arithmetic
+ * templates for fixed size vector and matrix arithmetic
*
*/
@@ -12,95 +12,139 @@
* Dot Product
*
*/
-template<int N, class T>
+template<int N, int SX, int SY, class T>
class _dot
{
public:
- inline T operator()(const T * V1, const T * V2)
+ inline T operator()(const T * X, const T * Y)
{
- _dot<N-1,T> d;
- return (*V1) * (*V2) + d(V1 + 1, V2 + 1);
+ _dot<N-1,SX,SY,T> d;
+ return (*X) * (*Y) + d(X + SX, Y + SY);
}
};
-template<class T>
-class _dot<1,T>
+template<int SX, int SY, class T>
+class _dot<1,SX,SY,T>
{
public:
- inline T operator()(const T * V1, const T * V2)
+ inline T operator()(const T * X, const T * Y)
{
- return (*V1) * (*V2);
+ return (*X) * (*Y);
}
};
-template<int N, class T>
-inline T dot(const T * V1, const T * V2)
+template<int N, int SX, int SY, class T>
+inline T dot(const T * X, const T * Y)
{
- _dot<N,T> d;
- return d(V1, V2);
+ _dot<N,SX,SY,T> d;
+ return d(X, Y);
}
/*
- * Matrix Vector Product
+ * Matrix Vector Product Y = A*X
*
*/
-template<int M, int N, class T>
+template<int M, int N, int SX, int SY, class T>
class _matvec
{
public:
inline void operator()(const T * A, const T * X, T * Y)
{
- *Y += dot<N,T>(A,X);
- _matvec<M-1,N,T> d;
- d(A + N, X, Y + 1);
+ *Y += dot<N,1,SX,T>(A,X);
+ _matvec<M-1,N,SX,SY,T> d;
+ d(A + N, X, Y + SY);
}
};
-template<int N, class T>
-class _matvec<1,N,T>
+
+template<int N, int SX, int SY, class T>
+class _matvec<1,N,SX,SY,T>
{
public:
inline void operator()(const T * A, const T * X, T * Y)
{
- *Y += dot<N,T>(A,X);
+ *Y += dot<N,1,SX,T>(A,X);
}
};
-template<int M, int N, class T>
+template<int M, int N, int SX, int SY, class T>
inline void matvec(const T * A, const T * X, T * Y)
{
- _matvec<M,N,T> d;
+ _matvec<M,N,SX,SY,T> d;
d(A,X,Y);
}
+/*
+ * Matrix Matrix Product C = A*B
+ *
+ * C is L*N
+ * A is L*M
+ * B is M*N
+ *
+ */
+template<int L, int M, int N, int U, class T>
+class _matmat
+{
+ public:
+ inline void operator()(const T * A, const T * B, T * C)
+ {
+ matvec<L,M,N,N>(A,B,C);
+
+ _matmat<L,M,N,U-1,T> d;
+ d(A, B + 1, C + 1);
+ }
+};
+template<int L, int M, int N, class T>
+class _matmat<L,M,N,0,T>
+{
+ public:
+ inline void operator()(const T * A, const T * B, T * C)
+ {
+ matvec<L,M,N,N>(A,B,C);
+ }
+};
+template<int L, int M, int N, class T>
+inline void matmat(const T * A, const T * B, T * C)
+{
+ _matmat<L,M,N,N-1,T> d;
+ d(A,B,C);
+}
+
+
+
+/*
+ * Binary vector operation Z = op(X,Y)
+ *
+ */
+
template<int N, class T, class bin_op>
class _vec_binop_vec
{
public:
- inline void operator()(const T * V1, const T * V2, T * V3, const bin_op& op)
+ inline void operator()(const T * X, const T * Y, T * Z, const bin_op& op)
{
- *V3 = op( *V1, *V2 );
+ *Z = op( *X, *Y );
_vec_binop_vec<N-1,T,bin_op> d;
- d(V1 + 1, V2 + 1, V3 + 1, op);
+ d(X + 1, Y + 1, Z + 1, op);
}
};
template<class T, class bin_op>
class _vec_binop_vec<1,T,bin_op>
{
public:
- inline void operator()(const T * V1, const T * V2, T * V3, const bin_op& op)
+ inline void operator()(const T * X, const T * Y, T * Z, const bin_op& op)
{
- *V3 = op( *V1, *V2 );
+ *Z = op( *X, *Y );
}
};
template<int N, class T, class bin_op>
-inline void vec_binop_vec(const T * V1, const T * V2, T * V3, const bin_op& op)
+inline void vec_binop_vec(const T * X, const T * Y, T * Z, const bin_op& op)
{
_vec_binop_vec<N,T,bin_op> d;
- d(V1,V2,V3,op);
+ d(X,Y,Z,op);
}
Modified: trunk/scipy/sparse/sparsetools/sparsetools.h
===================================================================
--- trunk/scipy/sparse/sparsetools/sparsetools.h 2008-01-04 21:33:02 UTC (rev 3783)
+++ trunk/scipy/sparse/sparsetools/sparsetools.h 2008-01-05 06:05:54 UTC (rev 3784)
@@ -462,6 +462,74 @@
Cp[i+1] = nnz;
}
}
+
+
+
+template <class I, class T, int R, int C, int N>
+void bsr_matmat_pass2_fixed(const I n_brow, const I n_bcol,
+ const I Ap[], const I Aj[], const T Ax[],
+ const I Bp[], const I Bj[], const T Bx[],
+ I Cp[], I Cj[], T Cx[])
+{
+ const I RC = R*C;
+ const I RN = R*N;
+ const I NC = N*C;
+ const I SIZE = RC*Cp[n_brow];
+
+ for(I i = 0; i < SIZE; i++){
+ Cx[i] = 0;
+ }
+
+ std::vector<I> next(n_bcol,-1);
+ std::vector<T*> mats(n_bcol);
+
+
+ I nnz = 0;
+
+ Cp[0] = 0;
+
+ for(I i = 0; i < n_brow; i++){
+ I head = -2;
+ I length = 0;
+
+ I jj_start = Ap[i];
+ I jj_end = Ap[i+1];
+ for(I jj = jj_start; jj < jj_end; jj++){
+ I j = Aj[jj];
+
+ I kk_start = Bp[j];
+ I kk_end = Bp[j+1];
+ for(I kk = kk_start; kk < kk_end; kk++){
+ I k = Bj[kk];
+
+ if(next[k] == -1){
+ next[k] = head;
+ head = k;
+ Cj[nnz] = k;
+ mats[k] = Cx + RC*nnz;
+ nnz++;
+ length++;
+ }
+
+ const T * A = Ax + jj*RN;
+ const T * B = Bx + kk*NC;
+ T * result = mats[k];
+ matmat<R,C,N>(A,B,result);
+ }
+ }
+
+ for(I jj = 0; jj < length; jj++){
+ I temp = head;
+ head = next[head];
+ next[temp] = -1; //clear arrays
+ }
+
+ }
+
+}
+
+#define F(X,Y,Z) bsr_matmat_pass2_fixed<I,T,X,Y,Z>
+
template <class I, class T>
void bsr_matmat_pass2(const I n_brow, const I n_bcol,
const I R, const I C, const I N,
@@ -469,21 +537,55 @@
const I Bp[], const I Bj[], const T Bx[],
I Cp[], I Cj[], T Cx[])
{
+ assert(R > 0 && C > 0 && N > 0);
+
+#ifdef TESTING
+ void (*dispatch[4][4][4])(I,I,const I*,const I*,const T*,
+ const I*,const I*,const T*,
+ I*, I*, T*) = \
+ {
+ { { F(1,1,1), F(1,1,2), F(1,1,3), F(1,1,4) },
+ { F(1,2,1), F(1,2,2), F(1,2,3), F(1,2,4) },
+ { F(1,3,1), F(1,3,2), F(1,3,3), F(1,3,4) },
+ { F(1,4,1), F(1,4,2), F(1,4,3), F(1,4,4) },
+ },
+ { { F(2,1,1), F(2,1,2), F(2,1,3), F(2,1,4) },
+ { F(2,2,1), F(2,2,2), F(2,2,3), F(2,2,4) },
+ { F(2,3,1), F(2,3,2), F(2,3,3), F(2,3,4) },
+ { F(2,4,1), F(2,4,2), F(2,4,3), F(2,4,4) },
+ },
+ { { F(3,1,1), F(3,1,2), F(3,1,3), F(3,1,4) },
+ { F(3,2,1), F(3,2,2), F(3,2,3), F(3,2,4) },
+ { F(3,3,1), F(3,3,2), F(3,3,3), F(3,3,4) },
+ { F(3,4,1), F(3,4,2), F(3,4,3), F(3,4,4) },
+ },
+ { { F(4,1,1), F(4,1,2), F(4,1,3), F(4,1,4) },
+ { F(4,2,1), F(4,2,2), F(4,2,3), F(4,2,4) },
+ { F(4,3,1), F(4,3,2), F(4,3,3), F(4,3,4) },
+ { F(4,4,1), F(4,4,2), F(4,4,3), F(4,4,4) },
+ }
+ };
+
+ if (R <= 4 && C <= 4 && N <= 4){
+ dispatch[R-1][N-1][C-1](n_brow,n_bcol,Ap,Aj,Ax,Bp,Bj,Bx,Cp,Cj,Cx);
+ return;
+ }
+#endif
+
const I RC = R*C;
const I RN = R*N;
const I NC = N*C;
const I SIZE = RC*Cp[n_brow];
+
for(I i = 0; i < SIZE; i++){
Cx[i] = 0;
}
std::vector<I> next(n_bcol,-1);
std::vector<T*> mats(n_bcol);
-
I nnz = 0;
-
Cp[0] = 0;
for(I i = 0; i < n_brow; i++){
@@ -518,34 +620,24 @@
result[C*r + c] += A[N*r + n] * B[C*n + c];
}
- //std::cout << "result[" << r << "," << c << "] = " << result[C*r + c] << std::endl;
}
}
}
}
for(I jj = 0; jj < length; jj++){
-
- //if(is_nonzero_block(result,RC)){
- // Cj[nnz] = head;
- // Cx[nnz] = sums[head];
- // nnz++;
- //}
-
I temp = head;
head = next[head];
-
next[temp] = -1; //clear arrays
}
- //Cp[i+1] = nnz;
}
}
+#undef F
-
template <class I, class T>
bool is_nonzero_block(const T block[], const I blocksize){
for(I i = 0; i < blocksize; i++){
@@ -1148,7 +1240,7 @@
for(I i = 0; i < n_brow; i++) {
for(I jj = Ap[i]; jj < Ap[i+1]; jj++) {
I j = Aj[jj];
- matvec<R,C>(Ax + jj*R*C, Xx + j*C, Yx + i*R);
+ matvec<R,C,1,1>(Ax + jj*R*C, Xx + j*C, Yx + i*R);
}
}
}
More information about the Scipy-svn
mailing list