[Scipy-svn] r3694 - trunk/scipy/sparse

scipy-svn@scip... scipy-svn@scip...
Thu Dec 20 15:42:33 CST 2007


Author: wnbell
Date: 2007-12-20 15:42:31 -0600 (Thu, 20 Dec 2007)
New Revision: 3694

Added:
   trunk/scipy/sparse/block.py
   trunk/scipy/sparse/bsr.py
Log:
add bsr_matrix format


Added: trunk/scipy/sparse/block.py
===================================================================
--- trunk/scipy/sparse/block.py	2007-12-20 21:41:58 UTC (rev 3693)
+++ trunk/scipy/sparse/block.py	2007-12-20 21:42:31 UTC (rev 3694)
@@ -0,0 +1,263 @@
+"""base class for block sparse formats"""
+
+from numpy import zeros, intc, array, asarray, arange, diff, tile, rank, \
+        prod
+
+from data import _data_matrix
+from base import isspmatrix, _formats
+from sputils import isshape, getdtype, to_native
+
+class _block_matrix(_data_matrix):
+    def __init__(self, arg1, shape=None, dtype=None, copy=False, blocksize=None):
+        _data_matrix.__init__(self)
+
+        #process blocksize
+        if blocksize is None:
+            blocksize = (1,1)
+        else:
+            if not isshape(blocksize):
+                raise ValueError,'invalid blocksize=%s',blocksize
+            blocksize = tuple(blocksize)
+        
+        if isspmatrix(arg1):
+            if arg1.format == self.format and copy:
+                arg1 = arg1.copy()
+            else:
+                arg1 = getattr(arg1,'to' + self.format)(blocksize=blocksize)
+            self._set_self( arg1 )
+            
+        elif isinstance(arg1,tuple):
+            if isshape(arg1):
+                #it's a tuple of matrix dimensions (M,N)
+                self.shape  = arg1
+                M,N = self.shape
+                self.data   = zeros( (0,) + blocksize, getdtype(dtype, default=float) )
+                self.indices = zeros( 0, dtype=intc )
+                
+                X,Y = blocksize
+                if (M % X) != 0 or (N % Y) != 0:
+                    raise ValueError, 'shape must be multiple of blocksize'
+
+                self.indptr  = zeros(self._swap((M/X,N/Y))[0] + 1, dtype=intc )
+
+            elif len(arg1) == 3:
+                # data,indices,indptr format
+                (data, indices, indptr) = arg1
+                self.indices = array(indices, copy=copy)
+                self.indptr  = array(indptr,  copy=copy)
+                self.data    = array(data,    copy=copy, \
+                        dtype=getdtype(dtype, data))
+            else:
+                raise ValueError, "unrecognized form for" \
+                        " %s_matrix constructor" % self.format
+        else:
+            #must be dense
+            try:
+                arg1 = asarray(arg1)
+            except:
+                raise ValueError, "unrecognized form for" \
+                        " %s_matrix constructor" % self.format
+            from coo import coo_matrix
+            arg1 = self.__class__( coo_matrix(arg1), blocksize=blocksize )
+            self._set_self( arg1 )
+
+        if self.shape is None:
+            if shape is None:
+                #infer shape here
+                raise ValueError,'need to infer shape'
+            else:
+                self.shape = shape
+
+        self.check_format()
+
+    def check_format(self, full_check=True):
+        """check whether the matrix format is valid
+
+            *Parameters*:
+                full_check:
+                    True  - rigorous check, O(N) operations : default
+                    False - basic check, O(1) operations
+
+        """
+
+        #use _swap to determine proper bounds
+        major_name,minor_name = self._swap(('row','column'))
+        major_dim,minor_dim = self._swap(self.shape)
+        major_blk,minor_blk = self._swap(self.blocksize)
+
+        # index arrays should have integer data types
+        if self.indptr.dtype.kind != 'i':
+            warn("indptr array has non-integer dtype (%s)" \
+                    % self.indptr.dtype.name )
+        if self.indices.dtype.kind != 'i':
+            warn("indices array has non-integer dtype (%s)" \
+                    % self.indices.dtype.name )
+
+        # only support 32-bit ints for now
+        self.indptr  = self.indptr.astype(intc)
+        self.indices = self.indices.astype(intc)
+        self.data    = to_native(self.data)
+
+        # check array shapes
+        if (rank(self.indices) != 1) or (rank(self.indptr) != 1):
+            raise ValueError,"indices, and indptr should be rank 1"
+        if rank(self.data) != 3:
+            raise ValueError,"data should be rank 3"
+
+        # check index pointer
+        if (len(self.indptr) != major_dim/major_blk + 1 ):
+            raise ValueError, \
+                "index pointer size (%d) should be (%d)" % \
+                 (len(self.indptr), major_dim/major_blk + 1)
+        if (self.indptr[0] != 0):
+            raise ValueError,"index pointer should start with 0"
+
+        # check index and data arrays
+        if (len(self.indices) != len(self.data)):
+            raise ValueError,"indices and data should have the same size"
+        if (self.indptr[-1] > len(self.indices)):
+            raise ValueError, \
+                  "Last value of index pointer should be less than "\
+                  "the size of index and data arrays"
+
+        self.prune()
+
+        if full_check:
+            #check format validity (more expensive)
+            if self.nnz > 0:
+                if self.indices.max() >= minor_dim/minor_blk:
+                    raise ValueError, "%s index values must be < %d" % \
+                            (minor_name,minor_dim)
+                if self.indices.min() < 0:
+                    raise ValueError, "%s index values must be >= 0" % \
+                            minor_name
+                if diff(self.indptr).min() < 0:
+                    raise ValueError,'index pointer values must form a " \
+                                        "non-decreasing sequence'
+
+
+    def _get_blocksize(self):
+        return self.data.shape[1:]
+    blocksize = property(fget=_get_blocksize)
+    
+    def getnnz(self):
+        X,Y = self.blocksize
+        return self.indptr[-1] * X * Y
+    nnz = property(fget=getnnz)
+    
+    def __repr__(self):
+        nnz = self.getnnz()
+        format = self.getformat()
+        return "<%dx%d sparse matrix of type '%s'\n" \
+               "\twith %d stored elements (blocksize = %dx%d) in %s format>" % \
+               ( self.shape + (self.dtype.type, nnz) + self.blocksize + \
+                 (_formats[format][1],) )
+
+    def _set_self(self, other, copy=False):
+        """take the member variables of other and assign them to self"""
+
+        if copy:
+            other = other.copy()
+
+        self.data      = other.data
+        self.indices   = other.indices
+        self.indptr    = other.indptr
+        self.shape     = other.shape
+
+   
+    #conversion methods
+    def toarray(self):
+        A = self.tocoo(copy=False)
+        M = zeros(self.shape, dtype=self.dtype)
+        M[A.row, A.col] = A.data
+        return M
+
+    def todia(self):
+        return self.tocoo(copy=False).todia()
+    
+    def todok(self):
+        return self.tocoo(copy=False).todok()
+
+    def tocsr(self):
+        return self.tocoo(copy=False).tocsr()
+        #TODO make this more efficient
+
+    def tocsc(self):
+        return self.tocoo(copy=False).tocsc()
+        #TODO make this more efficient
+
+
+
+
+    # methods that modify the internal data structure
+    def sorted_indices(self):
+        """Return a copy of this matrix with sorted indices
+        """
+        A = self.copy()
+        A.sort_indices()
+        return A
+
+        # an alternative that has linear complexity is the following
+        # typically the previous option is faster
+        #return self.toother().toother()
+
+    def sort_indices(self):
+        """Sort the indices of this matrix *in place*
+        """
+        X,Y = self.blocksize
+        M,N = self.shape
+
+        #use CSR.sort_indices to determine a permutation for BSR<->BSC
+        major,minor = self._swap((M/X,N/Y))
+
+        data = arange(len(self.indices), dtype=self.indices.dtype)
+        proxy = csr_matrix((data,self.indices,self.indptr),shape=(major,minor))
+        proxy.sort_indices()
+
+        self.data[:] = self.data[proxy.data]
+        self.indices = proxy.indices
+
+    def prune(self):
+        """ Remove empty space after all non-zero elements.
+        """
+        major_dim = self._swap(self.shape)[0]
+        major_blk = self._swap(self.blocksize)[0]
+
+        if len(self.indptr) != major_dim/major_blk + 1:
+            raise ValueError, "index pointer has invalid length"
+        if len(self.indices) < self.nnz / prod(self.blocksize): 
+            raise ValueError, "indices has too few elements"
+        if self.data.size < self.nnz:
+            raise ValueError, "data array has too few elements"
+        
+        self.data    = self.data[:self.nnz]
+        self.indices = self.indices[:self.nnz]
+
+
+    # needed by _data_matrix
+    def _with_data(self,data,copy=True):
+        """Returns a matrix with the same sparsity structure as self,
+        but with different data.  By default the structure arrays
+        (i.e. .indptr and .indices) are copied.
+        """
+        if copy:
+            return self.__class__((data,self.indices.copy(),self.indptr.copy()), \
+                                   shape=self.shape,dtype=data.dtype)
+        else:
+            return self.__class__((data,self.indices,self.indptr), \
+                                   shape=self.shape,dtype=data.dtype)
+
+
+
+
+
+
+    
+# test with:
+# A = arange(4*6).reshape(4,6) % 5
+# A[0:2,2:4] = 0
+# A[0:2,:]   = 0
+
+
+
+

Added: trunk/scipy/sparse/bsr.py
===================================================================
--- trunk/scipy/sparse/bsr.py	2007-12-20 21:41:58 UTC (rev 3693)
+++ trunk/scipy/sparse/bsr.py	2007-12-20 21:42:31 UTC (rev 3694)
@@ -0,0 +1,165 @@
+"""Compressed Block Sparse Row matrix format
+"""
+
+__all__ = ['bsr_matrix', 'isspmatrix_bsr']
+
+from numpy import zeros, intc, array, asarray, arange, diff, tile, rank, \
+        prod, ravel, empty, matrix, asmatrix
+
+from sparsetools import bsr_matvec
+from block import _block_matrix 
+from base import isspmatrix
+from sputils import isdense, upcast, isscalarlike
+
+class bsr_matrix(_block_matrix):
+    #TODO add docstring
+
+    def __mul__(self, other): # self * other
+        """ Scalar, vector, or matrix multiplication
+        """
+        if isscalarlike(other):
+            return self._with_data(self.data * other)
+        else:
+            return self.dot(other)
+
+    def matvec(self, other, output=None):
+        """Sparse matrix vector product (self * other)
+
+        'other' may be a rank 1 array of length N or a rank 2 array 
+        or matrix with shape (N,1).  
+        
+        If the optional 'output' parameter is defined, it will
+        be used to store the result.  Otherwise, a new vector
+        will be allocated.
+             
+        """
+        if isdense(other):
+            M,N = self.shape
+            X,Y = self.blocksize
+
+            if other.shape != (N,) and other.shape != (N,1):
+                raise ValueError, "dimension mismatch"
+
+    
+            #output array
+            if output is None:
+                y = empty( self.shape[0], dtype=upcast(self.dtype,other.dtype) )
+            else:
+                if output.shape != (M,) and output.shape != (M,1):
+                    raise ValueError, "output array has improper dimensions"
+                if not output.flags.c_contiguous:
+                    raise ValueError, "output array must be contiguous"
+                if output.dtype != upcast(self.dtype,other.dtype):
+                    raise ValueError, "output array has dtype=%s "\
+                            "dtype=%s is required" % \
+                            (output.dtype,upcast(self.dtype,other.dtype))
+                y = output
+            
+            
+            bsr_matvec(M/X, N/Y, X, Y, \
+                self.indptr, self.indices, ravel(self.data), ravel(other), y)
+
+            if isinstance(other, matrix):
+                y = asmatrix(y)
+
+            if other.ndim == 2 and other.shape[1] == 1:
+                # If 'other' was an (nx1) column vector, reshape the result
+                y = y.reshape(-1,1)
+
+            return y
+
+        elif isspmatrix(other):
+            raise TypeError, "use matmat() for sparse * sparse"
+
+        else:
+            raise TypeError, "need a dense vector"
+
+
+
+
+
+
+
+    def transpose(self,copy=False):
+        M,N = self.shape
+            
+        data    = self.data.swapaxes(1,2)
+        indices = self.indices
+        indptr  = self.indptr
+
+        from bsc import bsc_matrix
+        return bsc_matrix( (data,indices,indptr), shape=(N,M), copy=copy)
+   
+    def tocoo(self,copy=True):
+        """Convert this matrix to COOrdinate format.
+
+        When copy=False the data array will be shared between
+        this matrix and the resultant coo_matrix.
+        """
+        
+        M,N = self.shape
+        X,Y = self.blocksize
+
+        row  = (X * arange(M/X)).repeat(diff(self.indptr))
+        row  = row.repeat(X*Y).reshape(-1,X,Y)
+        row += tile( arange(X).reshape(-1,1), (1,Y) )
+        row  = row.reshape(-1) 
+
+        col  = (Y * self.indices).repeat(X*Y).reshape(-1,X,Y)
+        col += tile( arange(Y), (X,1) )
+        col  = col.reshape(-1)
+
+        data = self.data.reshape(-1)
+
+        if copy:
+            data = data.copy()
+
+        from coo import coo_matrix
+        return coo_matrix( (data,(row,col)), shape=self.shape )
+
+
+    def tobsc(self,blocksize=None):
+        if blocksize is None:
+            blocksize = self.blocksize
+        elif blocksize != self.blocksize:
+            return self.tocoo(copy=False).tobsc(blocksize=blocksize)
+
+        #maintian blocksize
+        X,Y = self.blocksize
+        M,N = self.shape
+
+        #use CSR->CSC to determine a permutation for BSR<->BSC
+        from csr import csr_matrix
+        data = arange(len(self.indices), dtype=self.indices.dtype)
+        proxy = csr_matrix((data,self.indices,self.indptr),shape=(M/X,N/Y))
+        proxy = proxy.tocsc()
+
+        data    = self.data[proxy.data] #permute data
+        indices = proxy.indices
+        indptr  = proxy.indptr
+       
+        from bsc import bsc_matrix
+        return bsc_matrix( (data,indices,indptr), shape=self.shape )
+    
+    def tobsr(self,blocksize=None,copy=False):
+
+        if blocksize not in [None, self.blocksize]:
+            return self.tocoo(copy=False).tobsr(blocksize=blocksize)
+        if copy:
+            return self.copy()
+        else:
+            return self
+    
+    # these functions are used by the parent class
+    # to remove redudancy between bsc_matrix and bsr_matrix
+    def _swap(self,x):
+        """swap the members of x if this is a column-oriented matrix
+        """
+        return (x[0],x[1])
+
+
+from sputils import _isinstance
+
+def isspmatrix_bsr(x):
+    return _isinstance(x, bsr_matrix)
+



More information about the Scipy-svn mailing list