[Scipy-svn] r3632 - trunk/scipy/sparse
scipy-svn@scip...
scipy-svn@scip...
Mon Dec 10 22:50:35 CST 2007
Author: wnbell
Date: 2007-12-10 22:50:31 -0600 (Mon, 10 Dec 2007)
New Revision: 3632
Modified:
trunk/scipy/sparse/sparse.py
Log:
refactoring of CSR/CSC
removed use of (otherwise unused) attribute nzmax
Modified: trunk/scipy/sparse/sparse.py
===================================================================
--- trunk/scipy/sparse/sparse.py 2007-12-11 03:45:14 UTC (rev 3631)
+++ trunk/scipy/sparse/sparse.py 2007-12-11 04:50:31 UTC (rev 3632)
@@ -160,16 +160,6 @@
except AttributeError:
raise AttributeError, "nnz not defined"
- def getnzmax(self):
- try:
- nzmax = self.nzmax
- except AttributeError:
- try:
- nzmax = self.nnz
- except AttributeError:
- nzmax = 0
- return nzmax
-
def getformat(self):
try:
format = self.format
@@ -498,7 +488,7 @@
class _cs_matrix(spmatrix):
"""base matrix class for compressed row and column oriented matrices"""
- def __init__(self, arg1, dims=None, nzmax=NZMAX, dtype=None, copy=False):
+ def __init__(self, arg1, dims=None, dtype=None, copy=False):
spmatrix.__init__(self)
if isdense(arg1):
@@ -523,12 +513,9 @@
# create empty matrix
self.shape = arg1 #spmatrix checks for errors here
M, N = self.shape
- self.data = zeros((nzmax,), getdtype(dtype, default=float))
- self.indices = zeros((nzmax,), intc)
- if self.format[-1] == 'r':
- self.indptr = zeros(M+1, dtype='intc')
- else:
- self.indptr = zeros(N+1, dtype='intc')
+ self.data = zeros(0, getdtype(dtype, default=float))
+ self.indices = zeros(0, intc)
+ self.indptr = zeros(self._swap(self.shape)[0] + 1, dtype='intc')
else:
try:
# Try interpreting it as (data, ij)
@@ -561,20 +548,25 @@
if self.shape is None:
# shape not already set, try to infer dimensions
try:
- first_dim = len(self.indptr) - 1
- second_dim = self.indices.max() + 1
+ major_dim = len(self.indptr) - 1
+ minor_dim = self.indices.max() + 1
except:
raise ValueError,'unable to infer matrix dimensions'
else:
- if self.format[-1] == 'r':
- # row oriented matrix
- self.shape = (first_dim,second_dim)
- else:
- # column oriented matrix
- self.shape = (second_dim,first_dim)
+ self.shape = self._swap((major_dim,minor_dim))
self.check_format(full_check=False)
+ def getnnz(self):
+ return self.indptr[-1]
+ nnz = property(fget=getnnz)
+
+ def _get_dtype(self):
+ return self.data.dtype
+ def _set_dtype(self,newtype):
+ self.data.dtype = newtype
+ dtype = property(fget=_get_dtype,fset=_set_dtype)
+
def _set_self(self, other, copy=False):
"""take the member variables of other and assign them to self"""
@@ -585,26 +577,13 @@
self.indices = other.indices
self.indptr = other.indptr
self.shape = other.shape
-
-
- def _get_dtype(self):
- return self.data.dtype
- def _set_dtype(self,newtype):
- self.data.dtype = newtype
- dtype = property(fget=_get_dtype,fset=_set_dtype)
def _check_format(self, full_check):
self.shape = tuple([int(x) for x in self.shape]) # for floats etc.
- assert(self.format in ['csr','csc'])
- if self.format[-1] == 'r':
- primary,secondary = 'row','column'
- indptr_size = self.shape[0] + 1
- indices_bound = self.shape[1]
- else:
- primary,secondary = 'column','row'
- indptr_size = self.shape[1] + 1
- indices_bound = self.shape[0]
+ #use _swap to determine proper bounds
+ major_name,minor_name = self._swap(('row','column'))
+ major_dim,minor_dim = self._swap(self.shape)
# index arrays should have integer data types
if self.indptr.dtype.kind != 'i':
@@ -625,10 +604,10 @@
raise ValueError,"data, indices, and indptr should be rank 1"
# check index pointer
- if (len(self.indptr) != indptr_size ):
+ if (len(self.indptr) != major_dim + 1 ):
raise ValueError, \
"index pointer size (%d) should be (%d)" % \
- (len(self.indptr), indptr_size)
+ (len(self.indptr), major_dim + 1)
if (self.indptr[0] != 0):
raise ValueError,"index pointer should start with 0"
@@ -640,32 +619,24 @@
"Last value of index pointer should be less than "\
"the size of index and data arrays"
- self.nnz = self.indptr[-1]
- self.nzmax = len(self.indices)
+ self.prune()
if full_check:
#check format validity (more expensive)
if self.nnz > 0:
- if amax(self.indices[:self.nnz]) >= indices_bound:
+ if amax(self.indices) >= minor_dim:
raise ValueError, "%s index values must be < %d" % \
- (secondary,indices_bound)
- if amin(self.indices[:self.nnz]) < 0:
+ (minor_name,minor_dim)
+ if amin(self.indices) < 0:
raise ValueError, "%s index values must be >= 0" % \
- secondary
- if numpy.diff(self.indptr).min() < 0:
- raise ValueError,'index pointer values must form a " \
- "non-decreasing sequence'
+ minor_name
+ if numpy.diff(self.indptr).min() < 0:
+ raise ValueError,'index pointer values must form a " \
+ "non-decreasing sequence'
def astype(self, t):
return self._with_data(self.data.astype(t))
- def __repr__(self):
- format = self.getformat()
- return "<%dx%d sparse matrix of type '%s'\n\twith %d stored "\
- "elements (space for %d)\n\tin %s format>" % \
- (self.shape + (self.dtype.type, self.getnnz(), self.nzmax, \
- _formats[format][1]))
-
def _with_data(self,data,copy=True):
"""Returns a matrix with the same sparsity structure as self,
but with different data. By default the structure arrays
@@ -1019,12 +990,11 @@
- csc_matrix(s)
with another sparse matrix s (sugar for .tocsc())
- - csc_matrix((M, N), [nzmax, dtype])
+ - csc_matrix((M, N), [dtype])
to construct a container, where (M, N) are dimensions and
- nzmax, dtype are optional, defaulting to nzmax=sparse.NZMAX
- and dtype='d'.
+ dtype is optional, defaulting to dtype='d'.
- - csc_matrix((data, ij), [(M, N), nzmax])
+ - csc_matrix((data, ij), [(M, N)])
where data, ij satisfy:
a[ij[0, k], ij[1, k]] = data[k]
@@ -1115,14 +1085,12 @@
indxs = numpy.where(row == self.indices[self.indptr[col]:self.indptr[col+1]])
if len(indxs[0]) == 0:
#value not present
- nzmax = self.nzmax
- if (nzmax < self.nnz+1): # need more room
- alloc = max(1, self.allocsize)
- self.data = resize1d(self.data, nzmax + alloc)
- self.indices = resize1d(self.indices, nzmax + alloc)
+ #TODO handle this with concatenation
+ self.data = resize1d(self.data, self.nnz + 1)
+ self.indices = resize1d(self.indices, self.nnz + 1)
newindex = self.indptr[col]
- self.data[newindex+1:] = self.data[newindex:-1]
+ self.data[newindex+1:] = self.data[newindex:-1]
self.indices[newindex+1:] = self.indices[newindex:-1]
self.data[newindex] = val
@@ -1154,15 +1122,10 @@
col = searchsorted(self.indptr, ind+1)-1
return (row, col)
+
def tocsc(self, copy=False):
return self.toself(copy)
- def _toother(self):
- return self.tocsr()
-
- def _tothis(self, other):
- return other.tocsc()
-
def tocsr(self):
indptr = empty(self.shape[0] + 1, dtype=intc)
indices = empty(self.nnz, dtype=intc)
@@ -1174,7 +1137,6 @@
return csr_matrix((data, indices, indptr), self.shape)
-
def tocoo(self,copy=True):
"""Return a COOrdinate representation of this matrix
@@ -1200,15 +1162,15 @@
def prune(self):
""" Remove empty space after all non-zero elements.
"""
- nnz = self.indptr[-1]
- if self.nzmax <= nnz:
- if self.nzmax < nnz:
- raise RuntimeError, "should never have nnz > nzmax"
- return
- self.nnz = nnz
- self.data = self.data[:nnz]
- self.indices = self.indices[:nnz]
- self.nzmax = nnz
+ if len(self.indptr) != self.shape[1] + 1:
+ raise ValueError, "index pointer has invalid length"
+ if len(self.indices) < self.nnz:
+ raise ValueError, "indices array has fewer than nnz elements"
+ if len(self.data) < self.nnz:
+ raise ValueError, "data array has fewer than nnz elements"
+
+ self.data = self.data[:self.nnz]
+ self.indices = self.indices[:self.nnz]
def get_submatrix( self, slice0, slice1 ):
"""Return a submatrix of this matrix (new matrix is created).
@@ -1218,7 +1180,20 @@
slice1, slice0 )
nr, nc = aux[3:]
return self.__class__( aux[:3], dims = (nc, nr) )
+
+ # these functions are used by the parent class (_cs_matrix)
+ # to remove redudancy between csc_matrix and csr_matrix
+ def _swap(self,x):
+ """swap the members of x if this is a column-oriented matrix
+ """
+ return (x[1],x[0])
+ def _toother(self):
+ return self.tocsr()
+
+ def _tothis(self, other):
+ return other.tocsc()
+
class csr_matrix(_cs_matrix):
""" Compressed sparse row matrix
This can be instantiated in several ways:
@@ -1228,12 +1203,11 @@
- csr_matrix(s)
with another sparse matrix s (sugar for .tocsr())
- - csr_matrix((M, N), [nzmax, dtype])
+ - csr_matrix((M, N), [dtype])
to construct a container, where (M, N) are dimensions and
- nzmax, dtype are optional, defaulting to nzmax=sparse.NZMAX
- and dtype='d'.
+ dtype is optional, defaulting to dtype='d'.
- - csr_matrix((data, ij), [dims=(M, N), nzmax=nzmax])
+ - csr_matrix((data, ij), [dims=(M, N)])
where data, ij satisfy:
a[ij[0, k], ij[1, k]] = data[k]
@@ -1324,11 +1298,8 @@
indxs = numpy.where(col == self.indices[self.indptr[row]:self.indptr[row+1]])
if len(indxs[0]) == 0:
#value not present
- nzmax = self.nzmax
- if (nzmax < self.nnz+1): # need more room
- alloc = max(1, self.allocsize)
- self.data = resize1d(self.data, nzmax + alloc)
- self.indices = resize1d(self.indices, nzmax + alloc)
+ self.data = resize1d(self.data, self.nnz + 1)
+ self.indices = resize1d(self.indices, self.nnz + 1)
newindex = self.indptr[row]
self.data[newindex+1:] = self.data[newindex:-1]
@@ -1358,7 +1329,7 @@
def tolil(self):
lil = lil_matrix(self.shape,dtype=self.dtype)
- csr = self.sorted_indices()
+ csr = self.sorted_indices() #lil_matrix needs sorted rows
rows,data = lil.rows,lil.data
ptr,ind,dat = csr.indptr,csr.indices,csr.data
@@ -1369,10 +1340,6 @@
rows[n] = ind[start:end].tolist()
data[n] = dat[start:end].tolist()
- lil.rows = rows
- lil.data = data
- #lil.shape = self.shape
-
return lil
def tocsr(self, copy=False):
@@ -1408,31 +1375,24 @@
return coo_matrix((data,(row,col)), self.shape)
-
- def _toother(self):
- return self.tocsc()
-
- def _tothis(self, other):
- return other.tocsr()
-
def toarray(self):
data = numpy.zeros(self.shape, self.data.dtype)
csrtodense(self.shape[0], self.shape[1], self.indptr, self.indices,
self.data, data)
return data
-
+
def prune(self):
- """ Eliminate non-zero entries, leaving space for at least
- newnzmax elements.
+ """ Remove empty space after all non-zero elements.
"""
- nnz = self.indptr[-1]
- if self.nzmax <= nnz:
- if self.nzmax < nnz:
- raise RuntimeError, "should never have nnz > nzmax"
- return
- self.data = self.data[:nnz]
- self.indices = self.indices[:nnz]
- self.nzmax = nnz
+ if len(self.indptr) != self.shape[0] + 1:
+ raise ValueError, "index pointer has invalid length"
+ if len(self.indices) < self.nnz:
+ raise ValueError, "indices array has fewer than nnz elements"
+ if len(self.data) < self.nnz:
+ raise ValueError, "data array has fewer than nnz elements"
+
+ self.data = self.data[:self.nnz]
+ self.indices = self.indices[:self.nnz]
def get_submatrix( self, slice0, slice1 ):
"""Return a submatrix of this matrix (new matrix is created)..
@@ -1443,17 +1403,20 @@
nr, nc = aux[3:]
return self.__class__( aux[:3], dims = (nr, nc) )
-# This function was for sorting dictionary keys by the second tuple element.
-# (We now use the Schwartzian transform instead for efficiency.)
-# def csc_cmp(x, y):
-# if (x == y): return 0
-# elif (x[1] == y[1]):
-# if (x[0] > y[0]): return 1
-# elif (x[0] == y[0]): return 0
-# else: return -1
-# elif (x[1] > y[1]): return 1
-# else: return -1
+ # these functions are used by the parent class (_cs_matrix)
+ # to remove redudancy between csc_matrix and csr_matrix
+ def _swap(self,x):
+ """swap the members of x if this is a column-oriented matrix
+ """
+ return (x[0],x[1])
+ def _toother(self):
+ return self.tocsc()
+
+ def _tothis(self, other):
+ return other.tocsr()
+
+
# dictionary of keys based matrix
class dok_matrix(spmatrix, dict):
""" A dictionary of keys based matrix. This is an efficient
More information about the Scipy-svn
mailing list