[Scipy-svn] r3758 - in trunk/scipy/io: . matlab tests
scipy-svn@scip...
scipy-svn@scip...
Tue Jan 1 03:33:12 CST 2008
Author: oliphant
Date: 2008-01-01 03:33:02 -0600 (Tue, 01 Jan 2008)
New Revision: 3758
Added:
trunk/scipy/io/matlab/
trunk/scipy/io/matlab/__init__.py
trunk/scipy/io/matlab/mio.py
trunk/scipy/io/matlab/mio4.py
trunk/scipy/io/matlab/mio5.py
trunk/scipy/io/matlab/miobase.py
trunk/scipy/io/matlab/setup.py
Removed:
trunk/scipy/io/datasource.py
trunk/scipy/io/matlab/__init__.py
trunk/scipy/io/matlab/mio.py
trunk/scipy/io/matlab/mio4.py
trunk/scipy/io/matlab/mio5.py
trunk/scipy/io/matlab/miobase.py
trunk/scipy/io/matlab/setup.py
trunk/scipy/io/mio.py
trunk/scipy/io/mio4.py
trunk/scipy/io/mio5.py
trunk/scipy/io/miobase.py
Modified:
trunk/scipy/io/__init__.py
trunk/scipy/io/array_import.py
trunk/scipy/io/data_store.py
trunk/scipy/io/mmio.py
trunk/scipy/io/npfile.py
trunk/scipy/io/pickler.py
trunk/scipy/io/setup.py
trunk/scipy/io/tests/test_mio.py
Log:
Merge in changes from io_new branch -r 3655:3752
Modified: trunk/scipy/io/__init__.py
===================================================================
--- trunk/scipy/io/__init__.py 2008-01-01 02:55:20 UTC (rev 3757)
+++ trunk/scipy/io/__init__.py 2008-01-01 09:33:02 UTC (rev 3758)
@@ -1,24 +1,90 @@
-
#
# io - Data input and output
#
from info import __doc__
+from numpy import deprecate_with_doc
+
+# These are all deprecated (until the end deprecated tag)
+from npfile import npfile
+from data_store import save, load, create_module, create_shelf
+from array_import import read_array, write_array
+from pickler import objload, objsave
+
from numpyio import packbits, unpackbits, bswap, fread, fwrite, \
convert_objectarray
+
+fread = deprecate_with_doc("""
+scipy.io.fread is can be replaced with raw reading capabilities of NumPy
+including fromfile as well as memory-mapping capabilities.
+""")(fread)
+
+fwrite = deprecate_with_doc("""
+scipy.io.fwrite can be replaced with raw writing capabilities of
+NumPy. Also, remember that files can be directly memory-mapped into NumPy
+arrays which is often a better way of reading especially large files.
+
+Look at the tofile methods as well as save and savez for writing arrays into
+easily transported files of data.
+""")(fwrite)
+
+bswap = deprecate_with_doc("""
+scipy.io.bswap is easily replaced with the byteswap method on an array.
+out = scipy.io.bswap(arr) --> out = arr.byteswap(True)
+""")(bswap)
+
+packbits = deprecate_with_doc("""
+The functionality of scipy.io.packbits is now available as numpy.packbits
+The calling convention is a bit different as the 2-d case is not specialized.
+
+However, you can simulate scipy.packbits by raveling the last 2 dimensions
+of the array and calling numpy.packbits with an axis=-1 keyword:
+
+def scipy_packbits(inp):
+ a = np.asarray(inp)
+ if a.ndim < 2:
+ return np.packbits(a)
+ oldshape = a.shape
+ newshape = oldshape[:-2] + (oldshape[-2]*oldshape[-1],)
+ a = np.reshape(a, newshape)
+ return np.packbits(a, axis=-1).ravel()
+""")(packbits)
+
+unpackbits = deprecate_with_doc("""
+The functionality of scipy.io.unpackbits is now available in numpy.unpackbits
+The calling convention is different however as the 2-d case is no longer
+specialized.
+
+Thus, the scipy.unpackbits behavior must be simulated using numpy.unpackbits.
+
+def scipy_unpackbits(inp, els_per_slice, out_type=None):
+ inp = np.asarray(inp)
+ num4els = ((els_per_slice-1) >> 3) + 1
+ inp = np.reshape(inp, (-1,num4els))
+ res = np.unpackbits(inp, axis=-1)[:,:els_per_slice]
+ return res.ravel()
+""")(unpackbits)
+
+convert_objectarray = deprecate_with_doc("""
+The same functionality can be obtained using NumPy string arrays and the
+.astype method (except for the optional missing value feature).
+""")(convert_objectarray)
+
+# end deprecated
+
# matfile read and write
-from mio import *
+from matlab.mio import loadmat, savemat
+
# netCDF file support
-from netcdf import *
-from npfile import npfile
+from netcdf import netcdf_file, netcdf_variable
+
from recaster import sctype_attributes, Recaster
-from array_import import *
-from data_store import *
-from pickler import *
+from data_store import save_as_module
+from mmio import mminfo, mmread, mmwrite
-from mmio import mminfo,mmread,mmwrite
+
__all__ = filter(lambda s:not s.startswith('_'),dir())
from numpy.testing import NumpyTest
test = NumpyTest().test
Modified: trunk/scipy/io/array_import.py
===================================================================
--- trunk/scipy/io/array_import.py 2008-01-01 02:55:20 UTC (rev 3757)
+++ trunk/scipy/io/array_import.py 2008-01-01 09:33:02 UTC (rev 3758)
@@ -17,7 +17,9 @@
# Numpy imports.
import numpy
-from numpy import array, take, concatenate, asarray, real, imag
+
+from numpy import array, take, concatenate, asarray, real, imag, \
+ deprecate_with_doc
# Sadly, this module is still written with typecodes in mind.
from numpy.oldnumeric import Float
@@ -310,6 +312,10 @@
return cols, atype
+@deprecate_with_doc("""
+The functionality of read_array is in numpy.loadtxt which allows the same
+functionality using different syntax.
+""")
def read_array(fileobject, separator=default, columns=default, comment="#",
lines=default, atype=Float, linesep='\n',
rowsize=10000, missing=0):
@@ -437,6 +443,11 @@
return row_sep.join(thestr)
+@deprecate_with_doc("""
+
+This function is replaced by numpy.savetxt which allows the same functionality
+through a different syntax.
+""")
def write_array(fileobject, arr, separator=" ", linesep='\n',
precision=5, suppress_small=0, keep_open=0):
"""Write a rank-2 or less array to file represented by fileobject.
Modified: trunk/scipy/io/data_store.py
===================================================================
--- trunk/scipy/io/data_store.py 2008-01-01 02:55:20 UTC (rev 3757)
+++ trunk/scipy/io/data_store.py 2008-01-01 09:33:02 UTC (rev 3758)
@@ -5,21 +5,27 @@
you to store data to a file and then load it back into the workspace.
When the data is stored, a python module is also created as the
"namespace for the data"
- >>> import data_store
+ >>> import scipy.io
>>> import os
>>> a = 1
- >>> data_store.save('c:/temp/junker',{'a':a})
+ >>> scipy.io.save_as_module('c:/temp/junker',{'a':a})
>>> os.chdir('c:/temp')
>>> import junker
>>> junker.a
1
"""
-__all__ = ['load', 'save', 'create_module', 'create_shelf']
+__all__ = ['save_as_module',
+ # The rest of these are all deprecated
+ 'save', 'create_module',
+ 'create_shelf', 'load']
+
import dumb_shelve
import os
-def load(module):
+from numpy import deprecate_with_doc, deprecate
+
+def _load(module):
""" Load data into module from a shelf with
the same name as the module.
"""
@@ -34,15 +40,15 @@
# print i, 'loaded...'
# print 'done'
-def save(file_name=None,data=None):
- """ Save the dictionary "data" into
- a module and shelf named save
- """
- import dumb_shelve
- create_module(file_name)
- create_shelf(file_name,data)
+load = deprecate_with_doc("""
+This is an internal function used with scipy.io.save_as_module
-def create_module(file_name):
+If you are saving arrays into a module, you should think about using
+HDF5 or .npz files instead.
+""")(_load)
+
+
+def _create_module(file_name):
""" Create the module file.
"""
if not os.path.exists(file_name+'.py'): # don't clobber existing files
@@ -50,10 +56,17 @@
f = open(file_name+'.py','w')
f.write('import scipy.io.data_store as data_store\n')
f.write('import %s\n' % module_name)
- f.write('data_store.load(%s)' % module_name)
+ f.write('data_store._load(%s)' % module_name)
f.close()
-def create_shelf(file_name,data):
+create_module = deprecate_with_doc("""
+This is an internal function used with scipy.io.save_as_module
+
+If you are saving arrays into a module, you should think about
+using HDF5 or .npz files instead.
+""")(_create_module)
+
+def _create_shelf(file_name,data):
"""Use this to write the data to a new file
"""
shelf_name = file_name.split('.')[0]
@@ -63,3 +76,20 @@
f[i] = data[i]
# print 'done'
f.close()
+
+create_shelf = deprecate_with_doc("""
+This is an internal function used with scipy.io.save_as_module
+
+If you are saving arrays into a module, you should think about using
+HDF5 or .npz files instead.
+""")(_create_shelf)
+
+
+def save_as_module(file_name=None,data=None):
+ """ Save the dictionary "data" into
+ a module and shelf named save
+ """
+ _create_module(file_name)
+ _create_shelf(file_name,data)
+
+save = deprecate(save_as_module, 'save', 'save_as_module')
Deleted: trunk/scipy/io/datasource.py
===================================================================
--- trunk/scipy/io/datasource.py 2008-01-01 02:55:20 UTC (rev 3757)
+++ trunk/scipy/io/datasource.py 2008-01-01 09:33:02 UTC (rev 3758)
@@ -1,457 +0,0 @@
-"""A file interface for handling local and remote data files.
-The goal of datasource is to abstract some of the file system operations when
-dealing with data files so the researcher doesn't have to know all the
-low-level details. Through datasource, a researcher can obtain and use a
-file with one function call, regardless of location of the file.
-
-DataSource is meant to augment standard python libraries, not replace them.
-It should work seamlessly with standard file IO operations and the os module.
-
-DataSource files can originate locally or remotely:
-
-- local files : '/home/guido/src/local/data.txt'
-- URLs (http, ftp, ...) : 'http://www.scipy.org/not/real/data.txt'
-
-DataSource files can also be compressed or uncompressed. Currently only gzip
-and bz2 are supported.
-
-Example:
-
- >>> # Create a DataSource, use os.curdir (default) for local storage.
- >>> ds = datasource.DataSource()
- >>>
- >>> # Open a remote file.
- >>> # DataSource downloads the file, stores it locally in:
- >>> # './www.google.com/index.html'
- >>> # opens the file and returns a file object.
- >>> fp = ds.open('http://www.google.com/index.html')
- >>>
- >>> # Use the file as you normally would
- >>> fp.read()
- >>> fp.close()
-
-"""
-
-__docformat__ = "restructuredtext en"
-
-import bz2
-import gzip
-import os
-import tempfile
-from shutil import rmtree
-from urllib2 import urlopen, URLError
-from urlparse import urlparse
-
-import warnings
-
-# datasource has been used for a while in the NIPY project for analyzing
-# large fmri imaging files hosted over a network. Data would be fetched
-# via URLs, cached locally and analyzed. Under these conditions the code
-# worked well, however it needs to be documented, tested and reviewed
-# before being fully exposed to SciPy. We hope to do this before the
-# 0.7 release.
-_api_warning = "The datasource API will be changing frequently before \
-the 0.7 release as the code is ported from the NIPY project to SciPy. \
-Some of the current public interface may become private during the port! \
-Use this module minimally, if at all, until it is stabilized."
-
-warnings.warn(_api_warning)
-
-# TODO: .zip support, .tar support?
-_file_openers = {".gz":gzip.open, ".bz2":bz2.BZ2File, None:file}
-
-
-def open(path, mode='r', destpath=os.curdir):
- """Open ``path`` with ``mode`` and return the file object.
-
- If ``path`` is an URL, it will be downloaded, stored in the DataSource
- directory and opened from there.
-
- *Parameters*:
-
- path : {string}
-
- mode : {string}, optional
-
- destpath : {string}, optional
- Destination directory where URLs will be downloaded and stored.
-
- *Returns*:
-
- file object
-
- """
-
- ds = DataSource(destpath)
- return ds.open(path, mode)
-
-
-class DataSource (object):
- """A generic data source file (file, http, ftp, ...).
-
- DataSources could be local files or remote files/URLs. The files may
- also be compressed or uncompressed. DataSource hides some of the low-level
- details of downloading the file, allowing you to simply pass in a valid
- file path (or URL) and obtain a file object.
-
- *Methods*:
-
- - exists : test if the file exists locally or remotely
- - abspath : get absolute path of the file in the DataSource directory
- - open : open the file
-
- *Example URL DataSource*::
-
- # Initialize DataSource with a local directory, default is os.curdir.
- ds = DataSource('/home/guido')
-
- # Open remote file.
- # File will be downloaded and opened from here:
- # /home/guido/site/xyz.txt
- ds.open('http://fake.xyz.web/site/xyz.txt')
-
- *Example using DataSource for temporary files*::
-
- # Initialize DataSource with 'None' for the local directory.
- ds = DataSource(None)
-
- # Open local file.
- # Opened file exists in a temporary directory like:
- # /tmp/tmpUnhcvM/foobar.txt
- # Temporary directories are deleted when the DataSource is deleted.
- ds.open('/home/guido/foobar.txt')
-
- *Notes*:
- BUG : URLs require a scheme string ('http://') to be used.
- www.google.com will fail.
-
- >>> repos.exists('www.google.com/index.html')
- False
-
- >>> repos.exists('http://www.google.com/index.html')
- True
-
- """
-
- def __init__(self, destpath=os.curdir):
- """Create a DataSource with a local path at destpath."""
- if destpath:
- self._destpath = os.path.abspath(destpath)
- self._istmpdest = False
- else:
- self._destpath = tempfile.mkdtemp()
- self._istmpdest = True
-
- def __del__(self):
- # Remove temp directories
- if self._istmpdest:
- rmtree(self._destpath)
-
- def _iszip(self, filename):
- """Test if the filename is a zip file by looking at the file extension.
- """
- fname, ext = os.path.splitext(filename)
- return ext in _file_openers.keys()
-
- def _iswritemode(self, mode):
- """Test if the given mode will open a file for writing."""
-
- # Currently only used to test the bz2 files.
- _writemodes = ("w", "+")
- for c in mode:
- if c in _writemodes:
- return True
- return False
-
- def _splitzipext(self, filename):
- """Split zip extension from filename and return filename.
-
- *Returns*:
- base, zip_ext : {tuple}
-
- """
-
- if self._iszip(filename):
- return os.path.splitext(filename)
- else:
- return filename, None
-
- def _possible_names(self, filename):
- """Return a tuple containing compressed filename variations."""
- names = [filename]
- if not self._iszip(filename):
- for zipext in _file_openers.keys():
- if zipext:
- names.append(filename+zipext)
- return names
-
- def _isurl(self, path):
- """Test if path is a net location. Tests the scheme and netloc."""
-
- # BUG : URLs require a scheme string ('http://') to be used.
- # www.google.com will fail.
- # Should we prepend the scheme for those that don't have it and
- # test that also? Similar to the way we append .gz and test for
- # compressed versions of files.
-
- scheme, netloc, upath, uparams, uquery, ufrag = urlparse(path)
- return bool(scheme and netloc)
-
- def _cache(self, path):
- """Cache the file specified by path.
-
- Creates a copy of the file in the datasource cache.
-
- """
-
- upath = self.abspath(path)
-
- # ensure directory exists
- if not os.path.exists(os.path.dirname(upath)):
- os.makedirs(os.path.dirname(upath))
-
- # TODO: Doesn't handle compressed files!
- if self._isurl(path):
- try:
- openedurl = urlopen(path)
- file(upath, 'w').write(openedurl.read())
- except URLError:
- raise URLError("URL not found: ", path)
- else:
- try:
- # TODO: Why not just copy the file with shutils.copyfile?
- fp = file(path, 'r')
- file(upath, 'w').write(fp.read())
- except IOError:
- raise IOError("File not found: ", path)
- return upath
-
- def _findfile(self, path):
- """Searches for ``path`` and returns full path if found.
-
- If path is an URL, _findfile will cache a local copy and return
- the path to the cached file.
- If path is a local file, _findfile will return a path to that local
- file.
-
- The search will include possible compressed versions of the file and
- return the first occurrence found.
-
- """
-
- # Build list of possible local file paths
- if not self._isurl(path):
- # Valid local paths
- filelist = self._possible_names(path)
- # Paths in self._destpath
- filelist += self._possible_names(self.abspath(path))
- else:
- # Cached URLs in self._destpath
- filelist = self._possible_names(self.abspath(path))
- # Remote URLs
- filelist = filelist + self._possible_names(path)
-
- for name in filelist:
- if self.exists(name):
- if self._isurl(name):
- name = self._cache(name)
- return name
- return None
-
- def abspath(self, path):
- """Return absolute path of ``path`` in the DataSource directory.
-
- If ``path`` is an URL, the ``abspath`` will be either the location
- the file exists locally or the location it would exist when opened
- using the ``open`` method.
-
- The functionality is identical to os.path.abspath.
-
- *Parameters*:
-
- path : {string}
- Can be a local file or a remote URL.
-
- *Returns*:
-
- Complete path, rooted in the DataSource destination directory.
-
- *See Also*:
-
- `open` : Method that downloads and opens files.
-
- """
-
- # TODO: This should be more robust. Handles case where path includes
- # the destpath, but not other sub-paths. Failing case:
- # path = /home/guido/datafile.txt
- # destpath = /home/alex/
- # upath = self.abspath(path)
- # upath == '/home/alex/home/guido/datafile.txt'
-
- # handle case where path includes self._destpath
- splitpath = path.split(self._destpath, 2)
- if len(splitpath) > 1:
- path = splitpath[1]
- scheme, netloc, upath, uparams, uquery, ufrag = urlparse(path)
- return os.path.join(self._destpath, netloc, upath.strip(os.sep))
-
- def exists(self, path):
- """Test if ``path`` exists.
-
- Test if ``path`` exists as (and in this order):
-
- - a local file.
- - a remote URL that has been downloaded and stored locally in the
- DataSource directory.
- - a remote URL that has not been downloaded, but is valid and
- accessible.
-
- *Parameters*:
-
- path : {string}
- Can be a local file or a remote URL.
-
- *Returns*:
-
- boolean
-
- *See Also*:
-
- `abspath`
-
- *Notes*
-
- When ``path`` is an URL, ``exists`` will return True if it's either
- stored locally in the DataSource directory, or is a valid remote
- URL. DataSource does not discriminate between the two; the file
- is accessible if it exists in either location.
-
- """
-
- # Test local path
- if os.path.exists(path):
- return True
-
- # Test cached url
- upath = self.abspath(path)
- if os.path.exists(upath):
- return True
-
- # Test remote url
- if self._isurl(path):
- try:
- netfile = urlopen(path)
- del(netfile)
- return True
- except URLError:
- return False
- return False
-
- def open(self, path, mode='r'):
- """Open ``path`` with ``mode`` and return the file object.
-
- If ``path`` is an URL, it will be downloaded, stored in the DataSource
- directory and opened from there.
-
- *Parameters*:
-
- path : {string}
-
- mode : {string}, optional
-
-
- *Returns*:
-
- file object
-
- """
-
- # TODO: There is no support for opening a file for writing which
- # doesn't exist yet (creating a file). Should there be?
-
- # TODO: Add a ``subdir`` parameter for specifying the subdirectory
- # used to store URLs in self._destpath.
-
- if self._isurl(path) and self._iswritemode(mode):
- raise ValueError("URLs are not writeable")
-
- # NOTE: _findfile will fail on a new file opened for writing.
- found = self._findfile(path)
- if found:
- _fname, ext = self._splitzipext(found)
- if ext == 'bz2':
- mode.replace("+", "")
- return _file_openers[ext](found, mode=mode)
- else:
- raise IOError("%s not found." % path)
-
-
-class Repository (DataSource):
- """A data Repository where multiple DataSource's share a base URL/directory.
-
- Repository extends DataSource by prepending a base URL (or directory) to
- all the files it handles. Use a Repository when you will be working with
- multiple files from one base URL. Initialize the Repository with the
- base URL, then refer to each file by its filename only.
-
- *Methods*:
-
- - exists : test if the file exists locally or remotely
- - abspath : get absolute path of the file in the DataSource directory
- - open : open the file
-
- *Toy example*::
-
- # Analyze all files in the repository.
- repos = Repository('/home/user/data/dir/')
- for filename in filelist:
- fp = repos.open(filename)
- fp.analyze()
- fp.close()
-
- # Similarly you could use a URL for a repository.
- repos = Repository('http://www.xyz.edu/data')
-
- """
-
- def __init__(self, baseurl, destpath=os.curdir):
- """Create a Repository with a shared url or directory of baseurl."""
- DataSource.__init__(self, destpath=destpath)
- self._baseurl = baseurl
-
- def __del__(self):
- DataSource.__del__(self)
-
- def _fullpath(self, path):
- """Return complete path for path. Prepends baseurl if necessary."""
- splitpath = path.split(self._baseurl, 2)
- if len(splitpath) == 1:
- result = os.path.join(self._baseurl, path)
- else:
- result = path # path contains baseurl already
- return result
-
- def _findfile(self, path):
- """Extend DataSource method to prepend baseurl to ``path``."""
- return DataSource._findfile(self, self._fullpath(path))
-
- def abspath(self, path):
- """Extend DataSource method to prepend baseurl to ``path``."""
- return DataSource.abspath(self, self._fullpath(path))
-
- def exists(self, path):
- """Extend DataSource method to prepend baseurl to ``path``."""
- return DataSource.exists(self, self._fullpath(path))
-
- def open(self, path, mode='r'):
- """Extend DataSource method to prepend baseurl to ``path``."""
- return DataSource.open(self, self._fullpath(path), mode)
-
- def listdir(self):
- '''List files in the source Repository.'''
- if self._isurl(self._baseurl):
- raise NotImplementedError, \
- "Directory listing of URLs, not supported yet."
- else:
- return os.listdir(self._baseurl)
Copied: trunk/scipy/io/matlab (from rev 3757, branches/io_new/matlab)
Deleted: trunk/scipy/io/matlab/__init__.py
===================================================================
Copied: trunk/scipy/io/matlab/__init__.py (from rev 3757, branches/io_new/matlab/__init__.py)
Deleted: trunk/scipy/io/matlab/mio.py
===================================================================
--- branches/io_new/matlab/mio.py 2008-01-01 02:55:20 UTC (rev 3757)
+++ trunk/scipy/io/matlab/mio.py 2008-01-01 09:33:02 UTC (rev 3758)
@@ -1,133 +0,0 @@
-# Authors: Travis Oliphant, Matthew Brett
-
-"""
-Module for reading and writing matlab (TM) .mat files
-"""
-
-import os
-import sys
-
-from mio4 import MatFile4Reader, MatFile4Writer
-from mio5 import MatFile5Reader, MatFile5Writer
-
-__all__ = ['find_mat_file', 'mat_reader_factory', 'loadmat', 'savemat']
-
-def find_mat_file(file_name, appendmat=True):
- ''' Try to find .mat file on system path
-
- file_name - file name string
- appendmat - If True, and file_name does not end in '.mat', appends it
- '''
- if appendmat and file_name[-4:] == ".mat":
- file_name = file_name[:-4]
- if os.sep in file_name:
- full_name = file_name
- if appendmat:
- full_name = file_name + ".mat"
- else:
- full_name = None
- junk, file_name = os.path.split(file_name)
- for path in [os.curdir] + list(sys.path):
- test_name = os.path.join(path, file_name)
- if appendmat:
- test_name += ".mat"
- try:
- fid = open(test_name,'rb')
- fid.close()
- full_name = test_name
- break
- except IOError:
- pass
- return full_name
-
-def mat_reader_factory(file_name, appendmat=True, **kwargs):
- """Create reader for matlab (TM) .mat format files
-
- See docstring for loadmat for input options
- """
- if isinstance(file_name, basestring):
- full_name = find_mat_file(file_name, appendmat)
- if full_name is None:
- raise IOError, "%s not found on the path." % file_name
- byte_stream = open(full_name, 'rb')
- else:
- try:
- file_name.read(0)
- except AttributeError:
- raise IOError, 'Reader needs file name or open file-like object'
- byte_stream = file_name
-
- MR = MatFile4Reader(byte_stream, **kwargs)
- if MR.format_looks_right():
- return MR
- return MatFile5Reader(byte_stream, **kwargs)
-
-def loadmat(file_name, mdict=None, appendmat=True, basename='raw', **kwargs):
- ''' Load Matlab(tm) file
-
- file_name - Name of the mat file
- (do not need .mat extension if appendmat==True)
- If name not a full path name, search for the file on
- the sys.path list and use the first one found (the
- current directory is searched first).
- Can also pass open file-like object
- mdict - optional dictionary in which to insert matfile variables
- appendmat - True to append the .mat extension to the end of the
- given filename, if not already present
- basename - base name for unnamed variables (unused in code)
- byte_order - byte order ('native', 'little', 'BIG')
- in ('native', '=')
- or in ('little', '<')
- or in ('BIG', '>')
- mat_dtype - return arrays in same dtype as loaded into matlab
- (instead of the dtype with which they are saved)
- squeeze_me - whether to squeeze matrix dimensions or not
- chars_as_strings - whether to convert char arrays to string arrays
- mat_dtype - return matrices with datatype that matlab would load as
- (rather than in the datatype matlab saves as)
- matlab_compatible - returns matrices as would be loaded by matlab
- (implies squeeze_me=False, chars_as_strings=False,
- mat_dtype=True)
-
- v4 (Level 1.0), v6 and v7.1 matfiles are supported.
-
- '''
- MR = mat_reader_factory(file_name, appendmat, **kwargs)
- matfile_dict = MR.get_variables()
- if mdict is not None:
- mdict.update(matfile_dict)
- else:
- mdict = matfile_dict
- return mdict
-
-def savemat(file_name, mdict, appendmat=True, format='4'):
- """Save a dictionary of names and arrays into the MATLAB-style .mat file.
-
- This saves the arrayobjects in the given dictionary to a matlab
- style .mat file.
-
- appendmat - if true, appends '.mat' extension to filename, if not present
- format - '4' for matlab 4 mat files, '5' for matlab 5 onwards
- """
- file_is_string = isinstance(file_name, basestring)
- if file_is_string:
- if appendmat and file_name[-4:] != ".mat":
- file_name = file_name + ".mat"
- file_stream = open(file_name, 'wb')
- else:
- try:
- file_name.write('')
- except AttributeError:
- raise IOError, 'Writer needs file name or writeable '\
- 'file-like object'
- file_stream = file_name
-
- if format == '4':
- MW = MatFile4Writer(file_stream)
- elif format == '5':
- MW = MatFile5Writer(file_stream)
- else:
- raise ValueError, 'Format should be 4 or 5'
- MW.put_variables(mdict)
- if file_is_string:
- file_stream.close()
Copied: trunk/scipy/io/matlab/mio.py (from rev 3757, branches/io_new/matlab/mio.py)
Deleted: trunk/scipy/io/matlab/mio4.py
===================================================================
--- branches/io_new/matlab/mio4.py 2008-01-01 02:55:20 UTC (rev 3757)
+++ trunk/scipy/io/matlab/mio4.py 2008-01-01 09:33:02 UTC (rev 3758)
@@ -1,345 +0,0 @@
-''' Classes for read / write of matlab (TM) 4 files
-'''
-
-import numpy as N
-
-from miobase import *
-
-miDOUBLE = 0
-miSINGLE = 1
-miINT32 = 2
-miINT16 = 3
-miUINT16 = 4
-miUINT8 = 5
-
-mdtypes_template = {
- miDOUBLE: 'f8',
- miSINGLE: 'f4',
- miINT32: 'i4',
- miINT16: 'i2',
- miUINT16: 'u2',
- miUINT8: 'u1',
- 'header': [('mopt', 'i4'),
- ('mrows', 'i4'),
- ('ncols', 'i4'),
- ('imagf', 'i4'),
- ('namlen', 'i4')],
- 'U1': 'U1',
- }
-
-np_to_mtypes = {
- 'f8': miDOUBLE,
- 'c32': miDOUBLE,
- 'c24': miDOUBLE,
- 'c16': miDOUBLE,
- 'f4': miSINGLE,
- 'c8': miSINGLE,
- 'i4': miINT32,
- 'i2': miINT16,
- 'u2': miUINT16,
- 'u1': miUINT8,
- 'S1': miUINT8,
- }
-
-# matrix classes
-mxFULL_CLASS = 0
-mxCHAR_CLASS = 1
-mxSPARSE_CLASS = 2
-
-order_codes = {
- 0: '<',
- 1: '>',
- 2: 'VAX D-float', #!
- 3: 'VAX G-float',
- 4: 'Cray', #!!
- }
-
-
-class Mat4ArrayReader(MatArrayReader):
- ''' Class for reading Mat4 arrays
- '''
-
- def matrix_getter_factory(self):
- ''' Read header, return matrix getter '''
- data = self.read_dtype(self.dtypes['header'])
- header = {}
- header['name'] = self.read_ztstring(data['namlen'])
- if data['mopt'] < 0 or data['mopt'] > 5000:
- ValueError, 'Mat 4 mopt wrong format, byteswapping problem?'
- M,rest = divmod(data['mopt'], 1000)
- O,rest = divmod(rest,100)
- P,rest = divmod(rest,10)
- T = rest
- if O != 0:
- raise ValueError, 'O in MOPT integer should be 0, wrong format?'
- header['dtype'] = self.dtypes[P]
- header['mclass'] = T
- header['dims'] = (data['mrows'], data['ncols'])
- header['is_complex'] = data['imagf'] == 1
- remaining_bytes = header['dtype'].itemsize * N.product(header['dims'])
- if header['is_complex'] and not header['mclass'] == mxSPARSE_CLASS:
- remaining_bytes *= 2
- next_pos = self.mat_stream.tell() + remaining_bytes
- if T == mxFULL_CLASS:
- getter = Mat4FullGetter(self, header)
- elif T == mxCHAR_CLASS:
- getter = Mat4CharGetter(self, header)
- elif T == mxSPARSE_CLASS:
- getter = Mat4SparseGetter(self, header)
- else:
- raise TypeError, 'No reader for class code %s' % T
- getter.next_position = next_pos
- return getter
-
-
-class Mat4MatrixGetter(MatMatrixGetter):
-
- # Mat4 variables never global or logical
- is_global = False
- is_logical = False
-
- def read_array(self, copy=True):
- ''' Mat4 read array always uses header dtype and dims
- copy - copies array if True
- (buffer is usually read only)
- a_dtype is assumed to be correct endianness
- '''
- dt = self.header['dtype']
- dims = self.header['dims']
- num_bytes = dt.itemsize
- for d in dims:
- num_bytes *= d
- arr = N.ndarray(shape=dims,
- dtype=dt,
- buffer=self.mat_stream.read(num_bytes),
- order='F')
- if copy:
- arr = arr.copy()
- return arr
-
-
-class Mat4FullGetter(Mat4MatrixGetter):
- def __init__(self, array_reader, header):
- super(Mat4FullGetter, self).__init__(array_reader, header)
- if header['is_complex']:
- self.mat_dtype = N.dtype(N.complex128)
- else:
- self.mat_dtype = N.dtype(N.float64)
-
- def get_raw_array(self):
- if self.header['is_complex']:
- # avoid array copy to save memory
- res = self.read_array(copy=False)
- res_j = self.read_array(copy=False)
- return res + (res_j * 1j)
- return self.read_array()
-
-
-class Mat4CharGetter(Mat4MatrixGetter):
- def get_raw_array(self):
- arr = self.read_array().astype(N.uint8)
- # ascii to unicode
- S = arr.tostring().decode('ascii')
- return N.ndarray(shape=self.header['dims'],
- dtype=N.dtype('U1'),
- buffer = N.array(S)).copy()
-
-
-class Mat4SparseGetter(Mat4MatrixGetter):
- ''' Read sparse matrix type
-
- Matlab (TM) 4 real sparse arrays are saved in a N+1 by 3 array
- format, where N is the number of non-zero values. Column 1 values
- [0:N] are the (1-based) row indices of each non-zero value,
- column 2 [0:N] are the column indices, column 3 [0:N] are the
- (real) values. The last values [-1,0:2] of the rows, column
- indices are shape[0] and shape[1] respectively of the output
- matrix. The last value for the values column is a padding 0. mrows
- and ncols values from the header give the shape of the stored
- matrix, here [N+1, 3]. Complex data is saved as a 4 column
- matrix, where the fourth column contains the imaginary component;
- the last value is again 0. Complex sparse data do _not_ have the
- header imagf field set to True; the fact that the data are complex
- is only detectable because there are 4 storage columns
- '''
- def get_raw_array(self):
- res = self.read_array()
- tmp = res[:-1,:]
- dims = res[-1,0:2]
- I = N.ascontiguousarray(tmp[:,0],dtype='intc') #fixes byte order also
- J = N.ascontiguousarray(tmp[:,1],dtype='intc')
- I -= 1 # for 1-based indexing
- J -= 1
- if res.shape[1] == 3:
- V = N.ascontiguousarray(tmp[:,2],dtype='float')
- else:
- V = N.ascontiguousarray(tmp[:,2],dtype='complex')
- V.imag = tmp[:,3]
- if have_sparse:
- return scipy.sparse.coo_matrix((V,(I,J)), dims)
- return (dims, I, J, V)
-
-
-class MatFile4Reader(MatFileReader):
- ''' Reader for Mat4 files '''
- def __init__(self, mat_stream, *args, **kwargs):
- self._array_reader = Mat4ArrayReader(
- mat_stream,
- None,
- None,
- )
- super(MatFile4Reader, self).__init__(mat_stream, *args, **kwargs)
- self._array_reader.processor_func = self.processor_func
-
- def set_dtypes(self):
- self.dtypes = self.convert_dtypes(mdtypes_template)
- self._array_reader.dtypes = self.dtypes
-
- def matrix_getter_factory(self):
- return self._array_reader.matrix_getter_factory()
-
- def format_looks_right(self):
- # Mat4 files have a zero somewhere in first 4 bytes
- self.mat_stream.seek(0)
- mopt_bytes = N.ndarray(shape=(4,),
- dtype=N.uint8,
- buffer = self.mat_stream.read(4))
- self.mat_stream.seek(0)
- return 0 in mopt_bytes
-
- def guess_byte_order(self):
- self.mat_stream.seek(0)
- mopt = self.read_dtype(N.dtype('i4'))
- self.mat_stream.seek(0)
- if mopt < 0 or mopt > 5000:
- return ByteOrder.swapped_code
- return ByteOrder.native_code
-
-
-class Mat4MatrixWriter(MatStreamWriter):
-
- def write_header(self, P=0, T=0, imagf=0, dims=None):
- ''' Write header for given data options
- P - mat4 data type
- T - mat4 matrix class
- imagf - complex flag
- dims - matrix dimensions
- '''
- if dims is None:
- dims = self.arr.shape
- header = N.empty((), mdtypes_template['header'])
- M = not ByteOrder.little_endian
- O = 0
- header['mopt'] = (M * 1000 +
- O * 100 +
- P * 10 +
- T)
- header['mrows'] = dims[0]
- header['ncols'] = dims[1]
- header['imagf'] = imagf
- header['namlen'] = len(self.name) + 1
- self.write_bytes(header)
- self.write_string(self.name + '\0')
-
- def arr_to_2d(self):
- self.arr = N.atleast_2d(self.arr)
- dims = self.arr.shape
- if len(dims) > 2:
- self.arr = self.arr.reshape(-1,dims[-1])
-
- def write(self):
- assert False, 'Not implemented'
-
-
-class Mat4NumericWriter(Mat4MatrixWriter):
-
- def write(self):
- self.arr_to_2d()
- imagf = self.arr.dtype.kind == 'c'
- try:
- P = np_to_mtypes[self.arr.dtype.str[1:]]
- except KeyError:
- if imagf:
- self.arr = self.arr.astype('c128')
- else:
- self.arr = self.arr.astype('f8')
- P = miDOUBLE
- self.write_header(P=P,
- T=mxFULL_CLASS,
- imagf=imagf)
- if imagf:
- self.write_bytes(self.arr.real)
- self.write_bytes(self.arr.imag)
- else:
- self.write_bytes(self.arr)
-
-
-class Mat4CharWriter(Mat4MatrixWriter):
-
- def write(self):
- self.arr_to_chars()
- self.arr_to_2d()
- dims = self.arr.shape
- self.write_header(P=miUINT8,
- T=mxCHAR_CLASS)
- if self.arr.dtype.kind == 'U':
- # Recode unicode to ascii
- n_chars = N.product(dims)
- st_arr = N.ndarray(shape=(),
- dtype=self.arr_dtype_number(n_chars),
- buffer=self.arr)
- st = st_arr.item().encode('ascii')
- self.arr = N.ndarray(shape=dims, dtype='S1', buffer=st)
- self.write_bytes(self.arr)
-
-
-class Mat4SparseWriter(Mat4MatrixWriter):
-
- def write(self):
- ''' Sparse matrices are 2D
- See docstring for Mat4SparseGetter
- '''
- imagf = self.arr.dtype.kind == 'c'
- nnz = self.arr.nnz
- ijd = N.zeros((nnz+1, 3+imagf), dtype='f8')
- for i in range(nnz):
- ijd[i,0], ijd[i,1] = self.arr.rowcol(i)
- ijd[:-1,0:2] += 1 # 1 based indexing
- if imagf:
- ijd[:-1,2] = self.arr.data.real
- ijd[:-1,3] = self.arr.data.imag
- else:
- ijd[:-1,2] = self.arr.data
- ijd[-1,0:2] = self.arr.shape
- self.write_header(P=miDOUBLE,
- T=mxSPARSE_CLASS,
- dims=ijd.shape)
- self.write_bytes(ijd)
-
-
-def matrix_writer_factory(stream, arr, name):
- ''' Factory function to return matrix writer given variable to write
- stream - file or file-like stream to write to
- arr - array to write
- name - name in matlab (TM) workspace
- '''
- if have_sparse:
- if scipy.sparse.issparse(arr):
- return Mat4SparseWriter(stream, arr, name)
- arr = N.array(arr)
- dtt = arr.dtype.type
- if dtt is N.object_:
- raise TypeError, 'Cannot save object arrays in Mat4'
- elif dtt is N.void:
- raise TypeError, 'Cannot save void type arrays'
- elif dtt in (N.unicode_, N.string_):
- return Mat4CharWriter(stream, arr, name)
- else:
- return Mat4NumericWriter(stream, arr, name)
-
-
-class MatFile4Writer(MatFileWriter):
-
- def put_variables(self, mdict):
- for name, var in mdict.items():
- matrix_writer_factory(self.file_stream, var, name).write()
Copied: trunk/scipy/io/matlab/mio4.py (from rev 3757, branches/io_new/matlab/mio4.py)
Deleted: trunk/scipy/io/matlab/mio5.py
===================================================================
--- branches/io_new/matlab/mio5.py 2008-01-01 02:55:20 UTC (rev 3757)
+++ trunk/scipy/io/matlab/mio5.py 2008-01-01 09:33:02 UTC (rev 3758)
@@ -1,807 +0,0 @@
-''' Classes for read / write of matlab (TM) 5 files
-'''
-
-# Small fragments of current code adapted from matfile.py by Heiko
-# Henkelmann
-
-## Notice in matfile.py file
-
-# Copyright (c) 2003 Heiko Henkelmann
-
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to
-# deal in the Software without restriction, including without limitation the
-# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-# sell copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-# DEALINGS IN THE SOFTWARE.
-
-import zlib
-from copy import copy as pycopy
-from cStringIO import StringIO
-import numpy as N
-
-from miobase import *
-
-try: # Python 2.3 support
- from sets import Set as set
-except:
- pass
-
-miINT8 = 1
-miUINT8 = 2
-miINT16 = 3
-miUINT16 = 4
-miINT32 = 5
-miUINT32 = 6
-miSINGLE = 7
-miDOUBLE = 9
-miINT64 = 12
-miUINT64 = 13
-miMATRIX = 14
-miCOMPRESSED = 15
-miUTF8 = 16
-miUTF16 = 17
-miUTF32 = 18
-
-mxCELL_CLASS = 1
-mxSTRUCT_CLASS = 2
-mxOBJECT_CLASS = 3
-mxCHAR_CLASS = 4
-mxSPARSE_CLASS = 5
-mxDOUBLE_CLASS = 6
-mxSINGLE_CLASS = 7
-mxINT8_CLASS = 8
-mxUINT8_CLASS = 9
-mxINT16_CLASS = 10
-mxUINT16_CLASS = 11
-mxINT32_CLASS = 12
-mxUINT32_CLASS = 13
-
-mdtypes_template = {
- miINT8: 'i1',
- miUINT8: 'u1',
- miINT16: 'i2',
- miUINT16: 'u2',
- miINT32: 'i4',
- miUINT32: 'u4',
- miSINGLE: 'f4',
- miDOUBLE: 'f8',
- miINT64: 'i8',
- miUINT64: 'u8',
- miUTF8: 'u1',
- miUTF16: 'u2',
- miUTF32: 'u4',
- 'file_header': [('description', 'S116'),
- ('subsystem_offset', 'i8'),
- ('version', 'u2'),
- ('endian_test', 'S2')],
- 'tag_full': [('mdtype', 'u4'), ('byte_count', 'u4')],
- 'array_flags': [('data_type', 'u4'),
- ('byte_count', 'u4'),
- ('flags_class','u4'),
- ('nzmax', 'u4')],
- 'U1': 'U1',
- }
-
-mclass_dtypes_template = {
- mxINT8_CLASS: 'i1',
- mxUINT8_CLASS: 'u1',
- mxINT16_CLASS: 'i2',
- mxUINT16_CLASS: 'u2',
- mxINT32_CLASS: 'i4',
- mxUINT32_CLASS: 'u4',
- mxSINGLE_CLASS: 'f4',
- mxDOUBLE_CLASS: 'f8',
- }
-
-
-np_to_mtypes = {
- 'f8': miDOUBLE,
- 'c32': miDOUBLE,
- 'c24': miDOUBLE,
- 'c16': miDOUBLE,
- 'f4': miSINGLE,
- 'c8': miSINGLE,
- 'i1': miINT8,
- 'i2': miINT16,
- 'i4': miINT32,
- 'u1': miUINT8,
- 'u4': miUINT32,
- 'u2': miUINT16,
- 'S1': miUINT8,
- 'U1': miUTF16,
- }
-
-
-np_to_mxtypes = {
- 'f8': mxDOUBLE_CLASS,
- 'c32': mxDOUBLE_CLASS,
- 'c24': mxDOUBLE_CLASS,
- 'c16': mxDOUBLE_CLASS,
- 'f4': mxSINGLE_CLASS,
- 'c8': mxSINGLE_CLASS,
- 'i4': mxINT32_CLASS,
- 'i2': mxINT16_CLASS,
- 'u2': mxUINT16_CLASS,
- 'u1': mxUINT8_CLASS,
- 'S1': mxUINT8_CLASS,
- }
-
-
-
-''' Before release v7.1 (release 14) matlab (TM) used the system
-default character encoding scheme padded out to 16-bits. Release 14
-and later use Unicode. When saving character data, R14 checks if it
-can be encoded in 7-bit ascii, and saves in that format if so.'''
-
-codecs_template = {
- miUTF8: {'codec': 'utf_8', 'width': 1},
- miUTF16: {'codec': 'utf_16', 'width': 2},
- miUTF32: {'codec': 'utf_32','width': 4},
- }
-
-miUINT16_codec = sys.getdefaultencoding()
-
-mx_numbers = (
- mxDOUBLE_CLASS,
- mxSINGLE_CLASS,
- mxINT8_CLASS,
- mxUINT8_CLASS,
- mxINT16_CLASS,
- mxUINT16_CLASS,
- mxINT32_CLASS,
- mxUINT32_CLASS,
- )
-
-class mat_struct(object):
- ''' Placeholder for holding read data from structs '''
- pass
-
-class mat_obj(object):
- ''' Placeholder for holding read data from objects '''
- pass
-
-class Mat5ArrayReader(MatArrayReader):
- ''' Class to get Mat5 arrays
-
- Provides element reader functions, header reader, matrix reader
- factory function
- '''
-
- def __init__(self, mat_stream, dtypes, processor_func, codecs, class_dtypes):
- super(Mat5ArrayReader, self).__init__(mat_stream,
- dtypes,
- processor_func,
- )
- self.codecs = codecs
- self.class_dtypes = class_dtypes
-
- def read_element(self, copy=True):
- raw_tag = self.mat_stream.read(8)
- tag = N.ndarray(shape=(),
- dtype=self.dtypes['tag_full'],
- buffer = raw_tag)
- mdtype = tag['mdtype'].item()
- byte_count = mdtype >> 16
- if byte_count: # small data element format
- if byte_count > 4:
- raise ValueError, 'Too many bytes for sde format'
- mdtype = mdtype & 0xFFFF
- dt = self.dtypes[mdtype]
- el_count = byte_count / dt.itemsize
- return N.ndarray(shape=(el_count,),
- dtype=dt,
- buffer=raw_tag[4:])
- byte_count = tag['byte_count'].item()
- if mdtype == miMATRIX:
- return self.current_getter(byte_count).get_array()
- if mdtype in self.codecs: # encoded char data
- raw_str = self.mat_stream.read(byte_count)
- codec = self.codecs[mdtype]
- if not codec:
- raise TypeError, 'Do not support encoding %d' % mdtype
- el = raw_str.decode(codec)
- else: # numeric data
- dt = self.dtypes[mdtype]
- el_count = byte_count / dt.itemsize
- el = N.ndarray(shape=(el_count,),
- dtype=dt,
- buffer=self.mat_stream.read(byte_count))
- if copy:
- el = el.copy()
- mod8 = byte_count % 8
- if mod8:
- self.mat_stream.seek(8 - mod8, 1)
- return el
-
- def matrix_getter_factory(self):
- ''' Returns reader for next matrix at top level '''
- tag = self.read_dtype(self.dtypes['tag_full'])
- mdtype = tag['mdtype'].item()
- byte_count = tag['byte_count'].item()
- next_pos = self.mat_stream.tell() + byte_count
- if mdtype == miCOMPRESSED:
- getter = Mat5ZArrayReader(self, byte_count).matrix_getter_factory()
- elif not mdtype == miMATRIX:
- raise TypeError, \
- 'Expecting miMATRIX type here, got %d' % mdtype
- else:
- getter = self.current_getter(byte_count)
- getter.next_position = next_pos
- return getter
-
- def current_getter(self, byte_count):
- ''' Return matrix getter for current stream position
-
- Returns matrix getters at top level and sub levels
- '''
- if not byte_count: # an empty miMATRIX can contain no bytes
- return Mat5EmptyMatrixGetter(self)
- af = self.read_dtype(self.dtypes['array_flags'])
- header = {}
- flags_class = af['flags_class']
- mc = flags_class & 0xFF
- header['mclass'] = mc
- header['is_logical'] = flags_class >> 9 & 1
- header['is_global'] = flags_class >> 10 & 1
- header['is_complex'] = flags_class >> 11 & 1
- header['nzmax'] = af['nzmax']
- header['dims'] = self.read_element()
- header['name'] = self.read_element().tostring()
- if mc in mx_numbers:
- return Mat5NumericMatrixGetter(self, header)
- if mc == mxSPARSE_CLASS:
- return Mat5SparseMatrixGetter(self, header)
- if mc == mxCHAR_CLASS:
- return Mat5CharMatrixGetter(self, header)
- if mc == mxCELL_CLASS:
- return Mat5CellMatrixGetter(self, header)
- if mc == mxSTRUCT_CLASS:
- return Mat5StructMatrixGetter(self, header)
- if mc == mxOBJECT_CLASS:
- return Mat5ObjectMatrixGetter(self, header)
- raise TypeError, 'No reader for class code %s' % mc
-
-
-class Mat5ZArrayReader(Mat5ArrayReader):
- ''' Getter for compressed arrays
-
- Reads and uncompresses gzipped stream on init, providing wrapper
- for this new sub-stream.
- '''
- def __init__(self, array_reader, byte_count):
- '''Reads and uncompresses gzipped stream'''
- data = array_reader.mat_stream.read(byte_count)
- super(Mat5ZArrayReader, self).__init__(
- StringIO(zlib.decompress(data)),
- array_reader.dtypes,
- array_reader.processor_func,
- array_reader.codecs,
- array_reader.class_dtypes)
-
-
-class Mat5MatrixGetter(MatMatrixGetter):
- ''' Base class for getting Mat5 matrices
-
- Gets current read information from passed array_reader
- '''
-
- def __init__(self, array_reader, header):
- super(Mat5MatrixGetter, self).__init__(array_reader, header)
- self.class_dtypes = array_reader.class_dtypes
- self.codecs = array_reader.codecs
- self.is_global = header['is_global']
- self.mat_dtype = None
-
- def read_element(self, *args, **kwargs):
- return self.array_reader.read_element(*args, **kwargs)
-
-
-class Mat5EmptyMatrixGetter(Mat5MatrixGetter):
- ''' Dummy class to return empty array for empty matrix
- '''
- def __init__(self, array_reader):
- self.array_reader = array_reader
- self.mat_stream = array_reader.mat_stream
- self.data_position = self.mat_stream.tell()
- self.header = {}
- self.is_global = False
- self.mat_dtype = 'f8'
-
- def get_raw_array(self):
- return N.array([[]])
-
-
-class Mat5NumericMatrixGetter(Mat5MatrixGetter):
-
- def __init__(self, array_reader, header):
- super(Mat5NumericMatrixGetter, self).__init__(array_reader, header)
- if header['is_logical']:
- self.mat_dtype = N.dtype('bool')
- else:
- self.mat_dtype = self.class_dtypes[header['mclass']]
-
- def get_raw_array(self):
- if self.header['is_complex']:
- # avoid array copy to save memory
- res = self.read_element(copy=False)
- res_j = self.read_element(copy=False)
- res = res + (res_j * 1j)
- else:
- res = self.read_element()
- return N.ndarray(shape=self.header['dims'],
- dtype=res.dtype,
- buffer=res,
- order='F')
-
-
-class Mat5SparseMatrixGetter(Mat5MatrixGetter):
- def get_raw_array(self):
- rowind = self.read_element()
- indptr = self.read_element()
- if self.header['is_complex']:
- # avoid array copy to save memory
- data = self.read_element(copy=False)
- data_j = self.read_element(copy=False)
- data = data + (data_j * 1j)
- else:
- data = self.read_element()
- ''' From the matlab (TM) API documentation, last found here:
- http://www.mathworks.com/access/helpdesk/help/techdoc/matlab_external/
- rowind are simply the row indices for all the (res) non-zero
- entries in the sparse array. rowind has nzmax entries, so
- may well have more entries than len(res), the actual number
- of non-zero entries, but rowind[len(res):] can be discarded
- and should be 0. indptr has length (number of columns + 1),
- and is such that, if D = diff(colind), D[j] gives the number
- of non-zero entries in column j. Because rowind values are
- stored in column order, this gives the column corresponding to
- each rowind
- '''
- if have_sparse:
- dims = self.header['dims']
- return scipy.sparse.csc_matrix((data,rowind,indptr), dims)
- else:
- return (dims, data, rowind, indptr)
-
-
-class Mat5CharMatrixGetter(Mat5MatrixGetter):
- def get_raw_array(self):
- res = self.read_element()
- # Convert non-string types to unicode
- if isinstance(res, N.ndarray):
- if res.dtype.type == N.uint16:
- codec = miUINT16_codec
- if self.codecs['uint16_len'] == 1:
- res = res.astype(N.uint8)
- elif res.dtype.type in (N.uint8, N.int8):
- codec = 'ascii'
- else:
- raise TypeError, 'Did not expect type %s' % res.dtype
- res = res.tostring().decode(codec)
- return N.ndarray(shape=self.header['dims'],
- dtype=N.dtype('U1'),
- buffer=N.array(res),
- order='F').copy()
-
-
-class Mat5CellMatrixGetter(Mat5MatrixGetter):
- def get_raw_array(self):
- # Account for fortran indexing of cells
- tupdims = tuple(self.header['dims'][::-1])
- length = N.product(tupdims)
- result = N.empty(length, dtype=object)
- for i in range(length):
- result[i] = self.get_item()
- return result.reshape(tupdims).T
-
- def get_item(self):
- return self.read_element()
-
-
-class Mat5StructMatrixGetter(Mat5CellMatrixGetter):
- def __init__(self, *args, **kwargs):
- super(Mat5StructMatrixGetter, self).__init__(*args, **kwargs)
- self.obj_template = mat_struct()
-
- def get_raw_array(self):
- namelength = self.read_element()[0]
- # get field names
- names = self.read_element()
- splitnames = [names[i:i+namelength] for i in \
- xrange(0,len(names),namelength)]
- self.obj_template._fieldnames = [x.tostring().strip('\x00')
- for x in splitnames]
- return super(Mat5StructMatrixGetter, self).get_raw_array()
-
- def get_item(self):
- item = pycopy(self.obj_template)
- for element in item._fieldnames:
- item.__dict__[element] = self.read_element()
- return item
-
-
-class Mat5ObjectMatrixGetter(Mat5StructMatrixGetter):
- def __init__(self, *args, **kwargs):
- super(Mat5StructMatrixGetter, self).__init__(*args, **kwargs)
- self.obj_template = mat_obj()
-
- def get_raw_array(self):
- self.obj_template._classname = self.read_element().tostring()
- return super(Mat5ObjectMatrixGetter, self).get_raw_array()
-
-
-class MatFile5Reader(MatFileReader):
- ''' Reader for Mat 5 mat files
-
- Adds the following attribute to base class
-
- uint16_codec - char codec to use for uint16 char arrays
- (defaults to system default codec)
- '''
-
- def __init__(self,
- mat_stream,
- byte_order=None,
- mat_dtype=False,
- squeeze_me=True,
- chars_as_strings=True,
- matlab_compatible=False,
- uint16_codec=None
- ):
- self.codecs = {}
- self._array_reader = Mat5ArrayReader(
- mat_stream,
- None,
- None,
- None,
- None,
- )
- super(MatFile5Reader, self).__init__(
- mat_stream,
- byte_order,
- mat_dtype,
- squeeze_me,
- chars_as_strings,
- matlab_compatible,
- )
- self._array_reader.processor_func = self.processor_func
- self.uint16_codec = uint16_codec
-
- def get_uint16_codec(self):
- return self._uint16_codec
- def set_uint16_codec(self, uint16_codec):
- if not uint16_codec:
- uint16_codec = sys.getdefaultencoding()
- # Set length of miUINT16 char encoding
- self.codecs['uint16_len'] = len(" ".encode(uint16_codec)) \
- - len(" ".encode(uint16_codec))
- self.codecs['uint16_codec'] = uint16_codec
- self._array_reader.codecs = self.codecs
- self._uint16_codec = uint16_codec
- uint16_codec = property(get_uint16_codec,
- set_uint16_codec,
- None,
- 'get/set uint16_codec')
-
- def set_dtypes(self):
- ''' Set dtypes and codecs '''
- self.dtypes = self.convert_dtypes(mdtypes_template)
- self.class_dtypes = self.convert_dtypes(mclass_dtypes_template)
- codecs = {}
- postfix = self.order_code == '<' and '_le' or '_be'
- for k, v in codecs_template.items():
- codec = v['codec']
- try:
- " ".encode(codec)
- except LookupError:
- codecs[k] = None
- continue
- if v['width'] > 1:
- codec += postfix
- codecs[k] = codec
- self.codecs.update(codecs)
- self.update_array_reader()
-
- def update_array_reader(self):
- self._array_reader.codecs = self.codecs
- self._array_reader.dtypes = self.dtypes
- self._array_reader.class_dtypes = self.class_dtypes
-
- def matrix_getter_factory(self):
- return self._array_reader.matrix_getter_factory()
-
- def guess_byte_order(self):
- self.mat_stream.seek(126)
- mi = self.mat_stream.read(2)
- self.mat_stream.seek(0)
- return mi == 'IM' and '<' or '>'
-
- def file_header(self):
- ''' Read in mat 5 file header '''
- hdict = {}
- hdr = self.read_dtype(self.dtypes['file_header'])
- hdict['__header__'] = hdr['description'].item().strip(' \t\n\000')
- v_major = hdr['version'] >> 8
- v_minor = hdr['version'] & 0xFF
- hdict['__version__'] = '%d.%d' % (v_major, v_minor)
- return hdict
-
- def format_looks_right(self):
- # Mat4 files have a zero somewhere in first 4 bytes
- self.mat_stream.seek(0)
- mopt_bytes = N.ndarray(shape=(4,),
- dtype=N.uint8,
- buffer = self.mat_stream.read(4))
- self.mat_stream.seek(0)
- return 0 not in mopt_bytes
-
-
-class Mat5MatrixWriter(MatStreamWriter):
-
- mat_tag = N.zeros((), mdtypes_template['tag_full'])
- mat_tag['mdtype'] = miMATRIX
-
- def __init__(self, file_stream, arr, name, is_global=False):
- super(Mat5MatrixWriter, self).__init__(file_stream, arr, name)
- self.is_global = is_global
-
- def write_dtype(self, arr):
- self.file_stream.write(arr.tostring())
-
- def write_element(self, arr, mdtype=None):
- # write tag, data
- tag = N.zeros((), mdtypes_template['tag_full'])
- if mdtype is None:
- tag['mdtype'] = np_to_mtypes[arr.dtype.str[1:]]
- else:
- tag['mdtype'] = mdtype
- tag['byte_count'] = arr.size*arr.itemsize
- self.write_dtype(tag)
- self.write_bytes(arr)
- # do 8 byte padding if needed
- if tag['byte_count']%8 != 0:
- pad = (1+tag['byte_count']//8)*8 - tag['byte_count']
- self.write_bytes(N.zeros((pad,),dtype='u1'))
-
- def write_header(self, mclass,
- is_global=False,
- is_complex=False,
- is_logical=False,
- nzmax=0):
- ''' Write header for given data options
- mclass - mat5 matrix class
- is_global - True if matrix is global
- is_complex - True is matrix is complex
- is_logical - True if matrix is logical
- nzmax - max non zero elements for sparse arrays
- '''
- self._mat_tag_pos = self.file_stream.tell()
- self.write_dtype(self.mat_tag)
- # write array flags (complex, global, logical, class, nzmax)
- af = N.zeros((), mdtypes_template['array_flags'])
- af['data_type'] = miUINT32
- af['byte_count'] = 8
- flags = is_complex << 3 | is_global << 2 | is_logical << 1
- af['flags_class'] = mclass | flags << 8
- af['nzmax'] = nzmax
- self.write_dtype(af)
- # write array shape
- self.arr=N.atleast_2d(self.arr)
- self.write_element(N.array(self.arr.shape, dtype='i4'))
- # write name
- self.write_element(N.ndarray(shape=len(self.name), dtype='S1', buffer=self.name))
-
- def update_matrix_tag(self):
- curr_pos = self.file_stream.tell()
- self.file_stream.seek(self._mat_tag_pos)
- self.mat_tag['byte_count'] = curr_pos - self._mat_tag_pos - 8
- self.write_dtype(self.mat_tag)
- self.file_stream.seek(curr_pos)
-
- def write(self):
- assert False, 'Not implemented'
-
-
-class Mat5NumericWriter(Mat5MatrixWriter):
-
- def write(self):
- imagf = self.arr.dtype.kind == 'c'
- try:
- mclass = np_to_mxtypes[self.arr.dtype.str[1:]]
- except KeyError:
- if imagf:
- self.arr = self.arr.astype('c128')
- else:
- self.arr = self.arr.astype('f8')
- mclass = mxDOUBLE_CLASS
- self.write_header(mclass=mclass,is_complex=imagf)
- if imagf:
- self.write_element(self.arr.real)
- self.write_element(self.arr.imag)
- else:
- self.write_element(self.arr)
- self.update_matrix_tag()
-
-class Mat5CharWriter(Mat5MatrixWriter):
- codec='ascii'
- def write(self):
- self.arr_to_chars()
- self.write_header(mclass=mxCHAR_CLASS)
- if self.arr.dtype.kind == 'U':
- # Recode unicode using self.codec
- n_chars = N.product(self.arr.shape)
- st_arr = N.ndarray(shape=(),
- dtype=self.arr_dtype_number(n_chars),
- buffer=self.arr)
- st = st_arr.item().encode(self.codec)
- self.arr = N.ndarray(shape=(len(st)), dtype='u1', buffer=st)
- self.write_element(self.arr,mdtype=miUTF8)
- self.update_matrix_tag()
-
-class Mat5UniCharWriter(Mat5CharWriter):
- codec='UTF8'
-
-
-class Mat5SparseWriter(Mat5MatrixWriter):
-
- def write(self):
- ''' Sparse matrices are 2D
- See docstring for Mat5SparseGetter
- '''
- imagf = self.arr.dtype.kind == 'c'
- N = self.arr.nnz
- ijd = N.zeros((N+1, 3+imagf), dtype='f8')
- for i in range(N):
- ijd[i,0], ijd[i,1] = self.arr.rowcol(i)
- ijd[:-1,0:2] += 1 # 1 based indexing
- if imagf:
- ijd[:-1,2] = self.arr.data.real
- ijd[:-1,3] = self.arr.data.imag
- else:
- ijd[:-1,2] = self.arr.data
- ijd[-1,0:2] = self.arr.shape
- self.write_header(P=miDOUBLE,
- T=mxSPARSE_CLASS,
- dims=ijd.shape)
- self.write_bytes(ijd)
-
-
-class Mat5WriterGetter(object):
- ''' Wraps stream and options, provides methods for getting Writer objects '''
- def __init__(self, stream, unicode_strings):
- self.stream = stream
- self.unicode_strings = unicode_strings
-
- def rewind(self):
- self.stream.seek(0)
-
- def matrix_writer_factory(self, arr, name, is_global=False):
- ''' Factory function to return matrix writer given variable to write
- stream - file or file-like stream to write to
- arr - array to write
- name - name in matlab (TM) workspace
- '''
- if have_sparse:
- if scipy.sparse.issparse(arr):
- return Mat5SparseWriter(self.stream, arr, name, is_global)
- arr = N.array(arr)
- if arr.dtype.hasobject:
- types, arr_type = self.classify_mobjects(arr)
- if arr_type == 'c':
- return Mat5CellWriter(self.stream, arr, name, is_global, types)
- elif arr_type == 's':
- return Mat5StructWriter(self.stream, arr, name, is_global)
- elif arr_type == 'o':
- return Mat5ObjectWriter(self.stream, arr, name, is_global)
- if arr.dtype.kind in ('U', 'S'):
- if self.unicode_strings:
- return Mat5UniCharWriter(self.stream, arr, name, is_global)
- else:
- return Mat5CharWriter(self.stream, arr, name, is_global)
- else:
- return Mat5NumericWriter(self.stream, arr, name, is_global)
-
- def classify_mobjects(self, objarr):
- ''' Function to classify objects passed for writing
- returns
- types - S1 array of same shape as objarr with codes for each object
- i - invalid object
- a - ndarray
- s - matlab struct
- o - matlab object
- arr_type - one of
- c - cell array
- s - struct array
- o - object array
- '''
- n = objarr.size
- types = N.empty((n,), dtype='S1')
- types[:] = 'i'
- type_set = set()
- flato = objarr.flat
- for i in range(n):
- obj = flato[i]
- if isinstance(obj, N.ndarray):
- types[i] = 'a'
- continue
- try:
- fns = tuple(obj._fieldnames)
- except AttributeError:
- continue
- try:
- cn = obj._classname
- except AttributeError:
- types[i] = 's'
- type_set.add(fns)
- continue
- types[i] = 'o'
- type_set.add((cn, fns))
- arr_type = 'c'
- if len(set(types))==1 and len(type_set) == 1:
- arr_type = types[0]
- return types.reshape(objarr.shape), arr_type
-
-
-class MatFile5Writer(MatFileWriter):
- ''' Class for writing mat5 files '''
- def __init__(self, file_stream,
- do_compression=False,
- unicode_strings=False,
- global_vars=None):
- super(MatFile5Writer, self).__init__(file_stream)
- self.do_compression = do_compression
- if global_vars:
- self.global_vars = global_vars
- else:
- self.global_vars = []
- self.writer_getter = Mat5WriterGetter(
- StringIO(),
- unicode_strings)
- # write header
- import os, time
- hdr = N.zeros((), mdtypes_template['file_header'])
- hdr['description']='MATLAB 5.0 MAT-file Platform: %s, Created on: %s' % (
- os.name,time.asctime())
- hdr['version']= 0x0100
- hdr['endian_test']=N.ndarray(shape=(),dtype='S2',buffer=N.uint16(0x4d49))
- file_stream.write(hdr.tostring())
-
- def get_unicode_strings(self):
- return self.write_getter.unicode_strings
- def set_unicode_strings(self, unicode_strings):
- self.writer_getter.unicode_strings = unicode_strings
- unicode_strings = property(get_unicode_strings,
- set_unicode_strings,
- None,
- 'get/set unicode strings property')
-
- def put_variables(self, mdict):
- for name, var in mdict.items():
- is_global = name in self.global_vars
- self.writer_getter.rewind()
- self.writer_getter.matrix_writer_factory(
- var,
- name,
- is_global,
- ).write()
- stream = self.writer_getter.stream
- if self.do_compression:
- str = zlib.compress(stream.getvalue(stream.tell()))
- tag = N.empty((), mdtypes_template['tag_full'])
- tag['mdtype'] = miCOMPRESSED
- tag['byte_count'] = len(str)
- self.file_stream.write(tag.tostring() + str)
- else:
- self.file_stream.write(stream.getvalue(stream.tell()))
Copied: trunk/scipy/io/matlab/mio5.py (from rev 3757, branches/io_new/matlab/mio5.py)
Deleted: trunk/scipy/io/matlab/miobase.py
===================================================================
--- branches/io_new/matlab/miobase.py 2008-01-01 02:55:20 UTC (rev 3757)
+++ trunk/scipy/io/matlab/miobase.py 2008-01-01 09:33:02 UTC (rev 3758)
@@ -1,379 +0,0 @@
-# Authors: Travis Oliphant, Matthew Brett
-
-"""
-Base classes for matlab (TM) file stream reading
-"""
-
-import sys
-
-import numpy as N
-
-try:
- import scipy.sparse
- have_sparse = 1
-except ImportError:
- have_sparse = 0
-
-
-def small_product(arr):
- ''' Faster than product for small arrays '''
- res = 1
- for e in arr:
- res *= e
- return res
-
-class ByteOrder(object):
- ''' Namespace for byte ordering '''
- little_endian = sys.byteorder == 'little'
- native_code = little_endian and '<' or '>'
- swapped_code = little_endian and '>' or '<'
-
- def to_numpy_code(code):
- if code is None:
- return ByteOrder.native_code
- if code in ('little', '<', 'l', 'L'):
- return '<'
- elif code in ('BIG', '>', 'B', 'b'):
- return '>'
- elif code in ('native', '='):
- return ByteOrder.native_code
- elif code in ('swapped'):
- return ByteOrder.swapped_code
- else:
- raise ValueError, 'We cannot handle byte order %s' % byte_order
- to_numpy_code = staticmethod(to_numpy_code)
-
-
-class MatStreamAgent(object):
- ''' Base object for readers / getters from mat file streams
-
- Attaches to initialized stream
-
- Base class for "getters" - which do store state of what they are
- reading on itialization, and therefore need to be initialized
- before each read, and "readers" which do not store state, and only
- need to be initialized once on object creation
-
- Implements common array reading functions
-
- Inputs mat_steam - MatFileReader object
- '''
-
- def __init__(self, mat_stream):
- self.mat_stream = mat_stream
-
- def read_dtype(self, a_dtype):
- ''' Generic get of byte stream data of known type
-
- Inputs
- a_dtype - dtype of array
-
- a_dtype is assumed to be correct endianness
- '''
- num_bytes = a_dtype.itemsize
- arr = N.ndarray(shape=(),
- dtype=a_dtype,
- buffer=self.mat_stream.read(num_bytes),
- order='F')
- return arr
-
- def read_ztstring(self, num_bytes):
- return self.mat_stream.read(num_bytes).strip('\x00')
-
-
-class MatFileReader(MatStreamAgent):
- """ Base object for reading mat files
-
- mat_stream - initialized byte stream object - file io interface object
- byte_order - byte order ('native', 'little', 'BIG')
- in ('native', '=')
- or in ('little', '<')
- or in ('BIG', '>')
- mat_dtype - return arrays in same dtype as loaded into matlab
- (instead of the dtype with which they are saved)
- squeeze_me - whether to squeeze unit dimensions or not
- chars_as_strings - whether to convert char arrays to string arrays
- mat_dtype - return matrices with datatype that matlab would load as
- (rather than in the datatype matlab saves as)
- matlab_compatible - returns matrices as would be loaded by matlab
- (implies squeeze_me=False, chars_as_strings=False
- mat_dtype=True)
-
- To make this class functional, you will need to override the
- following methods:
-
- set_dtypes - sets data types defs from byte order
- matrix_getter_factory - gives object to fetch next matrix from stream
- format_looks_right - returns True if format looks correct for
- this file type (Mat4, Mat5)
- guess_byte_order - guesses file byte order from file
- """
-
- def __init__(self, mat_stream,
- byte_order=None,
- mat_dtype=False,
- squeeze_me=True,
- chars_as_strings=True,
- matlab_compatible=False,
- ):
- # Initialize stream
- self.mat_stream = mat_stream
- self.dtypes = {}
- if not byte_order:
- byte_order = self.guess_byte_order()
- self.order_code = byte_order # sets dtypes and other things too
- if matlab_compatible:
- self.set_matlab_compatible()
- else:
- self._squeeze_me = squeeze_me
- self._chars_as_strings = chars_as_strings
- self._mat_dtype = mat_dtype
- self.processor_func = self.get_processor_func()
-
- def set_matlab_compatible(self):
- ''' Sets options to return arrays as matlab (tm) loads them '''
- self._mat_dtype = True
- self._squeeze_me = False
- self._chars_as_strings = False
- self.processor_func = self.get_processor_func()
-
- def get_mat_dtype(self):
- return self._mat_dtype
- def set_mat_dtype(self, mat_dtype):
- self._mat_dtype = mat_dtype
- self.processor_func = self.get_processor_func()
- mat_dtype = property(get_mat_dtype,
- set_mat_dtype,
- None,
- 'get/set mat_dtype property')
-
- def get_squeeze_me(self):
- return self._squeeze_me
- def set_squeeze_me(self, squeeze_me):
- self._squeeze_me = squeeze_me
- self.processor_func = self.get_processor_func()
- squeeze_me = property(get_squeeze_me,
- set_squeeze_me,
- None,
- 'get/set squeeze me property')
-
- def get_chars_as_strings(self):
- return self._chars_as_strings
- def set_chars_as_strings(self, chars_as_strings):
- self._chars_as_strings = chars_as_strings
- self.processor_func = self.get_processor_func()
- chars_as_strings = property(get_chars_as_strings,
- set_chars_as_strings,
- None,
- 'get/set squeeze me property')
-
- def get_order_code(self):
- return self._order_code
- def set_order_code(self, order_code):
- order_code = ByteOrder.to_numpy_code(order_code)
- self._order_code = order_code
- self.set_dtypes()
- order_code = property(get_order_code,
- set_order_code,
- None,
- 'get/set order code')
-
- def set_dtypes(self):
- assert False, 'Not implemented'
-
- def convert_dtypes(self, dtype_template):
- dtypes = dtype_template.copy()
- for k in dtypes:
- dtypes[k] = N.dtype(dtypes[k]).newbyteorder(
- self.order_code)
- return dtypes
-
- def matrix_getter_factory(self):
- assert False, 'Not implemented'
-
- def format_looks_right(self):
- "Return True if the format looks right for this object"
- assert False, 'Not implemented'
-
- def file_header(self):
- return {}
-
- def guess_byte_order(self):
- assert 0, 'Not implemented'
-
- def get_processor_func(self):
- ''' Processing to apply to read matrices
-
- Function applies options to matrices. We have to pass this
- function into the reader routines because Mat5 matrices
- occur as submatrices - in cell arrays, structs and objects -
- so we will not see these in the main variable getting routine
- here.
-
- The read array is the first argument.
- The getter, passed as second argument to the function, must
- define properties, iff mat_dtype option is True:
-
- mat_dtype - data type when loaded into matlab (tm)
- (None for no conversion)
-
- func returns the processed array
- '''
-
- def func(arr, getter):
- if arr.dtype.kind == 'U' and self.chars_as_strings:
- # Convert char array to string or array of strings
- dims = arr.shape
- if len(dims) >= 2: # return array of strings
- dtt = self.order_code + 'U'
- n_dims = dims[:-1]
- str_arr = arr.reshape(
- (small_product(n_dims),
- dims[-1]))
- arr = N.empty(n_dims, dtype=object)
- for i in range(0, n_dims[-1]):
- arr[...,i] = self.chars_to_str(str_arr[i])
- else: # return string
- arr = self.chars_to_str(arr)
- if self.mat_dtype:
- # Apply options to replicate matlab's (TM)
- # load into workspace
- if getter.mat_dtype is not None:
- arr = arr.astype(getter.mat_dtype)
- if self.squeeze_me:
- arr = N.squeeze(arr)
- if not arr.size:
- arr = N.array([])
- elif not arr.shape: # 0d coverted to scalar
- arr = arr.item()
- return arr
- return func
-
- def chars_to_str(self, str_arr):
- ''' Convert string array to string '''
- dt = N.dtype('U' + str(small_product(str_arr.shape)))
- return N.ndarray(shape=(),
- dtype = dt,
- buffer = str_arr.copy()).item()
-
- def get_variables(self, variable_names=None):
- ''' get variables from stream as dictionary
-
- variable_names - optional list of variable names to get
-
- If variable_names is None, then get all variables in file
- '''
- if isinstance(variable_names, basestring):
- variable_names = [variable_names]
- self.mat_stream.seek(0)
- mdict = self.file_header()
- mdict['__globals__'] = []
- while not self.end_of_stream():
- getter = self.matrix_getter_factory()
- name = getter.name
- if variable_names and name not in variable_names:
- getter.to_next()
- continue
- res = getter.get_array()
- mdict[name] = res
- if getter.is_global:
- mdict['__globals__'].append(name)
- if variable_names:
- variable_names.remove(name)
- if not variable_names:
- break
- return mdict
-
- def end_of_stream(self):
- b = self.mat_stream.read(1)
- self.mat_stream.seek(-1,1)
- return len(b) == 0
-
-
-class MatMatrixGetter(MatStreamAgent):
- """ Base class for matrix getters
-
- Getters are stateful versions of agents, and record state of
- current read on initialization, so need to be created for each
- read - one-shot objects.
-
- MatrixGetters are initialized with the content of the matrix
- header
-
- Accepts
- array_reader - array reading object (see below)
- header - header dictionary for matrix being read
- """
-
- def __init__(self, array_reader, header):
- super(MatMatrixGetter, self).__init__(array_reader.mat_stream)
- self.array_reader = array_reader
- self.dtypes = array_reader.dtypes
- self.header = header
- self.name = header['name']
-
- def get_array(self):
- ''' Gets an array from matrix, and applies any necessary processing '''
- arr = self.get_raw_array()
- return self.array_reader.processor_func(arr, self)
-
- def get_raw_array(self):
- assert False, 'Not implemented'
-
- def to_next(self):
- self.mat_stream.seek(self.next_position)
-
-
-class MatArrayReader(MatStreamAgent):
- ''' Base class for array readers
-
- The array_reader contains information about the current reading
- process, such as byte ordered dtypes and the processing function
- to apply to matrices as they are read, as well as routines for
- reading matrix compenents.
- '''
-
- def __init__(self, mat_stream, dtypes, processor_func):
- self.mat_stream = mat_stream
- self.dtypes = dtypes
- self.processor_func = processor_func
-
- def matrix_getter_factory(self):
- assert False, 'Not implemented'
-
-
-class MatStreamWriter(object):
- ''' Base object for writing to mat files '''
- def __init__(self, file_stream, arr, name):
- self.file_stream = file_stream
- self.arr = arr
- dt = self.arr.dtype
- if not dt.isnative:
- self.arr = self.arr.astype(dt.newbyteorder('='))
- self.name = name
-
- def arr_dtype_number(self, num):
- ''' Return dtype for given number of items per element'''
- return N.dtype(self.arr.dtype.str[:2] + str(num))
-
- def arr_to_chars(self):
- ''' Convert string array to char array '''
- dims = list(self.arr.shape)
- if not dims:
- dims = [1]
- dims.append(int(self.arr.dtype.str[2:]))
- self.arr = N.ndarray(shape=dims,
- dtype=self.arr_dtype_number(1),
- buffer=self.arr)
-
- def write_bytes(self, arr):
- self.file_stream.write(arr.tostring(order='F'))
-
- def write_string(self, s):
- self.file_stream.write(s)
-
-
-class MatFileWriter(object):
- ''' Base class for Mat file writers '''
- def __init__(self, file_stream):
- self.file_stream = file_stream
Copied: trunk/scipy/io/matlab/miobase.py (from rev 3757, branches/io_new/matlab/miobase.py)
Deleted: trunk/scipy/io/matlab/setup.py
===================================================================
--- branches/io_new/matlab/setup.py 2008-01-01 02:55:20 UTC (rev 3757)
+++ trunk/scipy/io/matlab/setup.py 2008-01-01 09:33:02 UTC (rev 3758)
@@ -1,10 +0,0 @@
-#!/usr/bin/env python
-
-def configuration(parent_package='',top_path=None):
- from numpy.distutils.misc_util import Configuration
- config = Configuration('matlab', parent_package, top_path)
- return config
-
-if __name__ == '__main__':
- from numpy.distutils.core import setup
- setup(**configuration(top_path='').todict())
Copied: trunk/scipy/io/matlab/setup.py (from rev 3757, branches/io_new/matlab/setup.py)
Deleted: trunk/scipy/io/mio.py
===================================================================
--- trunk/scipy/io/mio.py 2008-01-01 02:55:20 UTC (rev 3757)
+++ trunk/scipy/io/mio.py 2008-01-01 09:33:02 UTC (rev 3758)
@@ -1,133 +0,0 @@
-# Authors: Travis Oliphant, Matthew Brett
-
-"""
-Module for reading and writing matlab (TM) .mat files
-"""
-
-import os
-import sys
-
-from scipy.io.mio4 import MatFile4Reader, MatFile4Writer
-from scipy.io.mio5 import MatFile5Reader, MatFile5Writer
-
-__all__ = ['find_mat_file', 'mat_reader_factory', 'loadmat', 'savemat']
-
-def find_mat_file(file_name, appendmat=True):
- ''' Try to find .mat file on system path
-
- file_name - file name string
- append_mat - If True, and file_name does not end in '.mat', appends it
- '''
- if appendmat and file_name[-4:] == ".mat":
- file_name = file_name[:-4]
- if os.sep in file_name:
- full_name = file_name
- if appendmat:
- full_name = file_name + ".mat"
- else:
- full_name = None
- junk, file_name = os.path.split(file_name)
- for path in [os.curdir] + list(sys.path):
- test_name = os.path.join(path, file_name)
- if appendmat:
- test_name += ".mat"
- try:
- fid = open(test_name,'rb')
- fid.close()
- full_name = test_name
- break
- except IOError:
- pass
- return full_name
-
-def mat_reader_factory(file_name, appendmat=True, **kwargs):
- """Create reader for matlab (TM) .mat format files
-
- See docstring for loadmat for input options
- """
- if isinstance(file_name, basestring):
- full_name = find_mat_file(file_name, appendmat)
- if full_name is None:
- raise IOError, "%s not found on the path." % file_name
- byte_stream = open(full_name, 'rb')
- else:
- try:
- file_name.read(0)
- except AttributeError:
- raise IOError, 'Reader needs file name or open file-like object'
- byte_stream = file_name
-
- MR = MatFile4Reader(byte_stream, **kwargs)
- if MR.format_looks_right():
- return MR
- return MatFile5Reader(byte_stream, **kwargs)
-
-def loadmat(file_name, mdict=None, appendmat=True, basename='raw', **kwargs):
- ''' Load Matlab(tm) file
-
- file_name - Name of the mat file
- (do not need .mat extension if appendmat==True)
- If name not a full path name, search for the file on
- the sys.path list and use the first one found (the
- current directory is searched first).
- Can also pass open file-like object
- m_dict - optional dictionary in which to insert matfile variables
- appendmat - True to append the .mat extension to the end of the
- given filename, if not already present
- base_name - base name for unnamed variables (unused in code)
- byte_order - byte order ('native', 'little', 'BIG')
- in ('native', '=')
- or in ('little', '<')
- or in ('BIG', '>')
- mat_dtype - return arrays in same dtype as loaded into matlab
- (instead of the dtype with which they are saved)
- squeeze_me - whether to squeeze matrix dimensions or not
- chars_as_strings - whether to convert char arrays to string arrays
- mat_dtype - return matrices with datatype that matlab would load as
- (rather than in the datatype matlab saves as)
- matlab_compatible - returns matrices as would be loaded by matlab
- (implies squeeze_me=False, chars_as_strings=False,
- mat_dtype=True)
-
- v4 (Level 1.0), v6 and v7.1 matfiles are supported.
-
- '''
- MR = mat_reader_factory(file_name, appendmat, **kwargs)
- matfile_dict = MR.get_variables()
- if mdict is not None:
- mdict.update(matfile_dict)
- else:
- mdict = matfile_dict
- return mdict
-
-def savemat(file_name, mdict, appendmat=True, format='4'):
- """Save a dictionary of names and arrays into the MATLAB-style .mat file.
-
- This saves the arrayobjects in the given dictionary to a matlab
- style .mat file.
-
- appendmat - if true, appends '.mat' extension to filename, if not present
- format - '4' for matlab 4 mat files, '5' for matlab 5 onwards
- """
- file_is_string = isinstance(file_name, basestring)
- if file_is_string:
- if appendmat and file_name[-4:] != ".mat":
- file_name = file_name + ".mat"
- file_stream = open(file_name, 'wb')
- else:
- try:
- file_name.write('')
- except AttributeError:
- raise IOError, 'Writer needs file name or writeable '\
- 'file-like object'
- file_stream = file_name
-
- if format == '4':
- MW = MatFile4Writer(file_stream)
- elif format == '5':
- MW = MatFile5Writer(file_stream)
- else:
- raise ValueError, 'Format should be 4 or 5'
- MW.put_variables(mdict)
- if file_is_string:
- file_stream.close()
Deleted: trunk/scipy/io/mio4.py
===================================================================
--- trunk/scipy/io/mio4.py 2008-01-01 02:55:20 UTC (rev 3757)
+++ trunk/scipy/io/mio4.py 2008-01-01 09:33:02 UTC (rev 3758)
@@ -1,345 +0,0 @@
-''' Classes for read / write of matlab (TM) 4 files
-'''
-
-import numpy as N
-
-from scipy.io.miobase import *
-
-miDOUBLE = 0
-miSINGLE = 1
-miINT32 = 2
-miINT16 = 3
-miUINT16 = 4
-miUINT8 = 5
-
-mdtypes_template = {
- miDOUBLE: 'f8',
- miSINGLE: 'f4',
- miINT32: 'i4',
- miINT16: 'i2',
- miUINT16: 'u2',
- miUINT8: 'u1',
- 'header': [('mopt', 'i4'),
- ('mrows', 'i4'),
- ('ncols', 'i4'),
- ('imagf', 'i4'),
- ('namlen', 'i4')],
- 'U1': 'U1',
- }
-
-np_to_mtypes = {
- 'f8': miDOUBLE,
- 'c32': miDOUBLE,
- 'c24': miDOUBLE,
- 'c16': miDOUBLE,
- 'f4': miSINGLE,
- 'c8': miSINGLE,
- 'i4': miINT32,
- 'i2': miINT16,
- 'u2': miUINT16,
- 'u1': miUINT8,
- 'S1': miUINT8,
- }
-
-# matrix classes
-mxFULL_CLASS = 0
-mxCHAR_CLASS = 1
-mxSPARSE_CLASS = 2
-
-order_codes = {
- 0: '<',
- 1: '>',
- 2: 'VAX D-float', #!
- 3: 'VAX G-float',
- 4: 'Cray', #!!
- }
-
-
-class Mat4ArrayReader(MatArrayReader):
- ''' Class for reading Mat4 arrays
- '''
-
- def matrix_getter_factory(self):
- ''' Read header, return matrix getter '''
- data = self.read_dtype(self.dtypes['header'])
- header = {}
- header['name'] = self.read_ztstring(data['namlen'])
- if data['mopt'] < 0 or data['mopt'] > 5000:
- ValueError, 'Mat 4 mopt wrong format, byteswapping problem?'
- M,rest = divmod(data['mopt'], 1000)
- O,rest = divmod(rest,100)
- P,rest = divmod(rest,10)
- T = rest
- if O != 0:
- raise ValueError, 'O in MOPT integer should be 0, wrong format?'
- header['dtype'] = self.dtypes[P]
- header['mclass'] = T
- header['dims'] = (data['mrows'], data['ncols'])
- header['is_complex'] = data['imagf'] == 1
- remaining_bytes = header['dtype'].itemsize * N.product(header['dims'])
- if header['is_complex'] and not header['mclass'] == mxSPARSE_CLASS:
- remaining_bytes *= 2
- next_pos = self.mat_stream.tell() + remaining_bytes
- if T == mxFULL_CLASS:
- getter = Mat4FullGetter(self, header)
- elif T == mxCHAR_CLASS:
- getter = Mat4CharGetter(self, header)
- elif T == mxSPARSE_CLASS:
- getter = Mat4SparseGetter(self, header)
- else:
- raise TypeError, 'No reader for class code %s' % T
- getter.next_position = next_pos
- return getter
-
-
-class Mat4MatrixGetter(MatMatrixGetter):
-
- # Mat4 variables never global or logical
- is_global = False
- is_logical = False
-
- def read_array(self, copy=True):
- ''' Mat4 read array always uses header dtype and dims
- copy - copies array if True
- (buffer is usually read only)
- a_dtype is assumed to be correct endianness
- '''
- dt = self.header['dtype']
- dims = self.header['dims']
- num_bytes = dt.itemsize
- for d in dims:
- num_bytes *= d
- arr = N.ndarray(shape=dims,
- dtype=dt,
- buffer=self.mat_stream.read(num_bytes),
- order='F')
- if copy:
- arr = arr.copy()
- return arr
-
-
-class Mat4FullGetter(Mat4MatrixGetter):
- def __init__(self, array_reader, header):
- super(Mat4FullGetter, self).__init__(array_reader, header)
- if header['is_complex']:
- self.mat_dtype = N.dtype(N.complex128)
- else:
- self.mat_dtype = N.dtype(N.float64)
-
- def get_raw_array(self):
- if self.header['is_complex']:
- # avoid array copy to save memory
- res = self.read_array(copy=False)
- res_j = self.read_array(copy=False)
- return res + (res_j * 1j)
- return self.read_array()
-
-
-class Mat4CharGetter(Mat4MatrixGetter):
- def get_raw_array(self):
- arr = self.read_array().astype(N.uint8)
- # ascii to unicode
- S = arr.tostring().decode('ascii')
- return N.ndarray(shape=self.header['dims'],
- dtype=N.dtype('U1'),
- buffer = N.array(S)).copy()
-
-
-class Mat4SparseGetter(Mat4MatrixGetter):
- ''' Read sparse matrix type
-
- Matlab (TM) 4 real sparse arrays are saved in a N+1 by 3 array
- format, where N is the number of non-zero values. Column 1 values
- [0:N] are the (1-based) row indices of the each non-zero value,
- column 2 [0:N] are the column indices, column 3 [0:N] are the
- (real) values. The last values [-1,0:2] of the rows, column
- indices are shape[0] and shape[1] respectively of the output
- matrix. The last value for the values column is a padding 0. mrows
- and ncols values from the header give the shape of the stored
- matrix, here [N+1, 3]. Complex data is saved as a 4 column
- matrix, where the fourth column contains the imaginary component;
- the last value is again 0. Complex sparse data do _not_ have the
- header imagf field set to True; the fact that the data are complex
- is only detectable because there are 4 storage columns
- '''
- def get_raw_array(self):
- res = self.read_array()
- tmp = res[:-1,:]
- dims = res[-1,0:2]
- I = N.ascontiguousarray(tmp[:,0],dtype='intc') #fixes byte order also
- J = N.ascontiguousarray(tmp[:,1],dtype='intc')
- I -= 1 # for 1-based indexing
- J -= 1
- if res.shape[1] == 3:
- V = N.ascontiguousarray(tmp[:,2],dtype='float')
- else:
- V = N.ascontiguousarray(tmp[:,2],dtype='complex')
- V.imag = tmp[:,3]
- if have_sparse:
- return scipy.sparse.coo_matrix((V,(I,J)), dims)
- return (dims, I, J, V)
-
-
-class MatFile4Reader(MatFileReader):
- ''' Reader for Mat4 files '''
- def __init__(self, mat_stream, *args, **kwargs):
- self._array_reader = Mat4ArrayReader(
- mat_stream,
- None,
- None,
- )
- super(MatFile4Reader, self).__init__(mat_stream, *args, **kwargs)
- self._array_reader.processor_func = self.processor_func
-
- def set_dtypes(self):
- self.dtypes = self.convert_dtypes(mdtypes_template)
- self._array_reader.dtypes = self.dtypes
-
- def matrix_getter_factory(self):
- return self._array_reader.matrix_getter_factory()
-
- def format_looks_right(self):
- # Mat4 files have a zero somewhere in first 4 bytes
- self.mat_stream.seek(0)
- mopt_bytes = N.ndarray(shape=(4,),
- dtype=N.uint8,
- buffer = self.mat_stream.read(4))
- self.mat_stream.seek(0)
- return 0 in mopt_bytes
-
- def guess_byte_order(self):
- self.mat_stream.seek(0)
- mopt = self.read_dtype(N.dtype('i4'))
- self.mat_stream.seek(0)
- if mopt < 0 or mopt > 5000:
- return ByteOrder.swapped_code
- return ByteOrder.native_code
-
-
-class Mat4MatrixWriter(MatStreamWriter):
-
- def write_header(self, P=0, T=0, imagf=0, dims=None):
- ''' Write header for given data options
- P - mat4 data type
- T - mat4 matrix class
- imagf - complex flag
- dims - matrix dimensions
- '''
- if dims is None:
- dims = self.arr.shape
- header = N.empty((), mdtypes_template['header'])
- M = not ByteOrder.little_endian
- O = 0
- header['mopt'] = (M * 1000 +
- O * 100 +
- P * 10 +
- T)
- header['mrows'] = dims[0]
- header['ncols'] = dims[1]
- header['imagf'] = imagf
- header['namlen'] = len(self.name) + 1
- self.write_bytes(header)
- self.write_string(self.name + '\0')
-
- def arr_to_2d(self):
- self.arr = N.atleast_2d(self.arr)
- dims = self.arr.shape
- if len(dims) > 2:
- self.arr = self.arr.reshape(-1,dims[-1])
-
- def write(self):
- assert False, 'Not implemented'
-
-
-class Mat4NumericWriter(Mat4MatrixWriter):
-
- def write(self):
- self.arr_to_2d()
- imagf = self.arr.dtype.kind == 'c'
- try:
- P = np_to_mtypes[self.arr.dtype.str[1:]]
- except KeyError:
- if imagf:
- self.arr = self.arr.astype('c128')
- else:
- self.arr = self.arr.astype('f8')
- P = miDOUBLE
- self.write_header(P=P,
- T=mxFULL_CLASS,
- imagf=imagf)
- if imagf:
- self.write_bytes(self.arr.real)
- self.write_bytes(self.arr.imag)
- else:
- self.write_bytes(self.arr)
-
-
-class Mat4CharWriter(Mat4MatrixWriter):
-
- def write(self):
- self.arr_to_chars()
- self.arr_to_2d()
- dims = self.arr.shape
- self.write_header(P=miUINT8,
- T=mxCHAR_CLASS)
- if self.arr.dtype.kind == 'U':
- # Recode unicode to ascii
- n_chars = N.product(dims)
- st_arr = N.ndarray(shape=(),
- dtype=self.arr_dtype_number(n_chars),
- buffer=self.arr)
- st = st_arr.item().encode('ascii')
- self.arr = N.ndarray(shape=dims, dtype='S1', buffer=st)
- self.write_bytes(self.arr)
-
-
-class Mat4SparseWriter(Mat4MatrixWriter):
-
- def write(self):
- ''' Sparse matrices are 2D
- See docstring for Mat4SparseGetter
- '''
- imagf = self.arr.dtype.kind == 'c'
- nnz = self.arr.nnz
- ijd = N.zeros((nnz+1, 3+imagf), dtype='f8')
- for i in range(nnz):
- ijd[i,0], ijd[i,1] = self.arr.rowcol(i)
- ijd[:-1,0:2] += 1 # 1 based indexing
- if imagf:
- ijd[:-1,2] = self.arr.data.real
- ijd[:-1,3] = self.arr.data.imag
- else:
- ijd[:-1,2] = self.arr.data
- ijd[-1,0:2] = self.arr.shape
- self.write_header(P=miDOUBLE,
- T=mxSPARSE_CLASS,
- dims=ijd.shape)
- self.write_bytes(ijd)
-
-
-def matrix_writer_factory(stream, arr, name):
- ''' Factory function to return matrix writer given variable to write
- stream - file or file-like stream to write to
- arr - array to write
- name - name in matlab (TM) workspace
- '''
- if have_sparse:
- if scipy.sparse.issparse(arr):
- return Mat4SparseWriter(stream, arr, name)
- arr = N.array(arr)
- dtt = arr.dtype.type
- if dtt is N.object_:
- raise TypeError, 'Cannot save object arrays in Mat4'
- elif dtt is N.void:
- raise TypeError, 'Cannot save void type arrays'
- elif dtt in (N.unicode_, N.string_):
- return Mat4CharWriter(stream, arr, name)
- else:
- return Mat4NumericWriter(stream, arr, name)
-
-
-class MatFile4Writer(MatFileWriter):
-
- def put_variables(self, mdict):
- for name, var in mdict.items():
- matrix_writer_factory(self.file_stream, var, name).write()
Deleted: trunk/scipy/io/mio5.py
===================================================================
--- trunk/scipy/io/mio5.py 2008-01-01 02:55:20 UTC (rev 3757)
+++ trunk/scipy/io/mio5.py 2008-01-01 09:33:02 UTC (rev 3758)
@@ -1,807 +0,0 @@
-''' Classes for read / write of matlab (TM) 5 files
-'''
-
-# Small fragments of current code adapted from matfile.py by Heiko
-# Henkelmann
-
-## Notice in matfile.py file
-
-# Copyright (c) 2003 Heiko Henkelmann
-
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to
-# deal in the Software without restriction, including without limitation the
-# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-# sell copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-# DEALINGS IN THE SOFTWARE.
-
-import zlib
-from copy import copy as pycopy
-from cStringIO import StringIO
-import numpy as N
-
-from scipy.io.miobase import *
-
-try: # Python 2.3 support
- from sets import Set as set
-except:
- pass
-
-miINT8 = 1
-miUINT8 = 2
-miINT16 = 3
-miUINT16 = 4
-miINT32 = 5
-miUINT32 = 6
-miSINGLE = 7
-miDOUBLE = 9
-miINT64 = 12
-miUINT64 = 13
-miMATRIX = 14
-miCOMPRESSED = 15
-miUTF8 = 16
-miUTF16 = 17
-miUTF32 = 18
-
-mxCELL_CLASS = 1
-mxSTRUCT_CLASS = 2
-mxOBJECT_CLASS = 3
-mxCHAR_CLASS = 4
-mxSPARSE_CLASS = 5
-mxDOUBLE_CLASS = 6
-mxSINGLE_CLASS = 7
-mxINT8_CLASS = 8
-mxUINT8_CLASS = 9
-mxINT16_CLASS = 10
-mxUINT16_CLASS = 11
-mxINT32_CLASS = 12
-mxUINT32_CLASS = 13
-
-mdtypes_template = {
- miINT8: 'i1',
- miUINT8: 'u1',
- miINT16: 'i2',
- miUINT16: 'u2',
- miINT32: 'i4',
- miUINT32: 'u4',
- miSINGLE: 'f4',
- miDOUBLE: 'f8',
- miINT64: 'i8',
- miUINT64: 'u8',
- miUTF8: 'u1',
- miUTF16: 'u2',
- miUTF32: 'u4',
- 'file_header': [('description', 'S116'),
- ('subsystem_offset', 'i8'),
- ('version', 'u2'),
- ('endian_test', 'S2')],
- 'tag_full': [('mdtype', 'u4'), ('byte_count', 'u4')],
- 'array_flags': [('data_type', 'u4'),
- ('byte_count', 'u4'),
- ('flags_class','u4'),
- ('nzmax', 'u4')],
- 'U1': 'U1',
- }
-
-mclass_dtypes_template = {
- mxINT8_CLASS: 'i1',
- mxUINT8_CLASS: 'u1',
- mxINT16_CLASS: 'i2',
- mxUINT16_CLASS: 'u2',
- mxINT32_CLASS: 'i4',
- mxUINT32_CLASS: 'u4',
- mxSINGLE_CLASS: 'f4',
- mxDOUBLE_CLASS: 'f8',
- }
-
-
-np_to_mtypes = {
- 'f8': miDOUBLE,
- 'c32': miDOUBLE,
- 'c24': miDOUBLE,
- 'c16': miDOUBLE,
- 'f4': miSINGLE,
- 'c8': miSINGLE,
- 'i1': miINT8,
- 'i2': miINT16,
- 'i4': miINT32,
- 'u1': miUINT8,
- 'u4': miUINT32,
- 'u2': miUINT16,
- 'S1': miUINT8,
- 'U1': miUTF16,
- }
-
-
-np_to_mxtypes = {
- 'f8': mxDOUBLE_CLASS,
- 'c32': mxDOUBLE_CLASS,
- 'c24': mxDOUBLE_CLASS,
- 'c16': mxDOUBLE_CLASS,
- 'f4': mxSINGLE_CLASS,
- 'c8': mxSINGLE_CLASS,
- 'i4': mxINT32_CLASS,
- 'i2': mxINT16_CLASS,
- 'u2': mxUINT16_CLASS,
- 'u1': mxUINT8_CLASS,
- 'S1': mxUINT8_CLASS,
- }
-
-
-
-''' Before release v7.1 (release 14) matlab (TM) used the system
-default character encoding scheme padded out to 16-bits. Release 14
-and later use Unicode. When saving character data, R14 checks if it
-can be encoded in 7-bit ascii, and saves in that format if so.'''
-
-codecs_template = {
- miUTF8: {'codec': 'utf_8', 'width': 1},
- miUTF16: {'codec': 'utf_16', 'width': 2},
- miUTF32: {'codec': 'utf_32','width': 4},
- }
-
-miUINT16_codec = sys.getdefaultencoding()
-
-mx_numbers = (
- mxDOUBLE_CLASS,
- mxSINGLE_CLASS,
- mxINT8_CLASS,
- mxUINT8_CLASS,
- mxINT16_CLASS,
- mxUINT16_CLASS,
- mxINT32_CLASS,
- mxUINT32_CLASS,
- )
-
-class mat_struct(object):
- ''' Placeholder for holding read data from structs '''
- pass
-
-class mat_obj(object):
- ''' Placeholder for holding read data from objects '''
- pass
-
-class Mat5ArrayReader(MatArrayReader):
- ''' Class to get Mat5 arrays
-
- Provides element reader functions, header reader, matrix reader
- factory function
- '''
-
- def __init__(self, mat_stream, dtypes, processor_func, codecs, class_dtypes):
- super(Mat5ArrayReader, self).__init__(mat_stream,
- dtypes,
- processor_func,
- )
- self.codecs = codecs
- self.class_dtypes = class_dtypes
-
- def read_element(self, copy=True):
- raw_tag = self.mat_stream.read(8)
- tag = N.ndarray(shape=(),
- dtype=self.dtypes['tag_full'],
- buffer = raw_tag)
- mdtype = tag['mdtype'].item()
- byte_count = mdtype >> 16
- if byte_count: # small data element format
- if byte_count > 4:
- raise ValueError, 'Too many bytes for sde format'
- mdtype = mdtype & 0xFFFF
- dt = self.dtypes[mdtype]
- el_count = byte_count / dt.itemsize
- return N.ndarray(shape=(el_count,),
- dtype=dt,
- buffer=raw_tag[4:])
- byte_count = tag['byte_count'].item()
- if mdtype == miMATRIX:
- return self.current_getter(byte_count).get_array()
- if mdtype in self.codecs: # encoded char data
- raw_str = self.mat_stream.read(byte_count)
- codec = self.codecs[mdtype]
- if not codec:
- raise TypeError, 'Do not support encoding %d' % mdtype
- el = raw_str.decode(codec)
- else: # numeric data
- dt = self.dtypes[mdtype]
- el_count = byte_count / dt.itemsize
- el = N.ndarray(shape=(el_count,),
- dtype=dt,
- buffer=self.mat_stream.read(byte_count))
- if copy:
- el = el.copy()
- mod8 = byte_count % 8
- if mod8:
- self.mat_stream.seek(8 - mod8, 1)
- return el
-
- def matrix_getter_factory(self):
- ''' Returns reader for next matrix at top level '''
- tag = self.read_dtype(self.dtypes['tag_full'])
- mdtype = tag['mdtype'].item()
- byte_count = tag['byte_count'].item()
- next_pos = self.mat_stream.tell() + byte_count
- if mdtype == miCOMPRESSED:
- getter = Mat5ZArrayReader(self, byte_count).matrix_getter_factory()
- elif not mdtype == miMATRIX:
- raise TypeError, \
- 'Expecting miMATRIX type here, got %d' % mdtype
- else:
- getter = self.current_getter(byte_count)
- getter.next_position = next_pos
- return getter
-
- def current_getter(self, byte_count):
- ''' Return matrix getter for current stream position
-
- Returns matrix getters at top level and sub levels
- '''
- if not byte_count: # an empty miMATRIX can contain no bytes
- return Mat5EmptyMatrixGetter(self)
- af = self.read_dtype(self.dtypes['array_flags'])
- header = {}
- flags_class = af['flags_class']
- mc = flags_class & 0xFF
- header['mclass'] = mc
- header['is_logical'] = flags_class >> 9 & 1
- header['is_global'] = flags_class >> 10 & 1
- header['is_complex'] = flags_class >> 11 & 1
- header['nzmax'] = af['nzmax']
- header['dims'] = self.read_element()
- header['name'] = self.read_element().tostring()
- if mc in mx_numbers:
- return Mat5NumericMatrixGetter(self, header)
- if mc == mxSPARSE_CLASS:
- return Mat5SparseMatrixGetter(self, header)
- if mc == mxCHAR_CLASS:
- return Mat5CharMatrixGetter(self, header)
- if mc == mxCELL_CLASS:
- return Mat5CellMatrixGetter(self, header)
- if mc == mxSTRUCT_CLASS:
- return Mat5StructMatrixGetter(self, header)
- if mc == mxOBJECT_CLASS:
- return Mat5ObjectMatrixGetter(self, header)
- raise TypeError, 'No reader for class code %s' % mc
-
-
-class Mat5ZArrayReader(Mat5ArrayReader):
- ''' Getter for compressed arrays
-
- Reads and uncompresses gzipped stream on init, providing wrapper
- for this new sub-stream.
- '''
- def __init__(self, array_reader, byte_count):
- '''Reads and uncompresses gzipped stream'''
- data = array_reader.mat_stream.read(byte_count)
- super(Mat5ZArrayReader, self).__init__(
- StringIO(zlib.decompress(data)),
- array_reader.dtypes,
- array_reader.processor_func,
- array_reader.codecs,
- array_reader.class_dtypes)
-
-
-class Mat5MatrixGetter(MatMatrixGetter):
- ''' Base class for getting Mat5 matrices
-
- Gets current read information from passed array_reader
- '''
-
- def __init__(self, array_reader, header):
- super(Mat5MatrixGetter, self).__init__(array_reader, header)
- self.class_dtypes = array_reader.class_dtypes
- self.codecs = array_reader.codecs
- self.is_global = header['is_global']
- self.mat_dtype = None
-
- def read_element(self, *args, **kwargs):
- return self.array_reader.read_element(*args, **kwargs)
-
-
-class Mat5EmptyMatrixGetter(Mat5MatrixGetter):
- ''' Dummy class to return empty array for empty matrix
- '''
- def __init__(self, array_reader):
- self.array_reader = array_reader
- self.mat_stream = array_reader.mat_stream
- self.data_position = self.mat_stream.tell()
- self.header = {}
- self.is_global = False
- self.mat_dtype = 'f8'
-
- def get_raw_array(self):
- return N.array([[]])
-
-
-class Mat5NumericMatrixGetter(Mat5MatrixGetter):
-
- def __init__(self, array_reader, header):
- super(Mat5NumericMatrixGetter, self).__init__(array_reader, header)
- if header['is_logical']:
- self.mat_dtype = N.dtype('bool')
- else:
- self.mat_dtype = self.class_dtypes[header['mclass']]
-
- def get_raw_array(self):
- if self.header['is_complex']:
- # avoid array copy to save memory
- res = self.read_element(copy=False)
- res_j = self.read_element(copy=False)
- res = res + (res_j * 1j)
- else:
- res = self.read_element()
- return N.ndarray(shape=self.header['dims'],
- dtype=res.dtype,
- buffer=res,
- order='F')
-
-
-class Mat5SparseMatrixGetter(Mat5MatrixGetter):
- def get_raw_array(self):
- rowind = self.read_element()
- indptr = self.read_element()
- if self.header['is_complex']:
- # avoid array copy to save memory
- data = self.read_element(copy=False)
- data_j = self.read_element(copy=False)
- data = data + (data_j * 1j)
- else:
- data = self.read_element()
- ''' From the matlab (TM) API documentation, last found here:
- http://www.mathworks.com/access/helpdesk/help/techdoc/matlab_external/
- rowind are simply the row indices for all the (res) non-zero
- entries in the sparse array. rowind has nzmax entries, so
- may well have more entries than len(res), the actual number
- of non-zero entries, but rowind[len(res):] can be discarded
- and should be 0. indptr has length (number of columns + 1),
- and is such that, if D = diff(colind), D[j] gives the number
- of non-zero entries in column j. Because rowind values are
- stored in column order, this gives the column corresponding to
- each rowind
- '''
- if have_sparse:
- dims = self.header['dims']
- return scipy.sparse.csc_matrix((data,rowind,indptr), dims)
- else:
- return (dims, data, rowind, indptr)
-
-
-class Mat5CharMatrixGetter(Mat5MatrixGetter):
- def get_raw_array(self):
- res = self.read_element()
- # Convert non-string types to unicode
- if isinstance(res, N.ndarray):
- if res.dtype.type == N.uint16:
- codec = miUINT16_codec
- if self.codecs['uint16_len'] == 1:
- res = res.astype(N.uint8)
- elif res.dtype.type in (N.uint8, N.int8):
- codec = 'ascii'
- else:
- raise TypeError, 'Did not expect type %s' % res.dtype
- res = res.tostring().decode(codec)
- return N.ndarray(shape=self.header['dims'],
- dtype=N.dtype('U1'),
- buffer=N.array(res),
- order='F').copy()
-
-
-class Mat5CellMatrixGetter(Mat5MatrixGetter):
- def get_raw_array(self):
- # Account for fortran indexing of cells
- tupdims = tuple(self.header['dims'][::-1])
- length = N.product(tupdims)
- result = N.empty(length, dtype=object)
- for i in range(length):
- result[i] = self.get_item()
- return result.reshape(tupdims).T
-
- def get_item(self):
- return self.read_element()
-
-
-class Mat5StructMatrixGetter(Mat5CellMatrixGetter):
- def __init__(self, *args, **kwargs):
- super(Mat5StructMatrixGetter, self).__init__(*args, **kwargs)
- self.obj_template = mat_struct()
-
- def get_raw_array(self):
- namelength = self.read_element()[0]
- # get field names
- names = self.read_element()
- splitnames = [names[i:i+namelength] for i in \
- xrange(0,len(names),namelength)]
- self.obj_template._fieldnames = [x.tostring().strip('\x00')
- for x in splitnames]
- return super(Mat5StructMatrixGetter, self).get_raw_array()
-
- def get_item(self):
- item = pycopy(self.obj_template)
- for element in item._fieldnames:
- item.__dict__[element] = self.read_element()
- return item
-
-
-class Mat5ObjectMatrixGetter(Mat5StructMatrixGetter):
- def __init__(self, *args, **kwargs):
- super(Mat5StructMatrixGetter, self).__init__(*args, **kwargs)
- self.obj_template = mat_obj()
-
- def get_raw_array(self):
- self.obj_template._classname = self.read_element().tostring()
- return super(Mat5ObjectMatrixGetter, self).get_raw_array()
-
-
-class MatFile5Reader(MatFileReader):
- ''' Reader for Mat 5 mat files
-
- Adds the following attribute to base class
-
- uint16_codec - char codec to use for uint16 char arrays
- (defaults to system default codec)
- '''
-
- def __init__(self,
- mat_stream,
- byte_order=None,
- mat_dtype=False,
- squeeze_me=True,
- chars_as_strings=True,
- matlab_compatible=False,
- uint16_codec=None
- ):
- self.codecs = {}
- self._array_reader = Mat5ArrayReader(
- mat_stream,
- None,
- None,
- None,
- None,
- )
- super(MatFile5Reader, self).__init__(
- mat_stream,
- byte_order,
- mat_dtype,
- squeeze_me,
- chars_as_strings,
- matlab_compatible,
- )
- self._array_reader.processor_func = self.processor_func
- self.uint16_codec = uint16_codec
-
- def get_uint16_codec(self):
- return self._uint16_codec
- def set_uint16_codec(self, uint16_codec):
- if not uint16_codec:
- uint16_codec = sys.getdefaultencoding()
- # Set length of miUINT16 char encoding
- self.codecs['uint16_len'] = len(" ".encode(uint16_codec)) \
- - len(" ".encode(uint16_codec))
- self.codecs['uint16_codec'] = uint16_codec
- self._array_reader.codecs = self.codecs
- self._uint16_codec = uint16_codec
- uint16_codec = property(get_uint16_codec,
- set_uint16_codec,
- None,
- 'get/set uint16_codec')
-
- def set_dtypes(self):
- ''' Set dtypes and codecs '''
- self.dtypes = self.convert_dtypes(mdtypes_template)
- self.class_dtypes = self.convert_dtypes(mclass_dtypes_template)
- codecs = {}
- postfix = self.order_code == '<' and '_le' or '_be'
- for k, v in codecs_template.items():
- codec = v['codec']
- try:
- " ".encode(codec)
- except LookupError:
- codecs[k] = None
- continue
- if v['width'] > 1:
- codec += postfix
- codecs[k] = codec
- self.codecs.update(codecs)
- self.update_array_reader()
-
- def update_array_reader(self):
- self._array_reader.codecs = self.codecs
- self._array_reader.dtypes = self.dtypes
- self._array_reader.class_dtypes = self.class_dtypes
-
- def matrix_getter_factory(self):
- return self._array_reader.matrix_getter_factory()
-
- def guess_byte_order(self):
- self.mat_stream.seek(126)
- mi = self.mat_stream.read(2)
- self.mat_stream.seek(0)
- return mi == 'IM' and '<' or '>'
-
- def file_header(self):
- ''' Read in mat 5 file header '''
- hdict = {}
- hdr = self.read_dtype(self.dtypes['file_header'])
- hdict['__header__'] = hdr['description'].item().strip(' \t\n\000')
- v_major = hdr['version'] >> 8
- v_minor = hdr['version'] & 0xFF
- hdict['__version__'] = '%d.%d' % (v_major, v_minor)
- return hdict
-
- def format_looks_right(self):
- # Mat4 files have a zero somewhere in first 4 bytes
- self.mat_stream.seek(0)
- mopt_bytes = N.ndarray(shape=(4,),
- dtype=N.uint8,
- buffer = self.mat_stream.read(4))
- self.mat_stream.seek(0)
- return 0 not in mopt_bytes
-
-
-class Mat5MatrixWriter(MatStreamWriter):
-
- mat_tag = N.zeros((), mdtypes_template['tag_full'])
- mat_tag['mdtype'] = miMATRIX
-
- def __init__(self, file_stream, arr, name, is_global=False):
- super(Mat5MatrixWriter, self).__init__(file_stream, arr, name)
- self.is_global = is_global
-
- def write_dtype(self, arr):
- self.file_stream.write(arr.tostring())
-
- def write_element(self, arr, mdtype=None):
- # write tag, data
- tag = N.zeros((), mdtypes_template['tag_full'])
- if mdtype is None:
- tag['mdtype'] = np_to_mtypes[arr.dtype.str[1:]]
- else:
- tag['mdtype'] = mdtype
- tag['byte_count'] = arr.size*arr.itemsize
- self.write_dtype(tag)
- self.write_bytes(arr)
- # do 8 byte padding if needed
- if tag['byte_count']%8 != 0:
- pad = (1+tag['byte_count']//8)*8 - tag['byte_count']
- self.write_bytes(N.zeros((pad,),dtype='u1'))
-
- def write_header(self, mclass,
- is_global=False,
- is_complex=False,
- is_logical=False,
- nzmax=0):
- ''' Write header for given data options
- mclass - mat5 matrix class
- is_global - True if matrix is global
- is_complex - True is matrix is complex
- is_logical - True if matrix is logical
- nzmax - max non zero elements for sparse arrays
- '''
- self._mat_tag_pos = self.file_stream.tell()
- self.write_dtype(self.mat_tag)
- # write array flags (complex, global, logical, class, nzmax)
- af = N.zeros((), mdtypes_template['array_flags'])
- af['data_type'] = miUINT32
- af['byte_count'] = 8
- flags = is_complex << 3 | is_global << 2 | is_logical << 1
- af['flags_class'] = mclass | flags << 8
- af['nzmax'] = nzmax
- self.write_dtype(af)
- # write array shape
- self.arr=N.atleast_2d(self.arr)
- self.write_element(N.array(self.arr.shape, dtype='i4'))
- # write name
- self.write_element(N.ndarray(shape=len(self.name), dtype='S1', buffer=self.name))
-
- def update_matrix_tag(self):
- curr_pos = self.file_stream.tell()
- self.file_stream.seek(self._mat_tag_pos)
- self.mat_tag['byte_count'] = curr_pos - self._mat_tag_pos - 8
- self.write_dtype(self.mat_tag)
- self.file_stream.seek(curr_pos)
-
- def write(self):
- assert False, 'Not implemented'
-
-
-class Mat5NumericWriter(Mat5MatrixWriter):
-    ''' Writer for numeric (real or complex) arrays '''
-
-    def write(self):
-        ''' Write header, data element(s) and the final matrix tag
-
-        Dtypes with no entry in np_to_mxtypes are cast to float64
-        (complex128 for complex input) and written with
-        mxDOUBLE_CLASS.  Complex data is written as two elements:
-        the real part followed by the imaginary part.
-        '''
-        imagf = self.arr.dtype.kind == 'c'
-        try:
-            mclass = np_to_mxtypes[self.arr.dtype.str[1:]]
-        except KeyError:
-            # 'c16' is the numpy code for complex128 (16 bytes per
-            # item); 'c128' is not a valid dtype string and made
-            # astype raise TypeError
-            if imagf:
-                self.arr = self.arr.astype('c16')
-            else:
-                self.arr = self.arr.astype('f8')
-            mclass = mxDOUBLE_CLASS
-        self.write_header(mclass=mclass, is_complex=imagf)
-        if imagf:
-            self.write_element(self.arr.real)
-            self.write_element(self.arr.imag)
-        else:
-            self.write_element(self.arr)
-        self.update_matrix_tag()
-
-class Mat5CharWriter(Mat5MatrixWriter):
-    ''' Writer for character arrays; unicode is encoded with `codec` '''
-    codec = 'ascii'
-
-    def write(self):
-        ''' Write header, character data and the final matrix tag
-
-        The array is first normalised by ``self.arr_to_chars()``.
-        If it is then of unicode kind, its whole buffer is read as
-        one string, encoded with ``self.codec`` and replaced by the
-        resulting bytes.  The data element is tagged miUTF8 in
-        either case.
-        '''
-        self.arr_to_chars()
-        self.write_header(mclass=mxCHAR_CLASS)
-        if self.arr.dtype.kind == 'U':
-            # View the buffer as a single 0-d string so that one
-            # encode call recodes every character
-            total_chars = N.product(self.arr.shape)
-            whole_dtype = self.arr_dtype_number(total_chars)
-            as_scalar = N.ndarray(shape=(),
-                                  dtype=whole_dtype,
-                                  buffer=self.arr)
-            encoded = as_scalar.item().encode(self.codec)
-            self.arr = N.ndarray(shape=(len(encoded),),
-                                 dtype='u1',
-                                 buffer=encoded)
-        self.write_element(self.arr, mdtype=miUTF8)
-        self.update_matrix_tag()
-
-class Mat5UniCharWriter(Mat5CharWriter):
-    ''' Character writer that encodes unicode data as UTF8 '''
-    codec = 'UTF8'
-
-
-class Mat5SparseWriter(Mat5MatrixWriter):
-    ''' Writer for scipy sparse matrices '''
-
-    def write(self):
-        ''' Sparse matrices are 2D
-        See docstring for Mat5SparseGetter
-
-        Builds an (nnz+1) by 3 float array (4 columns for complex
-        data) of 1-based row index, column index and value(s); the
-        last row holds the matrix shape.
-        '''
-        imagf = self.arr.dtype.kind == 'c'
-        # keep the count in its own name: binding it to N hid the
-        # numpy module alias and made N.zeros fail
-        nnz = self.arr.nnz
-        ijd = N.zeros((nnz+1, 3+imagf), dtype='f8')
-        for i in range(nnz):
-            ijd[i,0], ijd[i,1] = self.arr.rowcol(i)
-        ijd[:-1,0:2] += 1 # 1 based indexing
-        if imagf:
-            ijd[:-1,2] = self.arr.data.real
-            ijd[:-1,3] = self.arr.data.imag
-        else:
-            ijd[:-1,2] = self.arr.data
-        ijd[-1,0:2] = self.arr.shape
-        # NOTE(review): write_header on the base class accepts mclass,
-        # is_global, is_complex, is_logical and nzmax; P, T and dims
-        # are not among them, so this call needs revisiting.  Unlike
-        # the other writers, update_matrix_tag is not called here
-        # either - confirm whether that is intended.
-        self.write_header(P=miDOUBLE,
-                          T=mxSPARSE_CLASS,
-                          dims=ijd.shape)
-        self.write_bytes(ijd)
-
-
-class Mat5WriterGetter(object):
- ''' Wraps stream and options, provides methods for getting Writer objects '''
- def __init__(self, stream, unicode_strings):
- self.stream = stream
- self.unicode_strings = unicode_strings
-
- def rewind(self):
- self.stream.seek(0)
-
- def matrix_writer_factory(self, arr, name, is_global=False):
- ''' Factory function to return matrix writer given variable to write
- stream - file or file-like stream to write to
- arr - array to write
- name - name in matlab (TM) workspace
- '''
- if have_sparse:
- if scipy.sparse.issparse(arr):
- return Mat5SparseWriter(self.stream, arr, name, is_global)
- arr = N.array(arr)
- if arr.dtype.hasobject:
- types, arr_type = self.classify_mobjects(arr)
- if arr_type == 'c':
- return Mat5CellWriter(self.stream, arr, name, is_global, types)
- elif arr_type == 's':
- return Mat5StructWriter(self.stream, arr, name, is_global)
- elif arr_type == 'o':
- return Mat5ObjectWriter(self.stream, a