[Scipy-svn] r4473 - branches/mb_mio_refactor/matlab
scipy-svn@scip...
scipy-svn@scip...
Tue Jun 24 03:55:24 CDT 2008
Author: matthew.brett@gmail.com
Date: 2008-06-24 03:55:06 -0500 (Tue, 24 Jun 2008)
New Revision: 4473
Added:
branches/mb_mio_refactor/matlab/c_python.pxd
branches/mb_mio_refactor/matlab/cython_setup.py
branches/mb_mio_refactor/matlab/tagreader.pyx
Modified:
branches/mb_mio_refactor/matlab/mio.py
branches/mb_mio_refactor/matlab/mio4.py
branches/mb_mio_refactor/matlab/mio5.py
branches/mb_mio_refactor/matlab/miobase.py
Log:
Scribbling at cython, checking for HDF5 format
Added: branches/mb_mio_refactor/matlab/c_python.pxd
===================================================================
--- branches/mb_mio_refactor/matlab/c_python.pxd 2008-06-24 08:02:32 UTC (rev 4472)
+++ branches/mb_mio_refactor/matlab/c_python.pxd 2008-06-24 08:55:06 UTC (rev 4473)
@@ -0,0 +1,75 @@
+# -*- python -*-
+# :Author: Robert Kern
+# :Copyright: 2004, Enthought, Inc.
+# :License: BSD Style
+
+
+cdef extern from "Python.h":
+ # Not part of the Python API, but we might as well define it here.
+ # Note that the exact type doesn't actually matter for Pyrex.
+ ctypedef int size_t
+
+ # Some type declarations we need
+ ctypedef int Py_intptr_t
+
+
+ # String API
+ char* PyString_AsString(object string)
+ char* PyString_AS_STRING(object string)
+ object PyString_FromString(char* c_string)
+ object PyString_FromStringAndSize(char* c_string, int length)
+ object PyString_InternFromString(char *v)
+
+ # Float API
+ object PyFloat_FromDouble(double v)
+ double PyFloat_AsDouble(object ob)
+ long PyInt_AsLong(object ob)
+
+
+ # Memory API
+ void* PyMem_Malloc(size_t n)
+ void* PyMem_Realloc(void* buf, size_t n)
+ void PyMem_Free(void* buf)
+
+ void Py_DECREF(object obj)
+ void Py_XDECREF(object obj)
+ void Py_INCREF(object obj)
+ void Py_XINCREF(object obj)
+
+ # CObject API
+ ctypedef void (*destructor1)(void* cobj)
+ ctypedef void (*destructor2)(void* cobj, void* desc)
+ int PyCObject_Check(object p)
+ object PyCObject_FromVoidPtr(void* cobj, destructor1 destr)
+ object PyCObject_FromVoidPtrAndDesc(void* cobj, void* desc,
+ destructor2 destr)
+ void* PyCObject_AsVoidPtr(object self)
+ void* PyCObject_GetDesc(object self)
+ int PyCObject_SetVoidPtr(object self, void* cobj)
+
+ # TypeCheck API
+ int PyFloat_Check(object obj)
+ int PyInt_Check(object obj)
+
+ # Error API
+ int PyErr_Occurred()
+ void PyErr_Clear()
+ int PyErr_CheckSignals()
+
+ # File API
+ ctypedef struct FILE
+ FILE* PyFile_AsFile(object)
+
+cdef extern from "stdio.h":
+ size_t fread(void *ptr, size_t size, size_t n, FILE *file)
+
+cdef extern from "string.h":
+ void *memcpy(void *s1, void *s2, int n)
+
+cdef extern from "math.h":
+ double fabs(double x)
+
+cdef extern from "fileobject.h":
+ ctypedef class __builtin__.file [object PyFileObject]:
+ pass
+
Added: branches/mb_mio_refactor/matlab/cython_setup.py
===================================================================
--- branches/mb_mio_refactor/matlab/cython_setup.py 2008-06-24 08:02:32 UTC (rev 4472)
+++ branches/mb_mio_refactor/matlab/cython_setup.py 2008-06-24 08:55:06 UTC (rev 4473)
@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+"""Install file for example on how to use Cython with Numpy.
+
+Note: Cython is the successor project to Pyrex. For more information, see
+http://cython.org.
+"""
+
+from distutils.core import setup
+from distutils.extension import Extension
+
+import numpy
+
+# We detect whether Cython is available, so that below, we can eventually ship
+# pre-generated C for users to compile the extension without having Cython
+# installed on their systems.
+try:
+ from Cython.Distutils import build_ext
+ has_cython = True
+except ImportError:
+ has_cython = False
+
+# Define a cython-based extension module, using the generated sources if cython
+# is not available.
+if has_cython:
+ pyx_sources = ['tagreader.pyx']
+ cmdclass = {'build_ext': build_ext}
+else:
+ # In production work, you can ship the auto-generated C source yourself to
+ # your users. In this case, we do NOT ship the .c file as part of numpy,
+ # so you'll need to actually have cython installed at least the first
+ # time. Since this is really just an example to show you how to use
+ # *Cython*, it makes more sense NOT to ship the C sources so you can edit
+ # the pyx at will with fewer chances for source update conflicts when you
+ # update numpy.
+ pyx_sources = ['tagreader.c']
+ cmdclass = {}
+
+
+# Declare the extension object
+pyx_ext = Extension('tagreader',
+ pyx_sources,
+ include_dirs = [numpy.get_include()])
+
+# Call the routine which does the real work
+setup(name = 'tagreader',
+ description = 'tagreader extension',
+ ext_modules = [pyx_ext],
+ cmdclass = cmdclass,
+ )
Modified: branches/mb_mio_refactor/matlab/mio.py
===================================================================
--- branches/mb_mio_refactor/matlab/mio.py 2008-06-24 08:02:32 UTC (rev 4472)
+++ branches/mb_mio_refactor/matlab/mio.py 2008-06-24 08:55:06 UTC (rev 4473)
@@ -7,6 +7,7 @@
import os
import sys
+from miobase import get_matfile_version
from mio4 import MatFile4Reader, MatFile4Writer
from mio5 import MatFile5Reader, MatFile5Writer
@@ -57,11 +58,16 @@
raise IOError, 'Reader needs file name or open file-like object'
byte_stream = file_name
- MR = MatFile4Reader(byte_stream, **kwargs)
- if MR.format_looks_right():
- return MR
- return MatFile5Reader(byte_stream, **kwargs)
-
+ mv = get_matfile_version(byte_stream)
+ if mv == '4':
+ return MatFile4Reader(byte_stream, **kwargs)
+ elif mv == '5':
+ return MatFile5Reader(byte_stream, **kwargs)
+ elif mv == '7':
+ raise NotImplementedError('Please use PyTables for matlab HDF files')
+ else:
+ raise TypeError('Did not recognize version %s' % mv)
+
def loadmat(file_name, mdict=None, appendmat=True, basename='raw', **kwargs):
''' Load Matlab(tm) file
Modified: branches/mb_mio_refactor/matlab/mio4.py
===================================================================
--- branches/mb_mio_refactor/matlab/mio4.py 2008-06-24 08:02:32 UTC (rev 4472)
+++ branches/mb_mio_refactor/matlab/mio4.py 2008-06-24 08:55:06 UTC (rev 4473)
@@ -198,15 +198,6 @@
def matrix_getter_factory(self):
return self._array_reader.matrix_getter_factory()
- def format_looks_right(self):
- # Mat4 files have a zero somewhere in first 4 bytes
- self.mat_stream.seek(0)
- mopt_bytes = N.ndarray(shape=(4,),
- dtype=N.uint8,
- buffer = self.mat_stream.read(4))
- self.mat_stream.seek(0)
- return 0 in mopt_bytes
-
def guess_byte_order(self):
self.mat_stream.seek(0)
mopt = self.read_dtype(N.dtype('i4'))
Modified: branches/mb_mio_refactor/matlab/mio5.py
===================================================================
--- branches/mb_mio_refactor/matlab/mio5.py 2008-06-24 08:02:32 UTC (rev 4472)
+++ branches/mb_mio_refactor/matlab/mio5.py 2008-06-24 08:55:06 UTC (rev 4473)
@@ -188,46 +188,27 @@
self.class_dtypes = class_dtypes
def read_element(self, copy=True):
- raw_tag = self.mat_stream.read(8)
- tag = N.ndarray(shape=(),
- dtype=self.dtypes['tag_full'],
- buffer=raw_tag)
- mdtype = tag['mdtype'].item()
-
- byte_count = mdtype >> 16
- if byte_count: # small data element format
- if byte_count > 4:
- raise ValueError, 'Too many bytes for sde format'
- mdtype = mdtype & 0xFFFF
- dt = self.dtypes[mdtype]
- el_count = byte_count // dt.itemsize
- return N.ndarray(shape=(el_count,),
- dtype=dt,
- buffer=raw_tag[4:])
-
- byte_count = tag['byte_count'].item()
+ mdtype, byte_count, buf = tagparse(self.mat_stream, swapf)
if mdtype == miMATRIX:
+ # Can this use buf or not?
return self.current_getter(byte_count).get_array()
elif mdtype in self.codecs: # encoded char data
- raw_str = self.mat_stream.read(byte_count)
codec = self.codecs[mdtype]
if not codec:
raise TypeError, 'Do not support encoding %d' % mdtype
- el = raw_str.decode(codec)
+ el = buf.decode(codec)
else: # numeric data
dt = self.dtypes[mdtype]
el_count = byte_count // dt.itemsize
el = N.ndarray(shape=(el_count,),
dtype=dt,
- buffer=self.mat_stream.read(byte_count))
+ buffer=buf)
if copy:
el = el.copy()
-
# Seek to next 64-bit boundary
mod8 = byte_count % 8
if mod8:
self.mat_stream.seek(8 - mod8, 1)
-
return el
def matrix_getter_factory(self):
@@ -460,7 +441,6 @@
uint16_codec - char codec to use for uint16 char arrays
(defaults to system default codec)
'''
-
def __init__(self,
mat_stream,
byte_order=None,
@@ -533,6 +513,8 @@
return self._array_reader.matrix_getter_factory()
def guess_byte_order(self):
+ ''' Guess byte order.
+ Sets stream pointer to 0 '''
self.mat_stream.seek(126)
mi = self.mat_stream.read(2)
self.mat_stream.seek(0)
@@ -548,16 +530,7 @@
hdict['__version__'] = '%d.%d' % (v_major, v_minor)
return hdict
- def format_looks_right(self):
- # Mat4 files have a zero somewhere in first 4 bytes
- self.mat_stream.seek(0)
- mopt_bytes = N.ndarray(shape=(4,),
- dtype=N.uint8,
- buffer = self.mat_stream.read(4))
- self.mat_stream.seek(0)
- return 0 not in mopt_bytes
-
class Mat5MatrixWriter(MatStreamWriter):
mat_tag = N.zeros((), mdtypes_template['tag_full'])
Modified: branches/mb_mio_refactor/matlab/miobase.py
===================================================================
--- branches/mb_mio_refactor/matlab/miobase.py 2008-06-24 08:02:32 UTC (rev 4472)
+++ branches/mb_mio_refactor/matlab/miobase.py 2008-06-24 08:55:06 UTC (rev 4473)
@@ -22,6 +22,38 @@
res *= e
return res
+def get_matfile_version(fileobj):
+ ''' Return '4', '5', or '7' depending on apparent mat file type
+ Inputs
+ fileobj - file object implementing seek() and read()
+ Outputs
+ version_str - one of the strings '4', '5', or '7'
+
+ Has the side effect of setting the file read pointer to 0
+ '''
+ # Mat4 files have a zero somewhere in first 4 bytes
+ fileobj.seek(0)
+ mopt_bytes = N.ndarray(shape=(4,),
+ dtype=N.uint8,
+ buffer = fileobj.read(4))
+ if 0 in mopt_bytes:
+ fileobj.seek(0)
+ return '4'
+ # For 5 or 7 we need to read an integer in the header.
+ # Bytes 124 through 127 (the 4 bytes read below) contain a version integer
+ # and an endian test string
+ fileobj.seek(124)
+ tst_str = fileobj.read(4)
+ fileobj.seek(0)
+ maj_ind = int(tst_str[2] == 'I')
+ verb = ord(tst_str[maj_ind])
+ if verb == 1:
+ return '5'
+ elif verb == 2:
+ return '7'
+ raise ValueError('Unknown mat file type, version %d' % verb)
+
+
class ByteOrder(object):
''' Namespace for byte ordering '''
little_endian = sys.byteorder == 'little'
@@ -50,7 +82,7 @@
Attaches to initialized stream
Base class for "getters" - which do store state of what they are
- reading on itialization, and therefore need to be initialized
+ reading on initialization, and therefore need to be initialized
before each read, and "readers" which do not store state, and only
need to be initialized once on object creation
@@ -102,11 +134,8 @@
set_dtypes - sets data types defs from byte order
matrix_getter_factory - gives object to fetch next matrix from stream
- format_looks_right - returns True if format looks correct for
- this file type (Mat4, Mat5)
guess_byte_order - guesses file byte order from file
"""
-
def __init__(self, mat_stream,
byte_order=None,
mat_dtype=False,
@@ -177,7 +206,8 @@
'get/set order code')
def set_dtypes(self):
- assert False, 'Not implemented'
+ ''' Set dtype endianness. In this case we have no dtypes '''
+ pass
def convert_dtypes(self, dtype_template):
dtypes = dtype_template.copy()
@@ -188,16 +218,13 @@
def matrix_getter_factory(self):
assert False, 'Not implemented'
-
- def format_looks_right(self):
- "Return True if the format looks right for this object"
- assert False, 'Not implemented'
-
+
def file_header(self):
return {}
def guess_byte_order(self):
- assert 0, 'Not implemented'
+ ''' As we do not know what file type we have, assume native '''
+ return ByteOrder.native_code
def get_processor_func(self):
''' Processing to apply to read matrices
Added: branches/mb_mio_refactor/matlab/tagreader.pyx
===================================================================
--- branches/mb_mio_refactor/matlab/tagreader.pyx 2008-06-24 08:02:32 UTC (rev 4472)
+++ branches/mb_mio_refactor/matlab/tagreader.pyx 2008-06-24 08:55:06 UTC (rev 4473)
@@ -0,0 +1,19 @@
+# -*- python -*-
+''' Extension to parse matlab 5 tags '''
+
+# Import the pieces of the Python C API we need to use (from c_python.pxd):
+cimport c_python as py
+
+def parse(fileobj, int swapf):
+ ''' Read in the tag
+ The tag can be normal format (mdtype=u4, byte_count=u4)
+ or small element format (mdtype=u2, byte_count=u2, data in last 4 bytes)
+ Small element format is where mdtype (u4) has non-zero high bytes
+ '''
+ cdef py.size_t n_out
+ cdef char raw_tag[8]
+ cdef py.FILE* infile
+ infile = py.PyFile_AsFile(fileobj)
+ n_out = py.fread(raw_tag, 8, 1, infile)
+ # TODO: raise an exception if n_out < 1 (fread did not return a complete 8-byte tag)
+ return mdtype, byte_count, buf
More information about the Scipy-svn mailing list