[Scipy-svn] r3327 - in trunk/scipy/sandbox/timeseries: . tests
scipy-svn@scip...
scipy-svn@scip...
Tue Sep 18 19:51:24 CDT 2007
Author: mattknox_ca
Date: 2007-09-18 19:51:14 -0500 (Tue, 18 Sep 2007)
New Revision: 3327
Added:
trunk/scipy/sandbox/timeseries/dates.py
trunk/scipy/sandbox/timeseries/extras.py
trunk/scipy/sandbox/timeseries/report.py
trunk/scipy/sandbox/timeseries/tests/test_trecords.py
trunk/scipy/sandbox/timeseries/trecords.py
Removed:
trunk/scipy/sandbox/timeseries/reportlib.py
trunk/scipy/sandbox/timeseries/tdates.py
trunk/scipy/sandbox/timeseries/tests/test_multitimeseries.py
trunk/scipy/sandbox/timeseries/textras.py
trunk/scipy/sandbox/timeseries/tmulti.py
Modified:
trunk/scipy/sandbox/timeseries/__init__.py
trunk/scipy/sandbox/timeseries/tseries.py
Log:
renamed several files for organizational purposes
renamed MultiTimeSeries class to TimeSeriesRecords
Modified: trunk/scipy/sandbox/timeseries/__init__.py
===================================================================
--- trunk/scipy/sandbox/timeseries/__init__.py 2007-09-18 23:22:21 UTC (rev 3326)
+++ trunk/scipy/sandbox/timeseries/__init__.py 2007-09-19 00:51:14 UTC (rev 3327)
@@ -12,27 +12,22 @@
__date__ = '$Date$'
import const
-import tdates
-from tdates import *
+import dates
+from dates import *
import tseries
from tseries import *
-import tmulti
-from tmulti import *
-import reportlib
-
-from reportlib import *
+import trecords
+from trecords import *
+
+import report
+from report import *
+
import lib
from lib import filters, interpolate, moving_funcs
-
-__all__ = ['tdates', 'tseries','tmulti','reportlib','filters','interpolate']
-__all__ += tdates.__all__
-__all__ += tseries.__all__
-
-__all__ = ['const', 'tdates','tseries','tmulti','reportlib','filters',
+__all__ = ['const', 'dates','tseries','trecords','report','filters',
'interpolate', 'moving_funcs']
-__all__ += tdates.__all__
+__all__ += dates.__all__
__all__ += tseries.__all__
-__all__ += tmulti.__all__
-__all__ += reportlib.__all__
-
+__all__ += trecords.__all__
+__all__ += report.__all__
Copied: trunk/scipy/sandbox/timeseries/dates.py (from rev 3326, trunk/scipy/sandbox/timeseries/tdates.py)
Copied: trunk/scipy/sandbox/timeseries/extras.py (from rev 3319, trunk/scipy/sandbox/timeseries/textras.py)
Copied: trunk/scipy/sandbox/timeseries/report.py (from rev 3319, trunk/scipy/sandbox/timeseries/reportlib.py)
Deleted: trunk/scipy/sandbox/timeseries/reportlib.py
===================================================================
--- trunk/scipy/sandbox/timeseries/reportlib.py 2007-09-18 23:22:21 UTC (rev 3326)
+++ trunk/scipy/sandbox/timeseries/reportlib.py 2007-09-19 00:51:14 UTC (rev 3327)
@@ -1,538 +0,0 @@
-"""
-Reporting functions
-
-:author: Pierre GF Gerard-Marchant & Matt Knox
-:contact: pierregm_at_uga_dot_edu - mattknox_ca_at_hotmail_dot_com
-:version: $Id$
-
-Ideas borrowed from:
-
-- George Sakkis
- http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/267662
-
-- Mike Brown
- http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/148061
-
-:Examples:
-
- import numpy as np
- import timeseries as ts
- import maskedarray as ma
- from timeseries import Report, wrap_onspace
-
- series1 = ts.time_series(np.random.uniform(-100,100,15), start_date=ts.thisday('b')-15)
- series2 = ts.time_series(np.random.uniform(-100,100,13), start_date=ts.thisday('b')-10)
- series3 = ts.time_series(['string1', 'another string', 'yet another string']*3, start_date=ts.thisday('b')-10)
-
- darray = ts.date_array(start_date=ts.thisday('b')-8, end_date=ts.thisday('b')-3)
-
- txt_o = open('myfile.txt', 'w')
- html_o = open('myfile.html', 'w')
-
- # report containing only numerical series, showing 2 decimal places
- num_report = Report(series1, series2, fmtfunc=lambda x:'%.2f' % x)
-
- # report containing some string and numerical data
- mixed_report = Report(series1, series2, series3)
-
- # output a csv report suitable for excel to sys.stdout, show masked values as "N/A"
- num_report(delim=', ', mask_rep='N/A')
-
- # format column one with 2 decimal places, and column two with 4.
- # Add a sum footer. Write the output to txt_o
- num_report(fmtfunc=[(lambda x:'%.2f' % x), (lambda x:'%.4f' % x)],
- footer_func=ma.sum, footer_label='sum', output=txt_o)
-
- # create an html table of the data over a specified range.
- # Wrap text in cells to width 10. Output to html_o
- html_o.write("<table>")
- mixed_report(series1, series2, series3, dates=darray,
- delim="</td><td>", prefix="<tr><td>", postfix="</td></tr>",
- wrapfunc=wrap_onspace(10, nls='<BR>'), output=html_o)
- html_o.write("</table>")
-
-"""
-__author__ = "Pierre GF Gerard-Marchant & Matt Knox ($Author$)"
-__version__ = '1.0'
-__revision__ = "$Revision$"
-__date__ = '$Date$'
-
-import sys
-import operator, types, copy
-import timeseries as ts
-import maskedarray as ma
-
-__all__ = [
- 'Report', 'wrap_onspace', 'wrap_onspace_strict',
- 'wrap_always']
-
-class fmtfunc_wrapper:
- """wraps a formatting function such that it handles masked values
-
-:IVariables:
- - `fmtfunc` : formatting function.
- - `mask_rep` : string to use for masked values
- """
- def __init__ (self, fmtfunc, mask_rep):
- if fmtfunc is None:
- self.f = str
- else:
- self.f = fmtfunc
- self.mr = mask_rep
-
- def __call__ (self, item):
- "Execute the call behavior."
-
- if hasattr(item, "_mask") and isinstance(item._mask, bool) and item._mask:
- return self.mr
- else:
- return self.f(item)
-
-
-_default_options = {
- 'dates':None,
- 'header_row':None,
- 'header_char':'-',
- 'header_justify':None,
- 'row_char':None,
- 'footer_label':None,
- 'footer_char':'-',
- 'footer_func':None,
- 'delim':' | ',
- 'justify':None,
- 'prefix':'',
- 'postfix':'',
- 'mask_rep':'--',
- 'datefmt':None,
- 'fmtfunc':str,
- 'wrapfunc':lambda x:x,
- 'col_width':None,
- 'nls':'\n',
- 'output':sys.stdout,
- 'fixed_width':True
-}
-
-class Report(object):
- """Create a tabular TimeSeries report with dates in the left column.
-All instance variables are optional and simply serve as the defaults when calling
-the report. Parameters for calling the report are the exact same as for
-initialization. When calling the report, new options specified will not be saved
-to the instance.
-
-:IVariables:
- - `*tseries` : time series objects. Must all be at the same frequency, but
- do not need to be aligned.
-
- - `dates` (DateArray, *[None]*) : dates at which values of all the series
- will be output. If not specified, data will be output from the minimum
- start_date to the maximum end_date of all the time series objects
-
- - `header_row` (list, *[None]*) : List of column headers. Specifying
- the header for the date column is optional.
-
- - `header_char` (string, *['-']*): Character to be used for the row separator
- line between the header and first row of data. None for no separator. This
- is ignored if `header_row` is None.
-
- - `header_justify` (List of strings or single string, *[None]*) : Determines
- how headers are justified. If not specified, all headers are left justified.
- If a string is specified, it must be one of 'left', 'right', or 'center'
- and all headers will be justified the same way. If a list is specified, each
- header will be justified according to the specification for that header in
- the list. Specifying the justification for the date column header is
- optional.
-
- - `row_char` (string, *[None]*): Character to be used for the row separator
- line between each row of data. None for no separator
-
- - `footer_func` (List of functions or single function, *[None]*) : A function or
- list of functions for summarizing each data column in the report. For example,
- ma.sum to get the sum of the column. If a list of functions is provided
- there must be exactly one function for each column. Do not specify a function
- for the Date column.
-
- - `footer_char` (string, *['-']*): Character to be used for the row separator
- line between the last row of data and the footer. None for no separator. This
- is ignored if `footer_func` is None.
-
- - `footer_label` (string, *[None]*) : label for the footer row. This goes at the
- end of the date column. This is ignored if footer_func is None.
-
- - `justify` (List of strings or single string, *[None]*) : Determines how data
- are justified in their column. If not specified, the date column and string
- columns are left justified, and everything else is right justified. If a
- string is specified, it must be one of 'left', 'right', or 'center' and all
- columns will be justified the same way. If a list is specified, each column
- will be justified according to the specification for that column in the list.
- Specifying the justification for the date column is optional.
-
- - `prefix` (string, *['']*) : A string prepended to each printed row.
-
- - `postfix` (string, *['']*) : A string appended to each printed row.
-
- - `mask_rep` (string, *['--']*): String used to represent masked values in
- output
-
- - `datefmt` (string, *[None]*) : Formatting string used for displaying the
- dates in the date column. If None, str() is simply called on the dates
-
- - `fmtfunc` (List of functions or single function, *[None]*) : A function or
- list of functions for formatting each data column in the report. If not
- specified, str() is simply called on each item. If a list of functions is
- provided, there must be exactly one function for each column. Do not specify
- a function for the Date column, that is handled by the datefmt argument
-
- - `wrapfunc` (List of functions or single function, *[lambda x:x]*): A function
- f(text) for wrapping text; each element in the column is first wrapped by this
- function. Instances of wrap_onspace, wrap_onspace_strict, and wrap_always
- (which are part of this module) work well for this. Eg. wrapfunc=wrap_onspace(10)
- If a list is specified, each column will be wrapped according to the
- specification for that column in the list. Specifying a function for the Date
- column is optional
-
- - `col_width` (list of integers or single integer, *[None]*): use this to specify
- a width for all columns (single integer), or each column individually (list
- of integers). The column will be at least as wide as col_width, but may be
- larger if cell contents exceed col_width. If specifying a list, you may
- optionally specify the width for the Date column as the first entry
-
- - `output` (buffer, *[sys.stdout]*): `output` must have a write method.
-
- - `fixed_width` (boolean, *[True]*): If True, columns are fixed width (ie.
- cells will be padded with spaces to ensure all cells in a given column are
- the same width). If False, `col_width` will be ignored and cells will not
- be padded."""
-
- def __init__(self, *tseries, **kwargs):
-
- self.options = {}
- self.tseries = None
- if len(tseries) > 0:
- self.tseries = tseries
- self.options = self.__make_dict(**kwargs)
-
- def __make_dict(self, **kwargs):
-
- option_dict = copy.copy(self.options)
-
- option_list = list(_default_options)
-
- for x in [kw for kw in kwargs if kw in option_list]:
- option_dict[x] = kwargs.pop(x)
-
- if len(kwargs) > 0:
- raise KeyError("Unrecognized keyword(s): %s" % (", ".join(kwargs.keys())))
-
- return option_dict
-
- def set_series(self, *tseries):
- """set new time series for the report
-
-:Parameters:
- - `*tseries` : the TimeSeries objects to be used in the report"""
- self.tseries = tseries
-
- def set_options(self, **kwargs):
- """set new options or modify options in the report
-
-:Parameters:
- - `**kwargs` : the options to be used in the report. See the __doc__
- string for the Report class for valid options"""
- self.options = self.__make_dict(**kwargs)
-
-
- def __call__(self, *tseries, **kwargs):
- """generate a report
-
-:Parameters:
- - `*tseries` : the TimeSeries objects to be used in the report. If
- omitted, the previously set TimeSeries objects will be used
- - `**kwargs` : the options to be used in the report. See the __doc__
- string for the Report class for valid options. If omitted, the
- previously set options will be used"""
-
- option_dict = self.__make_dict(**kwargs)
- if len(tseries) == 0:
- tseries = self.tseries
-
- def option(kw):
- return option_dict.get(kw, _default_options[kw])
-
- dates = option('dates')
- header_row = option('header_row')
- header_char = option('header_char')
- header_justify = option('header_justify')
- row_char = option('row_char')
- footer_label = option('footer_label')
- footer_char = option('footer_char')
- footer_func = option('footer_func')
- delim = option('delim')
- justify = option('justify')
- prefix = option('prefix')
- postfix = option('postfix')
- mask_rep = option('mask_rep')
- datefmt = option('datefmt')
- fmtfunc = option('fmtfunc')
- wrapfunc = option('wrapfunc')
- col_width = option('col_width')
- nls=option('nls')
- output=option('output')
- fixed_width=option('fixed_width')
-
- if header_row is not None:
- has_header=True
- if len(header_row) == len(tseries)+1:
- # label for date column included
- rows = [header_row]
- elif len(header_row) == len(tseries):
- # label for date column not included
- rows = [['']+header_row]
- else:
- raise ValueError("mismatch with number of headers and series")
- else:
- has_header=False
- rows=[]
-
- if fixed_width:
-
- def _standardize_justify(userspec):
- if isinstance(userspec, str):
- # justify all columns the same way
- return [userspec for x in range(len(tseries)+1)]
- elif isinstance(userspec, list):
- if len(userspec) == len(tseries):
- # justification for date column not included, so set that
- # to left by default
- return ['left'] + userspec
- else:
- raise ValueError("invalid `justify` specification")
-
- if justify is not None:
- justify = _standardize_justify(justify)
- else:
- # default column justification
- justify = ['left']
- for ser in tseries:
- if ser.dtype.char in 'SUO': justify.append('left')
- else: justify.append('right')
-
-
- if header_justify is not None:
- header_justify = _standardize_justify(header_justify)
- else:
- # default column justification
- header_justify = ['left' for x in range(len(tseries)+1)]
- else:
- justify = [None for x in range(len(tseries)+1)]
-
- if datefmt is None:
- def datefmt_func(date): return str(date)
- else:
- def datefmt_func(date): return date.strfmt(datefmt)
-
- if dates is None:
- tseries = ts.align_series(*tseries)
- dates = ts.date_array(start_date=tseries[0].start_date,
- end_date=tseries[0].end_date)
- else:
- tseries = ts.align_series(start_date=dates[0], end_date=dates[-1], *tseries)
-
- if isinstance(fmtfunc, list):
- fmtfunc = [fmtfunc_wrapper(f, mask_rep) for f in fmtfunc]
- else:
- fmtfunc = [fmtfunc_wrapper(fmtfunc, mask_rep)]*len(tseries)
-
- def wrapfunc_default(func):
- if func is None: return lambda x:x
- else: return func
-
- if isinstance(wrapfunc, list):
- if len(wrapfunc) == len(tseries):
- wrapfunc = [lambda x: x] + wrapfunc
- wrapfunc = [wrapfunc_default(func) for func in wrapfunc]
- else:
- wrapfunc = [wrapfunc_default(wrapfunc) for x in range(len(tseries)+1)]
-
-
- if isinstance(col_width, list):
- if len(col_width) == len(tseries):
- col_width = [None] + col_width
- else:
- col_width = [col_width for x in range(len(tseries)+1)]
-
- def getval(series, date):
- try:
- val = series[date]
- except IndexError:
- val = ma.masked
- return val
-
- for d in dates:
- rows.append([datefmt_func(d)]+[fmtfunc[i](getval(ser, d)) for i, ser in enumerate(tseries)])
-
- if footer_func is not None:
- has_footer=True
- if not isinstance(footer_func, list):
- footer_func = [footer_func]*len(tseries)
-
- if footer_label is None: footer_label = ['']
- else: footer_label = [footer_label]
-
- footer_data = []
- for i, ser in enumerate(tseries):
- if footer_func[i] is None:
- footer_data.append('')
- else:
- footer_data.append(fmtfunc[i](footer_func[i](ser[dates])))
-
- rows.append(footer_label + footer_data)
- else:
- has_footer=False
-
-
- def rowWrapper(row):
- newRows = [wrapfunc[i](item).split('\n') for i, item in enumerate(row)]
- return [[(substr or '') for substr in item] for item in map(None,*newRows)]
- # break each logical row into one or more physical ones
- logicalRows = [rowWrapper(row) for row in rows]
- numLogicalRows = len(logicalRows)
- # columns of physical rows
- columns = map(None,*reduce(operator.add,logicalRows))
- numCols = len(columns)
- colNums = list(range(numCols))
-
- # get the maximum of each column by the string length of its items
- maxWidths = [max(col_width[i], *[len(str(item)) for item in column])
- for i, column in enumerate(columns)]
-
- def getSeparator(char, separate):
- if char is not None and separate:
- return char * (len(prefix) + len(postfix) + sum(maxWidths) + \
- len(delim)*(len(maxWidths)-1))
- else:
- return None
-
- header_separator = getSeparator(header_char, has_header)
- footer_separator = getSeparator(footer_char, has_footer)
- row_separator = getSeparator(row_char, True)
-
- # select the appropriate justify method
- justify_funcs = {'center':str.center, 'right':str.rjust, 'left':str.ljust,
- 'none':(lambda text, width: text)}
-
- if has_header and has_footer:
- data_start = 1
- data_end = numLogicalRows-3
- elif has_header:
- data_start = 1
- data_end = numLogicalRows-2
- elif has_footer:
- data_start = 0
- data_end = numLogicalRows-3
- else:
- data_start = 0
- data_end = numLogicalRows-2
-
- for rowNum, physicalRows in enumerate(logicalRows):
- for row in physicalRows:
- if rowNum == 0 and header_separator:
- _justify = header_justify
- else:
- _justify = justify
-
- output.write(prefix \
- + delim.join([justify_funcs[str(_justify[colNum]).lower()](str(item),width) for (colNum,item,width) in zip(colNums,row,maxWidths)]) \
- + postfix + nls)
-
- if row_separator and (data_start <= rowNum <= data_end):
- output.write(row_separator + nls)
- elif header_separator and rowNum < data_start:
- output.write(header_separator + nls)
- elif footer_separator and rowNum == data_end + 1:
- output.write(footer_separator + nls)
-
-
-class wrap_onspace(object):
- """A callable word-wrap class that preserves existing line breaks
-and most spaces in the text.
-
-:IVariables:
- - `width` (int): width to wrap at. Won't split up words wider than `width`
- - `nls` (str, *['\n']*): New line separator. Assumes existing line
- breaks use this new line separator as well.
-
-:Parameters (for __call__ method):
- - `text` (str): text to wrap"""
-
- def __init__(self, width, nls='\n'):
- self.width = width
- self.nls = nls
-
- def __call__(self, text):
-
- width = self.width
- nls = self.nls
-
- def break_or_space(line, word, width):
- temp_idx = (len(line[line.rfind(nls)+1:]) + len(word.split(nls,1)[0]) >= width)
- if temp_idx:
- return nls
- else:
- return ' '
-
- return reduce(lambda line, word, width=width: '%s%s%s' %
- (line,
- break_or_space(line, word, width),
- word),
- text.split(' ')
- )
-
-
-import re
-class wrap_onspace_strict(object):
- """A callable word-wrap class similar to wrap_onspace, but
-enforces the width constraint: words longer than width are split.
-
-:IVariables:
- - `width` (int): width to wrap at. Will split up words wider than `width`
- - `nls` (str, *['\n']*): New line separator. Assumes existing line
- breaks use this new line separator as well.
-
-:Parameters (for __call__ method):
- - `text` (str): text to wrap"""
-
- def __init__(self, width, nls='\n'):
- self.width = width
- self.nls = nls
-
- def __call__(self, text):
-
- width = self.width
- nls = self.nls
-
- wordRegex = re.compile(r'\S{'+str(width)+r',}')
- return wrap_onspace(wordRegex.sub(lambda m: wrap_always(m.group(),width, nls=nls),text),width, nls=nls)
-
-
-import math
-class wrap_always(object):
- """A callable word-wrap class that wraps text on exactly width
-characters. It doesn't split the text into words.
-
-:IVariables:
- - `width` (int): width to wrap at.
- - `nls` (str, *['\n']*): New line separator.
-
-:Parameters (for __call__ method):
- - `text` (str): text to wrap"""
-
- def __init__(self, width, nls='\n'):
- self.width = width
- self.nls = nls
-
- def __call__(self, text):
-
- width = self.width
- nls = self.nls
- return nls.join([ text[width*i:width*(i+1)] \
- for i in xrange(int(math.ceil(1.*len(text)/width))) ])
Deleted: trunk/scipy/sandbox/timeseries/tdates.py
===================================================================
--- trunk/scipy/sandbox/timeseries/tdates.py 2007-09-18 23:22:21 UTC (rev 3326)
+++ trunk/scipy/sandbox/timeseries/tdates.py 2007-09-19 00:51:14 UTC (rev 3327)
@@ -1,714 +0,0 @@
-"""
-Classes definition for the support of individual dates and array of dates.
-
-:author: Pierre GF Gerard-Marchant & Matt Knox
-:contact: pierregm_at_uga_dot_edu - mattknox_ca_at_hotmail_dot_com
-:version: $Id$
-"""
-__author__ = "Pierre GF Gerard-Marchant & Matt Knox ($Author$)"
-__version__ = '1.0'
-__revision__ = "$Revision$"
-__date__ = '$Date$'
-
-import datetime as dt
-
-import itertools
-import warnings
-import types
-
-
-import numpy
-from numpy import bool_, float_, int_, object_
-from numpy import ndarray
-import numpy.core.numeric as numeric
-import numpy.core.fromnumeric as fromnumeric
-import numpy.core.numerictypes as ntypes
-from numpy.core.numerictypes import generic
-
-import maskedarray as MA
-
-from parser import DateFromString, DateTimeFromString
-
-import const as _c
-import cseries
-
-# initialize python callbacks for C code
-cseries.set_callback_DateFromString(DateFromString)
-cseries.set_callback_DateTimeFromString(DateTimeFromString)
-
-from cseries import Date, thisday, check_freq, check_freq_str, get_freq_group,\
- DateCalc_Error, DateCalc_RangeError
-today = thisday
-
-__all__ = [
-'Date', 'DateArray','isDate','isDateArray',
-'DateError', 'ArithmeticDateError', 'FrequencyDateError','InsufficientDateError',
-'datearray','date_array', 'date_array_fromlist', 'date_array_fromrange',
-'day_of_week','day_of_year','day','month','quarter','year','hour','minute',
-'second','thisday','today','prevbusday','period_break', 'check_freq',
-'check_freq_str','get_freq_group', 'DateCalc_Error', 'DateCalc_RangeError'
- ]
-
-
-#####---------------------------------------------------------------------------
-#---- --- Date Exceptions ---
-#####---------------------------------------------------------------------------
-class DateError(Exception):
- "Defines a generic DateArrayError."
- def __init__ (self, value=None):
- "Creates an exception."
- self.value = value
- def __str__(self):
- "Calculates the string representation."
- return str(self.value)
- __repr__ = __str__
-
-class InsufficientDateError(DateError):
- """Defines the exception raised when there is not enough information
- to create a Date object."""
- def __init__(self, msg=None):
- if msg is None:
- msg = "Insufficient parameters given to create a date at the given frequency"
- DateError.__init__(self, msg)
-
-class FrequencyDateError(DateError):
- """Defines the exception raised when the frequencies are incompatible."""
- def __init__(self, msg, freql=None, freqr=None):
- msg += " : Incompatible frequencies!"
- if not (freql is None or freqr is None):
- msg += " (%s<>%s)" % (freql, freqr)
- DateError.__init__(self, msg)
-
-class ArithmeticDateError(DateError):
- """Defines the exception raised when dates are used in arithmetic expressions."""
- def __init__(self, msg=''):
- msg += " Cannot use dates for arithmetics!"
- DateError.__init__(self, msg)
-
-
-#####---------------------------------------------------------------------------
-#---- --- Functions ---
-#####---------------------------------------------------------------------------
-
-def prevbusday(day_end_hour=18, day_end_min=0):
- """Returns the previous business day (Monday-Friday) at business frequency.
-
-:Parameters:
- - day_end_hour : (int, *[18]* )
- - day_end_min : (int, *[0]*)
-
-:Return values:
- If it is currently Saturday or Sunday, then the preceding Friday will be
- returned. If it is later than the specified day_end_hour and day_end_min,
- thisday('b') will be returned. Otherwise, thisday('b')-1 will be returned.
-"""
- tempDate = dt.datetime.now()
- dateNum = tempDate.hour + float(tempDate.minute)/60
- checkNum = day_end_hour + float(day_end_min)/60
- if dateNum < checkNum:
- return thisday(_c.FR_BUS) - 1
- else:
- return thisday(_c.FR_BUS)
-
-
-def isDate(data):
- "Returns whether `data` is an instance of Date."
- return isinstance(data, Date) or \
- (hasattr(data,'freq') and hasattr(data,'value'))
-
-
-#####---------------------------------------------------------------------------
-#---- --- DateArray ---
-#####---------------------------------------------------------------------------
-ufunc_dateOK = ['add','subtract',
- 'equal','not_equal','less','less_equal', 'greater','greater_equal',
- 'isnan']
-
-class _datearithmetics(object):
- """Defines a wrapper for arithmetic methods.
-Instead of directly calling a ufunc, the corresponding method of the `array._data`
-object is called instead.
-If `asdates` is True, a DateArray object is returned , else a regular ndarray
-is returned.
- """
- def __init__ (self, methodname, asdates=True):
- """
-:Parameters:
- - `methodname` (String) : Method name.
- """
- self.methodname = methodname
- self._asdates = asdates
- self.__doc__ = getattr(methodname, '__doc__')
- self.obj = None
- #
- def __get__(self, obj, objtype=None):
- self.obj = obj
- return self
- #
- def __call__ (self, other, *args, **kwargs):
- "Execute the call behavior."
- instance = self.obj
- freq = instance.freq
- if 'context' not in kwargs:
- kwargs['context'] = 'DateOK'
- method = getattr(super(DateArray,instance), self.methodname)
- if isinstance(other, DateArray):
- if other.freq != freq:
- raise FrequencyDateError("Cannot operate on dates", \
- freq, other.freq)
- elif isinstance(other, Date):
- if other.freq != freq:
- raise FrequencyDateError("Cannot operate on dates", \
- freq, other.freq)
- other = other.value
- elif isinstance(other, ndarray):
- if other.dtype.kind not in ['i','f']:
- raise ArithmeticDateError
- if self._asdates:
- return instance.__class__(method(other, *args),
- freq=freq)
- else:
- return method(other, *args)
-
-class DateArray(ndarray):
- """Defines a ndarray of dates, as ordinals.
-
-When viewed globally (array-wise), DateArray is an array of integers.
-When viewed element-wise, DateArray is a sequence of dates.
-For example, a test such as :
->>> DateArray(...) = value
-will be valid only if value is an integer, not a Date
-However, a loop such as :
->>> for d in DateArray(...):
-accesses the array element by element. Therefore, `d` is a Date object.
- """
- def __new__(cls, dates=None, freq=None, copy=False):
- # Get the frequency ......
- if freq is None:
- _freq = getattr(dates, 'freq', _c.FR_UND)
- else:
- _freq = check_freq(freq)
- # Get the dates ..........
- _dates = numeric.array(dates, copy=copy, dtype=int_, subok=1)
- if _dates.ndim == 0:
- _dates.shape = (1,)
- _dates = _dates.view(cls)
- _dates.freq = _freq
- _dates._unsorted = None
- return _dates
-
- def __array_wrap__(self, obj, context=None):
- if context is None:
- return self
- elif context[0].__name__ not in ufunc_dateOK:
- raise ArithmeticDateError, "(function %s)" % context[0].__name__
-
- def __array_finalize__(self, obj):
- self.freq = getattr(obj, 'freq', _c.FR_UND)
- self._unsorted = getattr(obj,'_unsorted',None)
- self._cachedinfo = dict(toobj=None, tostr=None, toord=None,
- steps=None, full=None, hasdups=None)
- if hasattr(obj,'_cachedinfo'):
- self._cachedinfo.update(obj._cachedinfo)
- return
-
- def __getitem__(self, indx):
- reset_full = True
- # Determine what kind of index is used
- if isinstance(indx, Date):
- indx = self.find_dates(indx)
- reset_full = False
- elif numeric.asarray(indx).dtype.kind == 'O':
- try:
- indx = self.find_dates(indx)
- except AttributeError:
- pass
- # Select the data
- r = ndarray.__getitem__(self, indx)
- # Select the corresponding unsorted indices (if needed)
- if self._unsorted is not None:
- unsorted = self._unsorted[indx]
- # Case 1. A simple integer
- if isinstance(r, (generic, int)):
- return Date(self.freq, value=r)
- elif hasattr(r, 'size') and r.size == 1:
- # need to check if it has a size attribute for situations
- # like when the datearray is the data for a maskedarray
- # or some other subclass of ndarray with weird getitem
- # behaviour
- return Date(self.freq, value=r.item())
- else:
- if hasattr(r, '_cachedinfo'):
- _cache = r._cachedinfo
- _cache.update(dict([(k,_cache[k][indx])
- for k in ('toobj', 'tostr', 'toord')
- if _cache[k] is not None]))
- _cache['steps'] = None
- if reset_full:
- _cache['full'] = None
- _cache['hasdups'] = None
- return r
-
- def __getslice__(self, i, j):
- r = ndarray.__getslice__(self, i, j)
- if hasattr(r, '_cachedinfo'):
- _cache = r._cachedinfo
- _cache.update(dict([(k,_cache[k][i:j])
- for k in ('toobj', 'tostr', 'toord')
- if _cache[k] is not None]))
- _cache['steps'] = None
- return r
-
- def __repr__(self):
- return ndarray.__repr__(self)[:-1] + \
- ",\n freq='%s')" % self.freqstr
- #......................................................
- __add__ = _datearithmetics('__add__', asdates=True)
- __radd__ = _datearithmetics('__add__', asdates=True)
- __sub__ = _datearithmetics('__sub__', asdates=True)
- __rsub__ = _datearithmetics('__rsub__', asdates=True)
- __le__ = _datearithmetics('__le__', asdates=False)
- __lt__ = _datearithmetics('__lt__', asdates=False)
- __ge__ = _datearithmetics('__ge__', asdates=False)
- __gt__ = _datearithmetics('__gt__', asdates=False)
- __eq__ = _datearithmetics('__eq__', asdates=False)
- __ne__ = _datearithmetics('__ne__', asdates=False)
- #......................................................
- @property
- def freqstr(self):
- "Returns the frequency string code."
- return check_freq_str(self.freq)
- @property
- def day(self):
- "Returns the day of month."
- return self.__getdateinfo__('D')
- @property
- def day_of_week(self):
- "Returns the day of week."
- return self.__getdateinfo__('W')
- @property
- def day_of_year(self):
- "Returns the day of year."
- return self.__getdateinfo__('R')
- @property
- def month(self):
- "Returns the month."
- return self.__getdateinfo__('M')
- @property
- def quarter(self):
- "Returns the quarter."
- return self.__getdateinfo__('Q')
- @property
- def year(self):
- "Returns the year."
- return self.__getdateinfo__('Y')
- @property
- def qyear(self):
- """For quarterly frequency dates, returns the year corresponding to the
-year end (start) month. When using QTR or QTR-E based quarterly
-frequencies, this is the fiscal year in a financial context.
-
-For non-quarterly dates, this simply returns the year of the date."""
-
- return self.__getdateinfo__('F')
- @property
- def second(self):
- "Returns the seconds."
- return self.__getdateinfo__('S')
- @property
- def minute(self):
- "Returns the minutes."
- return self.__getdateinfo__('T')
- @property
- def hour(self):
- "Returns the hour."
- return self.__getdateinfo__('H')
- @property
- def week(self):
- "Returns the week."
- return self.__getdateinfo__('I')
-
- days = day
- weekdays = day_of_week
- yeardays = day_of_year
- months = month
- quarters = quarter
- years = year
- seconds = second
- minutes = minute
- hours = hour
- weeks = week
-
- def __getdateinfo__(self, info):
- return numeric.asarray(cseries.DA_getDateInfo(numeric.asarray(self),
- self.freq, info,
- int(self.isfull())),
- dtype=int_)
- __getDateInfo = __getdateinfo__
- #.... Conversion methods ....................
- #
- def tovalue(self):
- "Converts the dates to integer values."
- return numeric.asarray(self)
- #
- def toordinal(self):
- "Converts the dates from values to ordinals."
- # Note: we better try to cache the result
- if self._cachedinfo['toord'] is None:
-# diter = (Date(self.freq, value=d).toordinal() for d in self)
- if self.freq == _c.FR_UND:
- diter = (d.value for d in self)
- else:
- diter = (d.toordinal() for d in self)
- toord = numeric.fromiter(diter, dtype=float_)
- self._cachedinfo['toord'] = toord
- return self._cachedinfo['toord']
- #
- def tostring(self):
- "Converts the dates to strings."
- # Note: we better cache the result
- if self._cachedinfo['tostr'] is None:
- firststr = str(self[0])
- if self.size > 0:
- ncharsize = len(firststr)
- tostr = numpy.fromiter((str(d) for d in self),
- dtype='|S%i' % ncharsize)
- else:
- tostr = firststr
- self._cachedinfo['tostr'] = tostr
- return self._cachedinfo['tostr']
- #
- def asfreq(self, freq=None, relation="AFTER"):
- "Converts the dates to another frequency."
- # Note: As we define a new object, we don't need caching
- if freq is None or freq == _c.FR_UND:
- return self
- tofreq = check_freq(freq)
- if tofreq == self.freq:
- return self
- _rel = relation.upper()[0]
- fromfreq = self.freq
- if fromfreq == _c.FR_UND:
- fromfreq = _c.FR_DAY
- new = cseries.DA_asfreq(numeric.asarray(self), fromfreq, tofreq, _rel)
- return DateArray(new, freq=freq)
-
- #......................................................
- def find_dates(self, *dates):
- "Returns the indices corresponding to given dates, as an array."
-
- #http://aspn.activestate.com/ASPN/Mail/Message/python-tutor/2302348
- def flatten_sequence(iterable):
- """Flattens a compound of nested iterables."""
- itm = iter(iterable)
- for elm in itm:
- if hasattr(elm,'__iter__') and not isinstance(elm, basestring):
- for f in flatten_sequence(elm):
- yield f
- else:
- yield elm
-
- def flatargs(*args):
- "Flattens the arguments."
- if not hasattr(args, '__iter__'):
- return args
- else:
- return flatten_sequence(args)
-
- ifreq = self.freq
- c = numpy.zeros(self.shape, bool_)
- for d in flatargs(*dates):
- if d.freq != ifreq:
- d = d.asfreq(ifreq)
- c += (self == d.value)
- c = c.nonzero()
- if fromnumeric.size(c) == 0:
- raise IndexError, "Date out of bounds!"
- return c
-
- def date_to_index(self, date):
- "Returns the index corresponding to one given date, as an integer."
- if self.isvalid():
- index = date.value - self[0].value
- if index < 0 or index > self.size:
- raise IndexError, "Date out of bounds!"
- return index
- else:
- index_asarray = (self == date.value).nonzero()
- if fromnumeric.size(index_asarray) == 0:
- raise IndexError, "Date out of bounds!"
- return index_asarray[0][0]
- #......................................................
- def get_steps(self):
- """Returns the time steps between consecutive dates.
- The timesteps have the same unit as the frequency of the series."""
- if self.freq == _c.FR_UND:
- warnings.warn("Undefined frequency: assuming integers!")
- if self._cachedinfo['steps'] is None:
- _cached = self._cachedinfo
- val = numeric.asarray(self).ravel()
- if val.size > 1:
- steps = val[1:] - val[:-1]
- if _cached['full'] is None:
- _cached['full'] = (steps.max() == 1)
- if _cached['hasdups'] is None:
- _cached['hasdups'] = (steps.min() == 0)
- else:
- _cached['full'] = True
- _cached['hasdups'] = False
- steps = numeric.array([], dtype=int_)
- self._cachedinfo['steps'] = steps
- return self._cachedinfo['steps']
-
- def has_missing_dates(self):
- "Returns whether the DateArray have missing dates."
- if self._cachedinfo['full'] is None:
- steps = self.get_steps()
- return not(self._cachedinfo['full'])
-
- def isfull(self):
- "Returns whether the DateArray has no missing dates."
- if self._cachedinfo['full'] is None:
- steps = self.get_steps()
- return self._cachedinfo['full']
-
- def has_duplicated_dates(self):
- "Returns whether the DateArray has duplicated dates."
- if self._cachedinfo['hasdups'] is None:
- steps = self.get_steps()
- return self._cachedinfo['hasdups']
-
- def isvalid(self):
- "Returns whether the DateArray is valid: no missing/duplicated dates."
- return (self.isfull() and not self.has_duplicated_dates())
- #......................................................
-
-#............................
-
-
-#####---------------------------------------------------------------------------
-#---- --- DateArray functions ---
-#####---------------------------------------------------------------------------
-def isDateArray(a):
- "Tests whether an array is a DateArray object."
- return isinstance(a,DateArray)
-
-def guess_freq(dates):
- """Tries to estimate the frequency of a list of dates, by checking the steps
- between consecutive dates The steps should be in days.
- Returns a frequency code (alpha character)."""
- ddif = numeric.asarray(numpy.diff(dates))
- ddif.sort()
- if ddif.size == 0:
- fcode = _c.FR_UND
- elif ddif[0] == ddif[-1] == 1.:
- fcode = _c.FR_DAY
- elif (ddif[0] == 1.) and (ddif[-1] == 3.):
- fcode = _c.FR_BUS
- elif (ddif[0] > 3.) and (ddif[-1] == 7.):
- fcode = _c.FR_WK
- elif (ddif[0] >= 28.) and (ddif[-1] <= 31.):
- fcode = _c.FR_MTH
- elif (ddif[0] >= 90.) and (ddif[-1] <= 92.):
- fcode = _c.FR_QTR
- elif (ddif[0] >= 365.) and (ddif[-1] <= 366.):
- fcode = _c.FR_ANN
- elif numpy.abs(24.*ddif[0] - 1) <= 1e-5 and \
- numpy.abs(24.*ddif[-1] - 1) <= 1e-5:
- fcode = _c.FR_HR
- elif numpy.abs(1440.*ddif[0] - 1) <= 1e-5 and \
- numpy.abs(1440.*ddif[-1] - 1) <= 1e-5:
- fcode = _c.FR_MIN
- elif numpy.abs(86400.*ddif[0] - 1) <= 1e-5 and \
- numpy.abs(86400.*ddif[-1] - 1) <= 1e-5:
- fcode = _c.FR_SEC
- else:
- warnings.warn("Unable to estimate the frequency! %.3f<>%.3f" %\
- (ddif[0], ddif[-1]))
- fcode = _c.FR_UND
- return fcode
-
-
-def _listparser(dlist, freq=None):
- "Constructs a DateArray from a list."
- dlist = numeric.asarray(dlist)
- idx = dlist.argsort()
- dlist = dlist[idx]
- if dlist.ndim == 0:
- dlist.shape = (1,)
- # Case #1: dates as strings .................
- if dlist.dtype.kind in 'SU':
- #...construct a list of ordinals
- ords = numpy.fromiter((DateTimeFromString(s).toordinal() for s in dlist),
- float_)
- ords += 1
- #...try to guess the frequency
- if freq is None or freq == _c.FR_UND:
- freq = guess_freq(ords)
- #...construct a list of dates
- for s in dlist:
- x = Date(freq, string=s)
- dates = [Date(freq, string=s) for s in dlist]
- # Case #2: dates as numbers .................
- elif dlist.dtype.kind in 'if':
- #...hopefully, they are values
- if freq is None or freq == _c.FR_UND:
- freq = guess_freq(dlist)
- dates = dlist
- # Case #3: dates as objects .................
- elif dlist.dtype.kind == 'O':
- template = dlist[0]
- #...as Date objects
- if isinstance(template, Date):
- dates = numpy.fromiter((d.value for d in dlist), int_)
- #...as mx.DateTime objects
- elif hasattr(template,'absdays'):
- # no freq given: try to guess it from absdays
- if freq == _c.FR_UND:
- ords = numpy.fromiter((s.absdays for s in dlist), float_)
- ords += 1
- freq = guess_freq(ords)
- dates = [Date(freq, datetime=m) for m in dlist]
- #...as datetime objects
- elif hasattr(template, 'toordinal'):
- ords = numpy.fromiter((d.toordinal() for d in dlist), float_)
- if freq == _c.FR_UND:
- freq = guess_freq(ords)
- dates = [Date(freq, datetime=dt.datetime.fromordinal(a)) for a in ords]
- #
- result = DateArray(dates, freq)
- result._unsorted = idx
- return result
-
-
-def date_array(dlist=None, start_date=None, end_date=None, length=None,
- freq=None):
- """Constructs a DateArray from:
- - a starting date and either an ending date or a given length.
- - a list of dates.
- """
- freq = check_freq(freq)
- # Case #1: we have a list ...................
- if dlist is not None:
- # Already a DateArray....................
- if isinstance(dlist, DateArray):
- if (freq != _c.FR_UND) and (dlist.freq != check_freq(freq)):
- return dlist.asfreq(freq)
- else:
- return dlist
- # Make sure it's a sequence, else that's a start_date
- if hasattr(dlist,'__len__'):
- return _listparser(dlist, freq)
- elif start_date is not None:
- if end_date is not None:
- dmsg = "What starting date should be used ? '%s' or '%s' ?"
- raise DateError, dmsg % (dlist, start_date)
- else:
- (start_date, end_date) = (dlist, start_date)
- else:
- start_date = dlist
- # Case #2: we have a starting date ..........
- if start_date is None:
- if length == 0:
- return DateArray([], freq=freq)
- raise InsufficientDateError
- if not isDate(start_date):
- dmsg = "Starting date should be a valid Date instance! "
- dmsg += "(got '%s' instead)" % type(start_date)
- raise DateError, dmsg
- # Check if we have an end_date
- if end_date is None:
- if length is None:
-# raise ValueError,"No length precised!"
- length = 1
- else:
- if not isDate(end_date):
- raise DateError, "Ending date should be a valid Date instance!"
- length = int(end_date - start_date) + 1
-# dlist = [(start_date+i).value for i in range(length)]
- dlist = numeric.arange(length, dtype=int_)
- dlist += start_date.value
- if freq == _c.FR_UND:
- freq = start_date.freq
- return DateArray(dlist, freq=freq)
-datearray = date_array
-
-def date_array_fromlist(dlist, freq=None):
- "Constructs a DateArray from a list of dates."
- return date_array(dlist=dlist, freq=freq)
-
-def date_array_fromrange(start_date, end_date=None, length=None,
- freq=None):
- """Constructs a DateArray from a starting date and either an ending date or
- a length."""
- return date_array(start_date=start_date, end_date=end_date,
- length=length, freq=freq)
-
-#####---------------------------------------------------------------------------
-#---- --- Definition of functions from the corresponding methods ---
-#####---------------------------------------------------------------------------
-class _frommethod(object):
- """Defines functions from existing MaskedArray methods.
-:ivar _methodname (String): Name of the method to transform.
- """
- def __init__(self, methodname):
- self._methodname = methodname
- self.__doc__ = self.getdoc()
- def getdoc(self):
- "Returns the doc of the function (from the doc of the method)."
- try:
- return getattr(DateArray, self._methodname).__doc__
- except AttributeError:
- return "???"
- #
- def __call__(self, caller, *args, **params):
- if hasattr(caller, self._methodname):
- method = getattr(caller, self._methodname)
- # If method is not callable, it's a property, and don't call it
- if hasattr(method, '__call__'):
- return method.__call__(*args, **params)
- return method
- method = getattr(fromnumeric.asarray(caller), self._methodname)
- try:
- return method(*args, **params)
- except SystemError:
- return getattr(numpy,self._methodname).__call__(caller, *args, **params)
-#............................
-day_of_week = _frommethod('day_of_week')
-day_of_year = _frommethod('day_of_year')
-year = _frommethod('year')
-quarter = _frommethod('quarter')
-month = _frommethod('month')
-day = _frommethod('day')
-hour = _frommethod('hour')
-minute = _frommethod('minute')
-second = _frommethod('second')
-
-
-def period_break(dates, period):
- """Returns the indices where the given period changes.
-
-:Parameters:
- dates : DateArray
- Array of dates to monitor.
- period : string
- Name of the period to monitor.
- """
- current = getattr(dates, period)
- previous = getattr(dates-1, period)
- return (current - previous).nonzero()[0]
-
-
-################################################################################
-
-if __name__ == '__main__':
- import maskedarray.testutils
- from maskedarray.testutils import assert_equal
-
- if 1:
- dlist = ['2007-%02i' % i for i in range(1,5)+range(7,13)]
- mdates = date_array_fromlist(dlist, 'M')
-
- if 2:
- dlist = ['2007-01','2007-03','2007-04','2007-02']
- mdates = date_array_fromlist(dlist, 'M')
Deleted: trunk/scipy/sandbox/timeseries/tests/test_multitimeseries.py
===================================================================
--- trunk/scipy/sandbox/timeseries/tests/test_multitimeseries.py 2007-09-18 23:22:21 UTC (rev 3326)
+++ trunk/scipy/sandbox/timeseries/tests/test_multitimeseries.py 2007-09-19 00:51:14 UTC (rev 3327)
@@ -1,189 +0,0 @@
-# pylint: disable-msg=W0611, W0612, W0511,R0201
-"""Tests suite for mrecarray.
-
-:author: Pierre Gerard-Marchant & Matt Knox
-:contact: pierregm_at_uga_dot_edu & mattknox_ca_at_hotmail_dot_com
-:version: $Id$
-"""
-__author__ = "Pierre GF Gerard-Marchant & Matt Knox ($Author$)"
-__version__ = '1.0'
-__revision__ = "$Revision$"
-__date__ = '$Date$'
-
-import types
-
-import numpy
-import numpy.core.fromnumeric as fromnumeric
-from numpy.testing import NumpyTest, NumpyTestCase
-from numpy.testing.utils import build_err_msg
-
-import maskedarray.testutils
-from maskedarray.testutils import assert_equal, assert_array_equal
-
-import maskedarray.core as MA
-import maskedarray.mrecords as MR
-from maskedarray.mrecords import addfield
-
-from maskedarray.core import getmaskarray, nomask, masked_array
-
-from timeseries import tmulti
-from timeseries.tmulti import MultiTimeSeries, TimeSeries,\
- fromarrays, fromtextfile, fromrecords, \
- date_array, time_series
-
-
-#..............................................................................
-class test_mrecords(NumpyTestCase):
- "Base test class for MaskedArrays."
- def __init__(self, *args, **kwds):
- NumpyTestCase.__init__(self, *args, **kwds)
- self.setup()
-
- def setup(self):
- "Generic setup"
- d = numpy.arange(5)
- m = MA.make_mask([1,0,0,1,1])
- base_d = numpy.r_[d,d[::-1]].reshape(2,-1).T
- base_m = numpy.r_[[m, m[::-1]]].T
- base = MA.array(base_d, mask=base_m)
- mrec = MR.fromarrays(base.T,)
- dlist = ['2007-%02i' % (i+1) for i in d]
- dates = date_array(dlist)
- ts = time_series(mrec,dates)
- mts = MultiTimeSeries(mrec,dates)
- self.data = [d, m, mrec, dlist, dates, ts, mts]
-
- def test_get(self):
- "Tests fields retrieval"
- [d, m, mrec, dlist, dates, ts, mts] = self.data
- assert(isinstance(mts['f0'], TimeSeries))
- assert_equal(mts['f0']._dates, dates)
- assert_equal(mts['f0']._data, d)
- assert_equal(mts['f0']._mask, m)
- #
- assert(isinstance(mts[0], MultiTimeSeries))
- assert_equal(mts._data[0], mrec._data[0])
- # We can't use assert_equal here, as it tries to convert the tuple into a singleton
-# assert(mts[0]._data.view(numpyndarray) == mrec[0])
- assert_equal(numpy.asarray(mts._data[0]), mrec[0])
- assert_equal(mts._dates[0], dates[0])
- assert_equal(mts[0]._dates, dates[0])
- #
- assert(isinstance(mts['2007-01'], MultiTimeSeries))
- assert(mts['2007-01']._data == mrec[0])
- assert_equal(mts['2007-01']._dates, dates[0])
- #
- assert(isinstance(mts.f0, TimeSeries))
- assert_equal(mts.f0, time_series(d, dates=dates, mask=m))
- assert_equal(mts.f1, time_series(d[::-1], dates=dates, mask=m[::-1]))
- assert((mts._fieldmask == numpy.core.records.fromarrays([m, m[::-1]])).all())
- assert_equal(mts._mask, numpy.r_[[m,m[::-1]]].all(0))
- assert_equal(mts.f0[1], mts[1].f0)
- #
- assert(isinstance(mts[:2], MultiTimeSeries))
- assert_equal(mts[:2]._data.f0, mrec[:2].f0)
- assert_equal(mts[:2]._data.f1, mrec[:2].f1)
- assert_equal(mts[:2]._dates, dates[:2])
-
- def test_set(self):
- "Tests setting fields/attributes."
- [d, m, mrec, dlist, dates, ts, mts] = self.data
- mts.f0._data[:] = 5
- assert_equal(mts['f0']._data, [5,5,5,5,5])
- mts.f0 = 1
- assert_equal(mts['f0']._data, [1]*5)
- assert_equal(getmaskarray(mts['f0']), [0]*5)
- mts.f1 = MA.masked
- assert_equal(mts.f1.mask, [1]*5)
- assert_equal(getmaskarray(mts['f1']), [1]*5)
- mts._mask = MA.masked
- assert_equal(getmaskarray(mts['f1']), [1]*5)
- assert_equal(mts['f0']._mask, mts['f1']._mask)
- mts._mask = MA.nomask
- assert_equal(getmaskarray(mts['f1']), [0]*5)
- assert_equal(mts['f0']._mask, mts['f1']._mask)
-
- def test_setslices(self):
- "Tests setting slices."
- [d, m, mrec, dlist, dates, ts, mts] = self.data
- #
- mts[:2] = 5
- assert_equal(mts.f0._data, [5,5,2,3,4])
- assert_equal(mts.f1._data, [5,5,2,1,0])
- assert_equal(mts.f0._mask, [0,0,0,1,1])
- assert_equal(mts.f1._mask, [0,0,0,0,1])
- mts.harden_mask()
- mts[-2:] = 5
- assert_equal(mts.f0._data, [5,5,2,3,4])
- assert_equal(mts.f1._data, [5,5,2,5,0])
- assert_equal(mts.f0._mask, [0,0,0,1,1])
- assert_equal(mts.f1._mask, [0,0,0,0,1])
-
- def test_hardmask(self):
- "Test hardmask"
- [d, m, mrec, dlist, dates, ts, mts] = self.data
- mts.harden_mask()
- assert(mts._hardmask)
- mts._mask = nomask
- assert_equal(mts._mask, numpy.r_[[m,m[::-1]]].all(0))
- mts.soften_mask()
- assert(not mts._hardmask)
- mts._mask = nomask
- assert(mts['f1']._mask is nomask)
- assert_equal(mts['f0']._mask,mts['f1']._mask)
-
- def test_addfield(self):
- "Tests addfield"
- [d, m, mrec, dlist, dates, ts, mts] = self.data
- mts = addfield(mts, masked_array(d+10, mask=m[::-1]))
- assert_equal(mts.f2, d+10)
- assert_equal(mts.f2._mask, m[::-1])
-
- def test_fromrecords(self):
- "Test from recarray."
- [d, m, mrec, dlist, dates, ts, mts] = self.data
- nrec = numpy.core.records.fromarrays(numpy.r_[[d,d[::-1]]])
- mrecfr = fromrecords(nrec.tolist(), dates=dates)
- assert_equal(mrecfr.f0, mrec.f0)
- assert_equal(mrecfr.dtype, mrec.dtype)
- #....................
- altrec = [tuple([d,]+list(r)) for (d,r) in zip(dlist,nrec)]
- mrecfr = fromrecords(altrec, names='dates,f0,f1')
- assert_equal(mrecfr.f0, mrec.f0)
- assert_equal(mrecfr.dtype, mrec.dtype)
- #....................
- tmp = MultiTimeSeries(mts._series[::-1], dates=mts.dates)
- mrecfr = fromrecords(tmp)
- assert_equal(mrecfr.f0, mrec.f0[::-1])
-
- def test_fromtextfile(self):
- "Tests reading from a text file."
- fcontent = """#
-'Dates', 'One (S)','Two (I)','Three (F)','Four (M)','Five (-)','Six (C)'
-'2007-01', 'strings',1,1.0,'mixed column',,1
-'2007-02', 'with embedded "double quotes"',2,2.0,1.0,,1
-'2007-03', 'strings',3,3.0E5,3,,1
-'2007-05','strings',4,-1e-10,,,1
-"""
- import os
- from datetime import datetime
- fname = 'tmp%s' % datetime.now().strftime("%y%m%d%H%M%S%s")
- f = open(fname, 'w')
- f.write(fcontent)
- f.close()
- mrectxt = fromtextfile(fname,delimitor=',',varnames='ABCDEFG',
- dates_column=0)
- os.unlink(fname)
- #
- dlist = ['2007-%02i' % i for i in (1,2,3,5)]
- assert(isinstance(mrectxt, MultiTimeSeries))
- assert_equal(mrectxt._dates, date_array(dlist,'M'))
- assert_equal(mrectxt.dtype.names, ['B','C','D','E','F','G'])
- assert_equal(mrectxt.G, [1,1,1,1])
- assert_equal(mrectxt.F._mask, [1,1,1,1])
- assert_equal(mrectxt.D, [1,2,3.e+5,-1e-10])
-
-###############################################################################
-#------------------------------------------------------------------------------
-if __name__ == "__main__":
- NumpyTest().run()
\ No newline at end of file
Copied: trunk/scipy/sandbox/timeseries/tests/test_trecords.py (from rev 3319, trunk/scipy/sandbox/timeseries/tests/test_multitimeseries.py)
===================================================================
--- trunk/scipy/sandbox/timeseries/tests/test_multitimeseries.py 2007-09-18 15:04:50 UTC (rev 3319)
+++ trunk/scipy/sandbox/timeseries/tests/test_trecords.py 2007-09-19 00:51:14 UTC (rev 3327)
@@ -0,0 +1,189 @@
+# pylint: disable-msg=W0611, W0612, W0511,R0201
+"""Tests suite for trecords.
+
+:author: Pierre Gerard-Marchant & Matt Knox
+:contact: pierregm_at_uga_dot_edu & mattknox_ca_at_hotmail_dot_com
+:version: $Id$
+"""
+__author__ = "Pierre GF Gerard-Marchant & Matt Knox ($Author$)"
+__version__ = '1.0'
+__revision__ = "$Revision$"
+__date__ = '$Date$'
+
+import types
+
+import numpy
+import numpy.core.fromnumeric as fromnumeric
+from numpy.testing import NumpyTest, NumpyTestCase
+from numpy.testing.utils import build_err_msg
+
+import maskedarray.testutils
+from maskedarray.testutils import assert_equal, assert_array_equal
+
+import maskedarray.core as MA
+import maskedarray.mrecords as MR
+from maskedarray.mrecords import addfield
+
+from maskedarray.core import getmaskarray, nomask, masked_array
+
+from timeseries import trecords
+from timeseries.trecords import TimeSeriesRecords, TimeSeries,\
+ fromarrays, fromtextfile, fromrecords, \
+ date_array, time_series
+
+
+#..............................................................................
+class test_mrecords(NumpyTestCase):
+ "Base test class for MaskedArrays."
+ def __init__(self, *args, **kwds):
+ NumpyTestCase.__init__(self, *args, **kwds)
+ self.setup()
+
+ def setup(self):
+ "Generic setup"
+ d = numpy.arange(5)
+ m = MA.make_mask([1,0,0,1,1])
+ base_d = numpy.r_[d,d[::-1]].reshape(2,-1).T
+ base_m = numpy.r_[[m, m[::-1]]].T
+ base = MA.array(base_d, mask=base_m)
+ mrec = MR.fromarrays(base.T,)
+ dlist = ['2007-%02i' % (i+1) for i in d]
+ dates = date_array(dlist)
+ ts = time_series(mrec,dates)
+ mts = TimeSeriesRecords(mrec,dates)
+ self.data = [d, m, mrec, dlist, dates, ts, mts]
+
+ def test_get(self):
+ "Tests fields retrieval"
+ [d, m, mrec, dlist, dates, ts, mts] = self.data
+ assert(isinstance(mts['f0'], TimeSeries))
+ assert_equal(mts['f0']._dates, dates)
+ assert_equal(mts['f0']._data, d)
+ assert_equal(mts['f0']._mask, m)
+ #
+ assert(isinstance(mts[0], TimeSeriesRecords))
+ assert_equal(mts._data[0], mrec._data[0])
+ # We can't use assert_equal here, as it tries to convert the tuple into a singleton
+# assert(mts[0]._data.view(numpyndarray) == mrec[0])
+ assert_equal(numpy.asarray(mts._data[0]), mrec[0])
+ assert_equal(mts._dates[0], dates[0])
+ assert_equal(mts[0]._dates, dates[0])
+ #
+ assert(isinstance(mts['2007-01'], TimeSeriesRecords))
+ assert(mts['2007-01']._data == mrec[0])
+ assert_equal(mts['2007-01']._dates, dates[0])
+ #
+ assert(isinstance(mts.f0, TimeSeries))
+ assert_equal(mts.f0, time_series(d, dates=dates, mask=m))
+ assert_equal(mts.f1, time_series(d[::-1], dates=dates, mask=m[::-1]))
+ assert((mts._fieldmask == numpy.core.records.fromarrays([m, m[::-1]])).all())
+ assert_equal(mts._mask, numpy.r_[[m,m[::-1]]].all(0))
+ assert_equal(mts.f0[1], mts[1].f0)
+ #
+ assert(isinstance(mts[:2], TimeSeriesRecords))
+ assert_equal(mts[:2]._data.f0, mrec[:2].f0)
+ assert_equal(mts[:2]._data.f1, mrec[:2].f1)
+ assert_equal(mts[:2]._dates, dates[:2])
+
+ def test_set(self):
+ "Tests setting fields/attributes."
+ [d, m, mrec, dlist, dates, ts, mts] = self.data
+ mts.f0._data[:] = 5
+ assert_equal(mts['f0']._data, [5,5,5,5,5])
+ mts.f0 = 1
+ assert_equal(mts['f0']._data, [1]*5)
+ assert_equal(getmaskarray(mts['f0']), [0]*5)
+ mts.f1 = MA.masked
+ assert_equal(mts.f1.mask, [1]*5)
+ assert_equal(getmaskarray(mts['f1']), [1]*5)
+ mts._mask = MA.masked
+ assert_equal(getmaskarray(mts['f1']), [1]*5)
+ assert_equal(mts['f0']._mask, mts['f1']._mask)
+ mts._mask = MA.nomask
+ assert_equal(getmaskarray(mts['f1']), [0]*5)
+ assert_equal(mts['f0']._mask, mts['f1']._mask)
+
+ def test_setslices(self):
+ "Tests setting slices."
+ [d, m, mrec, dlist, dates, ts, mts] = self.data
+ #
+ mts[:2] = 5
+ assert_equal(mts.f0._data, [5,5,2,3,4])
+ assert_equal(mts.f1._data, [5,5,2,1,0])
+ assert_equal(mts.f0._mask, [0,0,0,1,1])
+ assert_equal(mts.f1._mask, [0,0,0,0,1])
+ mts.harden_mask()
+ mts[-2:] = 5
+ assert_equal(mts.f0._data, [5,5,2,3,4])
+ assert_equal(mts.f1._data, [5,5,2,5,0])
+ assert_equal(mts.f0._mask, [0,0,0,1,1])
+ assert_equal(mts.f1._mask, [0,0,0,0,1])
+
+ def test_hardmask(self):
+ "Test hardmask"
+ [d, m, mrec, dlist, dates, ts, mts] = self.data
+ mts.harden_mask()
+ assert(mts._hardmask)
+ mts._mask = nomask
+ assert_equal(mts._mask, numpy.r_[[m,m[::-1]]].all(0))
+ mts.soften_mask()
+ assert(not mts._hardmask)
+ mts._mask = nomask
+ assert(mts['f1']._mask is nomask)
+ assert_equal(mts['f0']._mask,mts['f1']._mask)
+
+ def test_addfield(self):
+ "Tests addfield"
+ [d, m, mrec, dlist, dates, ts, mts] = self.data
+ mts = addfield(mts, masked_array(d+10, mask=m[::-1]))
+ assert_equal(mts.f2, d+10)
+ assert_equal(mts.f2._mask, m[::-1])
+
+ def test_fromrecords(self):
+ "Test from recarray."
+ [d, m, mrec, dlist, dates, ts, mts] = self.data
+ nrec = numpy.core.records.fromarrays(numpy.r_[[d,d[::-1]]])
+ mrecfr = fromrecords(nrec.tolist(), dates=dates)
+ assert_equal(mrecfr.f0, mrec.f0)
+ assert_equal(mrecfr.dtype, mrec.dtype)
+ #....................
+ altrec = [tuple([d,]+list(r)) for (d,r) in zip(dlist,nrec)]
+ mrecfr = fromrecords(altrec, names='dates,f0,f1')
+ assert_equal(mrecfr.f0, mrec.f0)
+ assert_equal(mrecfr.dtype, mrec.dtype)
+ #....................
+ tmp = TimeSeriesRecords(mts._series[::-1], dates=mts.dates)
+ mrecfr = fromrecords(tmp)
+ assert_equal(mrecfr.f0, mrec.f0[::-1])
+
+ def test_fromtextfile(self):
+ "Tests reading from a text file."
+ fcontent = """#
+'Dates', 'One (S)','Two (I)','Three (F)','Four (M)','Five (-)','Six (C)'
+'2007-01', 'strings',1,1.0,'mixed column',,1
+'2007-02', 'with embedded "double quotes"',2,2.0,1.0,,1
+'2007-03', 'strings',3,3.0E5,3,,1
+'2007-05','strings',4,-1e-10,,,1
+"""
+ import os
+ from datetime import datetime
+ fname = 'tmp%s' % datetime.now().strftime("%y%m%d%H%M%S%s")
+ f = open(fname, 'w')
+ f.write(fcontent)
+ f.close()
+ mrectxt = fromtextfile(fname,delimitor=',',varnames='ABCDEFG',
+ dates_column=0)
+ os.unlink(fname)
+ #
+ dlist = ['2007-%02i' % i for i in (1,2,3,5)]
+ assert(isinstance(mrectxt, TimeSeriesRecords))
+ assert_equal(mrectxt._dates, date_array(dlist,'M'))
+ assert_equal(mrectxt.dtype.names, ['B','C','D','E','F','G'])
+ assert_equal(mrectxt.G, [1,1,1,1])
+ assert_equal(mrectxt.F._mask, [1,1,1,1])
+ assert_equal(mrectxt.D, [1,2,3.e+5,-1e-10])
+
+###############################################################################
+#------------------------------------------------------------------------------
+if __name__ == "__main__":
+ NumpyTest().run()
\ No newline at end of file
Deleted: trunk/scipy/sandbox/timeseries/textras.py
===================================================================
--- trunk/scipy/sandbox/timeseries/textras.py 2007-09-18 23:22:21 UTC (rev 3326)
+++ trunk/scipy/sandbox/timeseries/textras.py 2007-09-19 00:51:14 UTC (rev 3327)
@@ -1,106 +0,0 @@
-"""
-Extras functions for time series.
-
-:author: Pierre GF Gerard-Marchant & Matt Knox
-:contact: pierregm_at_uga_dot_edu - mattknox_ca_at_hotmail_dot_com
-:version: $Id$
-"""
-__author__ = "Pierre GF Gerard-Marchant & Matt Knox ($Author$)"
-__version__ = '1.0'
-__revision__ = "$Revision$"
-__date__ = '$Date$'
-
-
-import numpy
-import maskedarray
-from maskedarray import masked
-
-import const as _c
-from tseries import TimeSeries
-
-
-
-__all__ = ['isleapyear', 'count_missing', 'accept_atmost_missing']
-
-#..............................................................................
-def isleapyear(year):
- """Returns true if year is a leap year.
-
-:Input:
- year : integer / sequence
- A given (list of) year(s).
- """
- year = numpy.asarray(year)
- return numpy.logical_or(year % 400 == 0,
- numpy.logical_and(year % 4 == 0, year % 100 > 0))
-
-#..............................................................................
-def count_missing(series):
- """Returns the number of missing data per period.
-
-
-Notes
------
-This function is designed to return the actual number of missing values when
-a series has been converted from one frequency to a smaller frequency.
-
-For example, converting a 12-month-long daily series to months will yield
-a (12x31) array, with missing values in February, April, June...
-count_missing will discard these extra missing values.
- """
- if not isinstance(series, TimeSeries):
- raise TypeError, "The input data should be a valid TimeSeries object! "\
- "(got %s instead)" % type(series)
- if series.ndim == 1:
- return len(series) - series.count()
- elif series.ndim != 2:
- raise NotImplementedError
- #
- missing = series.shape[-1] - series.count(axis=-1)
- period = series.shape[-1]
- freq = series.freq
- if (period == 366) and (freq//_c.FR_ANN == 1):
- # row: years, cols: days
- missing -= ~isleapyear(series.year)
- elif period == 31 and (freq//_c.FR_MTH == 1):
- months = series.months
- # row: months, cols: days
- missing[numpy.array([m in [4,6,9,11] for m in months])] -= 1
- isfeb = (months == 2)
- missing[isfeb] -= 2
- missing[isfeb & ~isleapyear(series.year)] -= 1
- elif period not in (12,7):
- raise NotImplementedError, "Not yet implemented for that frequency..."
- return missing
-
-#.............................................................................
-def accept_atmost_missing(series, max_missing, strict=False):
- """Masks the rows of the series that contains more than max_missing missing data.
- Returns a new masked series.
-
-:Inputs:
- series : TimeSeries
- Input time series.
- max_missing : float
- Number of maximum acceptable missing values per row (if larger than 1),
- or maximum acceptable percentage of missing values (if lower than 1).
- strict : boolean *[False]*
- Whether the
- """
- series = numpy.array(series, copy=True, subok=True)
- if not isinstance(series, TimeSeries):
- raise TypeError, "The input data should be a valid TimeSeries object! "\
- "(got %s instead)" % type(series)
- # Find the number of missing values ....
- missing = count_missing(series)
- # Transform an acceptable percentage in a number
- if max_missing < 1:
- max_missing = numpy.round(max_missing * series.shape[-1],0)
- #
- series.unshare_mask()
- if strict:
- series[missing > max_missing] = masked
- else:
- series[missing >= max_missing] = masked
- return series
-
\ No newline at end of file
Deleted: trunk/scipy/sandbox/timeseries/tmulti.py
===================================================================
--- trunk/scipy/sandbox/timeseries/tmulti.py 2007-09-18 23:22:21 UTC (rev 3326)
+++ trunk/scipy/sandbox/timeseries/tmulti.py 2007-09-19 00:51:14 UTC (rev 3327)
@@ -1,524 +0,0 @@
-# pylint: disable-msg=W0201, W0212
-"""
-Support for multi-variable time series, through masked recarrays.
-
-:author: Pierre GF Gerard-Marchant & Matt Knox
-:contact: pierregm_at_uga_dot_edu - mattknox_ca_at_hotmail_dot_com
-:version: $Id$
-"""
-__author__ = "Pierre GF Gerard-Marchant & Matt Knox ($Author$)"
-__version__ = '1.0'
-__revision__ = "$Revision$"
-__date__ = '$Date$'
-
-
-import sys
-
-import numpy
-from numpy import bool_, complex_, float_, int_, str_, object_
-import numpy.core.fromnumeric as fromnumeric
-import numpy.core.numeric as numeric
-from numpy.core.numeric import ndarray
-import numpy.core.numerictypes as ntypes
-import numpy.core.umath as umath
-from numpy.core.defchararray import chararray
-from numpy.core.records import find_duplicate
-from numpy.core.records import format_parser, recarray, record
-from numpy.core.records import fromarrays as recfromarrays
-
-import maskedarray as MA
-#MaskedArray = MA.MaskedArray
-from maskedarray.core import MaskedArray, MAError, default_fill_value, \
- masked_print_option
-from maskedarray.core import masked, nomask, getmask, getmaskarray, make_mask,\
- make_mask_none, mask_or, masked_array, filled
-
-import maskedarray.mrecords as MR
-from maskedarray.mrecords import _checknames, _guessvartypes, openfile,\
- MaskedRecords
-from maskedarray.mrecords import fromrecords as mrecfromrecords
-
-from tseries import TimeSeries, time_series, _getdatalength
-from tdates import Date, DateArray, date_array
-
-#ndarray = numeric.ndarray
-_byteorderconv = numpy.core.records._byteorderconv
-_typestr = ntypes._typestr
-
-reserved_fields = MR.reserved_fields + ['_dates']
-
-import warnings
-
-__all__ = [
-'MultiTimeSeries','fromarrays','fromrecords','fromtextfile',
-]
-
-def _getformats(data):
- """Returns the formats of each array of arraylist as a comma-separated
- string."""
- if isinstance(data, record):
- return ",".join([desc[1] for desc in data.dtype.descr])
-
- formats = ''
- for obj in data:
- obj = numeric.asarray(obj)
-# if not isinstance(obj, ndarray):
-## if not isinstance(obj, ndarray):
-# raise ValueError, "item in the array list must be an ndarray."
- formats += _typestr[obj.dtype.type]
- if issubclass(obj.dtype.type, ntypes.flexible):
- formats += `obj.itemsize`
- formats += ','
- return formats[:-1]
-
-
-
-
-class MultiTimeSeries(TimeSeries, MaskedRecords, object):
- """
-
-:IVariables:
- - `__localfdict` : Dictionary
- Dictionary of local fields (`f0_data`, `f0_mask`...)
- - `__globalfdict` : Dictionary
- Dictionary of global fields, as the combination of a `_data` and a `_mask`.
- (`f0`)
- """
- _defaultfieldmask = nomask
- _defaulthardmask = False
- def __new__(cls, data, dates=None, mask=nomask, dtype=None,
- freq=None, observed=None, start_date=None,
- hard_mask=False, fill_value=None,
-# offset=0, strides=None,
- formats=None, names=None, titles=None,
- byteorder=None, aligned=False):
- tsoptions = dict(fill_value=fill_value, hard_mask=hard_mask,)
- mroptions = dict(fill_value=fill_value, hard_mask=hard_mask,
- formats=formats, names=names, titles=titles,
- byteorder=byteorder, aligned=aligned)
- #
- if isinstance(data, MultiTimeSeries):
-# if copy:
-# data = data.copy()
- data._hardmask = data._hardmask | hard_mask
- return data
- # .......................................
- _data = MaskedRecords(data, mask=mask, dtype=dtype, **mroptions).view(cls)
- if dates is None:
- length = _getdatalength(data)
- newdates = date_array(start_date=start_date, length=length,
- freq=freq)
- elif not hasattr(dates, 'freq'):
- newdates = date_array(dlist=dates, freq=freq)
- else:
- newdates = dates
- _data._dates = newdates
- _data._observed = observed
- cls._defaultfieldmask = _data._fieldmask
- #
- return _data
-
- def __array_finalize__(self,obj):
- if isinstance(obj, (MaskedRecords)):
- self.__dict__.update(_fieldmask=obj._fieldmask,
- _hardmask=obj._hardmask,
- _fill_value=obj._fill_value,
- _names = obj.dtype.names
- )
- if isinstance(obj, MultiTimeSeries):
- self.__dict__.update(observed=obj.observed,
- _dates=obj._dates)
- else:
- self.__dict__.update(observed=None,
- _dates=[])
- else:
- self.__dict__.update(_dates = [],
- observed=None,
- _fieldmask = nomask,
- _hardmask = False,
- fill_value = None,
- _names = self.dtype.names
- )
- return
-
-
- def _getdata(self):
- "Returns the data as a recarray."
- return self.view(recarray)
- _data = property(fget=_getdata)
-
- def _getseries(self):
- "Returns the data as a MaskedRecord array."
- return self.view(MaskedRecords)
- _series = property(fget=_getseries)
-
- #......................................................
- def __getattribute__(self, attr):
- getattribute = MaskedRecords.__getattribute__
- _dict = getattribute(self,'__dict__')
- if attr in _dict.get('_names',[]):
- obj = getattribute(self,attr).view(TimeSeries)
- obj._dates = _dict['_dates']
- return obj
- return getattribute(self,attr)
-
-
- def __setattr__(self, attr, val):
- newattr = attr not in self.__dict__
- try:
- # Is attr a generic attribute ?
- ret = object.__setattr__(self, attr, val)
- except:
- # Not a generic attribute: exit if it's not a valid field
- fielddict = self.dtype.names or {}
- if attr not in fielddict:
- exctype, value = sys.exc_info()[:2]
- raise exctype, value
- else:
- if attr not in list(self.dtype.names) + ['_dates','_mask']:
- return ret
- if newattr: # We just added this one
- try: # or this setattr worked on an internal
- # attribute.
- object.__delattr__(self, attr)
- except:
- return ret
- # Case #1.: Basic field ............
- base_fmask = self._fieldmask
- _names = self.dtype.names
- if attr in _names:
- fval = filled(val)
- mval = getmaskarray(val)
- if self._hardmask:
- mval = mask_or(mval, base_fmask.__getattr__(attr))
- self._data.__setattr__(attr, fval)
- base_fmask.__setattr__(attr, mval)
- return
- elif attr == '_mask':
- if self._hardmask:
- val = make_mask(val)
- if val is not nomask:
-# mval = getmaskarray(val)
- for k in _names:
- m = mask_or(val, base_fmask.__getattr__(k))
- base_fmask.__setattr__(k, m)
- else:
- mval = getmaskarray(val)
- for k in _names:
- base_fmask.__setattr__(k, mval)
- return
- #............................................
- def __getitem__(self, indx):
- """Returns all the fields sharing the same fieldname base.
- The fieldname base is either `_data` or `_mask`."""
- _localdict = self.__dict__
- # We want a field ........
- if indx in self.dtype.names:
- obj = self._data[indx].view(TimeSeries)
- obj._dates = _localdict['_dates']
- obj._mask = make_mask(_localdict['_fieldmask'][indx])
- return obj
- # We want some elements ..
- (sindx, dindx) = self._TimeSeries__checkindex(indx)
-# obj = numeric.array(self._data[sindx],
-# copy=False, subok=True).view(type(self))
- obj = numeric.array(self._data[sindx], copy=False, subok=True)
- obj = obj.view(type(self))
- obj.__dict__.update(_dates=_localdict['_dates'][dindx],
- _fieldmask=_localdict['_fieldmask'][sindx],
- _fill_value=_localdict['_fill_value'])
- return obj
-
- def __getslice__(self, i, j):
- """Returns the slice described by [i,j]."""
- _localdict = self.__dict__
- (si, di) = super(MultiTimeSeries, self)._TimeSeries__checkindex(i)
- (sj, dj) = super(MultiTimeSeries, self)._TimeSeries__checkindex(j)
- newdata = self._data[si:sj].view(type(self))
- newdata.__dict__.update(_dates=_localdict['_dates'][di:dj],
- _mask=_localdict['_fieldmask'][si:sj])
- return newdata
-
- def __setslice__(self, i, j, value):
- """Sets the slice described by [i,j] to `value`."""
- self.view(MaskedRecords).__setslice__(i,j,value)
- return
-
- #......................................................
- def __str__(self):
- """x.__str__() <==> str(x)
-Calculates the string representation, using masked for fill if it is enabled.
-Otherwise, fills with fill value.
- """
- if self.size > 1:
- mstr = ["(%s)" % ",".join([str(i) for i in s])
- for s in zip(*[getattr(self,f) for f in self.dtype.names])]
- return "[%s]" % ", ".join(mstr)
- else:
- mstr = numeric.asarray(self._data.item(), dtype=object_)
- mstr[list(self._fieldmask)] = masked_print_option
- return str(mstr)
-
- def __repr__(self):
- """x.__repr__() <==> repr(x)
-Calculates the repr representation, using masked for fill if it is enabled.
-Otherwise fill with fill value.
- """
- _names = self.dtype.names
- _dates = self._dates
- if numeric.size(_dates) > 2 and self._dates.isvalid():
- timestr = "[%s ... %s]" % (str(_dates[0]),str(_dates[-1]))
- else:
- timestr = str(_dates)
- fmt = "%%%is : %%s" % (max([len(n) for n in _names])+4,)
- reprstr = [fmt % (f,getattr(self,f)) for f in self.dtype.names]
- reprstr.insert(0,'multitimeseries(')
- reprstr.extend([fmt % ('dates', timestr),
- fmt % (' fill_value', self._fill_value),
- ' )'])
- return str("\n".join(reprstr))
- #.............................................
- def copy(self):
- "Returns a copy of the argument."
- _localdict = self.__dict__
- return MultiTimeSeries(_localdict['_data'].copy(),
- dates=_localdict['_dates'].copy(),
- mask=_localdict['_fieldmask'].copy(),
- dtype=self.dtype)
-
-
-#####---------------------------------------------------------------------------
-#---- --- Constructors ---
-#####---------------------------------------------------------------------------
-
-def fromarrays(arraylist, dates=None,
- dtype=None, shape=None, formats=None,
- names=None, titles=None, aligned=False, byteorder=None):
- """Creates a mrecarray from a (flat) list of masked arrays.
-
-:Parameters:
- - `arraylist` : Sequence
- A list of (masked) arrays. Each element of the sequence is first converted
- to a masked array if needed. If a 2D array is passed as argument, it is
- processed line by line
- - `dtype` : numeric.dtype
- Data type descriptor.
- - `shape` : Integer *[None]*
- Number of records. If None, `shape` is defined from the shape of the first
- array in the list.
- - `formats` :
- (Description to write)
- - `names` :
- (description to write)
- - `titles`:
- (Description to write)
- - `aligned`: Boolen *[False]*
- (Description to write, not used anyway)
- - `byteorder`: Boolen *[None]*
- (Description to write, not used anyway)
-
-
- """
- arraylist = [MA.asarray(x) for x in arraylist]
- # Define/check the shape.....................
- if shape is None or shape == 0:
- shape = arraylist[0].shape
- if isinstance(shape, int):
- shape = (shape,)
- # Define formats from scratch ...............
- if formats is None and dtype is None:
- formats = _getformats(arraylist)
- # Define the dtype ..........................
- if dtype is not None:
- descr = numeric.dtype(dtype)
- _names = descr.names
- else:
- parsed = format_parser(formats, names, titles, aligned, byteorder)
- _names = parsed._names
- descr = parsed._descr
- # Determine shape from data-type.............
- if len(descr) != len(arraylist):
- msg = "Mismatch between the number of fields (%i) and the number of "\
- "arrays (%i)"
- raise ValueError, msg % (len(descr), len(arraylist))
- d0 = descr[0].shape
- nn = len(d0)
- if nn > 0:
- shape = shape[:-nn]
- # Make sure the shape is the correct one ....
- for k, obj in enumerate(arraylist):
- nn = len(descr[k].shape)
- testshape = obj.shape[:len(obj.shape)-nn]
- if testshape != shape:
- raise ValueError, "Array-shape mismatch in array %d" % k
- # Reconstruct the descriptor, by creating a _data and _mask version
- return MultiTimeSeries(arraylist, dtype=descr)
-
-def __getdates(dates=None, newdates=None, length=None, freq=None,
- start_date=None):
- """Determines new dates (private function not meant to be used)."""
- if dates is None:
- if newdates is not None:
- if not hasattr(newdates, 'freq'):
- newdates = date_array(dlist=newdates, freq=freq)
- else:
- newdates = date_array(start_date=start_date, length=length,
- freq=freq)
- elif not hasattr(dates, 'freq'):
- newdates = date_array(dlist=dates, freq=freq)
- else:
- newdates = dates
- return newdates
-
-#..............................................................................
-def fromrecords(reclist, dates=None, freq=None, start_date=None,
- dtype=None, shape=None, formats=None, names=None,
- titles=None, aligned=False, byteorder=None):
- """Creates a MaskedRecords from a list of records.
-
- The data in the same field can be heterogeneous, they will be promoted
- to the highest data type. This method is intended for creating
- smaller record arrays. If used to create large array without formats
- defined, it can be slow.
-
- If formats is None, then this will auto-detect formats. Use a list of
- tuples rather than a list of lists for faster processing.
- """
- # reclist is in fact a mrecarray .................
- if isinstance(reclist, MultiTimeSeries):
- mdescr = reclist.dtype
- shape = reclist.shape
- return MultiTimeSeries(reclist, dtype=mdescr)
- # No format, no dtype: create from to arrays .....
- _data = mrecfromrecords(reclist, dtype=dtype, shape=shape, formats=formats,
- names=names, titles=titles, aligned=aligned,
- byteorder=byteorder)
- _dtype = _data.dtype
- # Check the names for a '_dates' .................
- newdates = None
- _names = list(_dtype.names)
- reserved = [n for n in _names if n.lower() in ['dates', '_dates']]
- if len(reserved) > 0:
- newdates = _data[reserved[-1]]
- [_names.remove(n) for n in reserved]
- _dtype = numeric.dtype([t for t in _dtype.descr \
- if t[0] not in reserved ])
- _data = [_data[n] for n in _names]
- #
- newdates = __getdates(dates=dates, newdates=newdates, length=len(_data),
- freq=freq, start_date=start_date)
- #
- return MultiTimeSeries(_data, dates=newdates, dtype=_dtype,
- names=_names)
-
-
-def fromtextfile(fname, delimitor=None, commentchar='#', missingchar='',
- dates_column=None, varnames=None, vartypes=None,
- dates=None):
- """Creates a multitimeseries from data stored in the file `filename`.
-
-:Parameters:
- - `filename` : file name/handle
- Handle of an opened file.
- - `delimitor` : Character *None*
- Alphanumeric character used to separate columns in the file.
- If None, any (group of) white spacestring(s) will be used.
- - `commentchar` : String *['#']*
- Alphanumeric character used to mark the start of a comment.
- - `missingchar` : String *['']*
- String indicating missing data, and used to create the masks.
- - `datescol` : Integer *[None]*
- Position of the columns storing dates. If None, a position will be
- estimated from the variable names.
- - `varnames` : Sequence *[None]*
- Sequence of the variable names. If None, a list will be created from
- the first non empty line of the file.
- - `vartypes` : Sequence *[None]*
- Sequence of the variables dtypes. If None, the sequence will be estimated
- from the first non-commented line.
-
-
- Ultra simple: the varnames are in the header, one line"""
- # Try to open the file ......................
- f = openfile(fname)
- # Get the first non-empty line as the varnames
- while True:
- line = f.readline()
- firstline = line[:line.find(commentchar)].strip()
- _varnames = firstline.split(delimitor)
- if len(_varnames) > 1:
- break
- if varnames is None:
- varnames = _varnames
- # Get the data ..............................
- _variables = MA.asarray([line.strip().split(delimitor) for line in f
- if line[0] != commentchar and len(line) > 1])
- (nvars, nfields) = _variables.shape
- # Check if we need to get the dates..........
- if dates_column is None:
- dates_column = [i for (i,n) in enumerate(list(varnames))
- if n.lower() in ['_dates','dates']]
- elif isinstance(dates_column,(int,float)):
- if dates_column > nfields:
- raise ValueError,\
- "Invalid column number: %i > %i" % (dates_column, nfields)
- dates_column = [dates_column,]
- if len(dates_column) > 0:
- cols = range(nfields)
- [cols.remove(i) for i in dates_column]
- newdates = date_array(_variables[:,dates_column[-1]])
- _variables = _variables[:,cols]
- varnames = [varnames[i] for i in cols]
- if vartypes is not None:
- vartypes = [vartypes[i] for i in cols]
- nfields -= len(dates_column)
- else:
- newdates = None
- # Try to guess the dtype ....................
- if vartypes is None:
- vartypes = _guessvartypes(_variables[0])
- else:
- vartypes = [numeric.dtype(v) for v in vartypes]
- if len(vartypes) != nfields:
- msg = "Attempting to %i dtypes for %i fields!"
- msg += " Reverting to default."
- warnings.warn(msg % (len(vartypes), nfields))
- vartypes = _guessvartypes(_variables[0])
- # Construct the descriptor ..................
- mdescr = [(n,f) for (n,f) in zip(varnames, vartypes)]
- # Get the data and the mask .................
- # We just need a list of masked_arrays. It's easier to create it like that:
- _mask = (_variables.T == missingchar)
- _datalist = [masked_array(a,mask=m,dtype=t)
- for (a,m,t) in zip(_variables.T, _mask, vartypes)]
- #
- newdates = __getdates(dates=dates, newdates=newdates, length=nvars,
- freq=None, start_date=None)
- return MultiTimeSeries(_datalist, dates=newdates, dtype=mdescr)
-
-
-
-################################################################################
-if __name__ == '__main__':
- import numpy as N
- from maskedarray.testutils import assert_equal
- if 1:
- d = N.arange(5)
- m = MA.make_mask([1,0,0,1,1])
- base_d = N.r_[d,d[::-1]].reshape(2,-1).T
- base_m = N.r_[[m, m[::-1]]].T
- base = MA.array(base_d, mask=base_m)
- mrec = MR.fromarrays(base.T,)
- dlist = ['2007-%02i' % (i+1) for i in d]
- dates = date_array(dlist)
- ts = time_series(mrec,dates)
- mts = MultiTimeSeries(mrec,dates)
- self_data = [d, m, mrec, dlist, dates, ts, mts]
-
- assert(isinstance(mts.f0, TimeSeries))
- #
- if 1:
- recfirst = mts._data[0]
- print recfirst, type(recfirst)
- print mrec[0], type(mrec[0])
-
Copied: trunk/scipy/sandbox/timeseries/trecords.py (from rev 3319, trunk/scipy/sandbox/timeseries/tmulti.py)
===================================================================
--- trunk/scipy/sandbox/timeseries/tmulti.py 2007-09-18 15:04:50 UTC (rev 3319)
+++ trunk/scipy/sandbox/timeseries/trecords.py 2007-09-19 00:51:14 UTC (rev 3327)
@@ -0,0 +1,524 @@
+# pylint: disable-msg=W0201, W0212
+"""
+Support for multi-variable time series, through masked recarrays.
+
+:author: Pierre GF Gerard-Marchant & Matt Knox
+:contact: pierregm_at_uga_dot_edu - mattknox_ca_at_hotmail_dot_com
+:version: $Id$
+"""
+__author__ = "Pierre GF Gerard-Marchant & Matt Knox ($Author$)"
+__version__ = '1.0'
+__revision__ = "$Revision$"
+__date__ = '$Date$'
+
+
+import sys
+
+import numpy
+from numpy import bool_, complex_, float_, int_, str_, object_
+import numpy.core.fromnumeric as fromnumeric
+import numpy.core.numeric as numeric
+from numpy.core.numeric import ndarray
+import numpy.core.numerictypes as ntypes
+import numpy.core.umath as umath
+from numpy.core.defchararray import chararray
+from numpy.core.records import find_duplicate
+from numpy.core.records import format_parser, recarray, record
+from numpy.core.records import fromarrays as recfromarrays
+
+import maskedarray as MA
+#MaskedArray = MA.MaskedArray
+from maskedarray.core import MaskedArray, MAError, default_fill_value, \
+ masked_print_option
+from maskedarray.core import masked, nomask, getmask, getmaskarray, make_mask,\
+ make_mask_none, mask_or, masked_array, filled
+
+import maskedarray.mrecords as MR
+from maskedarray.mrecords import _checknames, _guessvartypes, openfile,\
+ MaskedRecords
+from maskedarray.mrecords import fromrecords as mrecfromrecords
+
+from tseries import TimeSeries, time_series, _getdatalength
+from dates import Date, DateArray, date_array
+
+#ndarray = numeric.ndarray
+_byteorderconv = numpy.core.records._byteorderconv
+_typestr = ntypes._typestr
+
+reserved_fields = MR.reserved_fields + ['_dates']
+
+import warnings
+
+__all__ = [
+'TimeSeriesRecords','fromarrays','fromrecords','fromtextfile',
+]
+
+def _getformats(data):
+ """Returns the formats of each array of arraylist as a comma-separated
+ string."""
+ if isinstance(data, record):
+ return ",".join([desc[1] for desc in data.dtype.descr])
+
+ formats = ''
+ for obj in data:
+ obj = numeric.asarray(obj)
+# if not isinstance(obj, ndarray):
+## if not isinstance(obj, ndarray):
+# raise ValueError, "item in the array list must be an ndarray."
+ formats += _typestr[obj.dtype.type]
+ if issubclass(obj.dtype.type, ntypes.flexible):
+ formats += `obj.itemsize`
+ formats += ','
+ return formats[:-1]
+
+
+
+
+class TimeSeriesRecords(TimeSeries, MaskedRecords, object):
+ """
+
+:IVariables:
+ - `__localfdict` : Dictionary
+ Dictionary of local fields (`f0_data`, `f0_mask`...)
+ - `__globalfdict` : Dictionary
+ Dictionary of global fields, as the combination of a `_data` and a `_mask`.
+ (`f0`)
+ """
+ _defaultfieldmask = nomask
+ _defaulthardmask = False
+ def __new__(cls, data, dates=None, mask=nomask, dtype=None,
+ freq=None, observed=None, start_date=None,
+ hard_mask=False, fill_value=None,
+# offset=0, strides=None,
+ formats=None, names=None, titles=None,
+ byteorder=None, aligned=False):
+ tsoptions = dict(fill_value=fill_value, hard_mask=hard_mask,)
+ mroptions = dict(fill_value=fill_value, hard_mask=hard_mask,
+ formats=formats, names=names, titles=titles,
+ byteorder=byteorder, aligned=aligned)
+ #
+ if isinstance(data, TimeSeriesRecords):
+# if copy:
+# data = data.copy()
+ data._hardmask = data._hardmask | hard_mask
+ return data
+ # .......................................
+ _data = MaskedRecords(data, mask=mask, dtype=dtype, **mroptions).view(cls)
+ if dates is None:
+ length = _getdatalength(data)
+ newdates = date_array(start_date=start_date, length=length,
+ freq=freq)
+ elif not hasattr(dates, 'freq'):
+ newdates = date_array(dlist=dates, freq=freq)
+ else:
+ newdates = dates
+ _data._dates = newdates
+ _data._observed = observed
+ cls._defaultfieldmask = _data._fieldmask
+ #
+ return _data
+
+ def __array_finalize__(self,obj):
+ if isinstance(obj, (MaskedRecords)):
+ self.__dict__.update(_fieldmask=obj._fieldmask,
+ _hardmask=obj._hardmask,
+ _fill_value=obj._fill_value,
+ _names = obj.dtype.names
+ )
+ if isinstance(obj, TimeSeriesRecords):
+ self.__dict__.update(observed=obj.observed,
+ _dates=obj._dates)
+ else:
+ self.__dict__.update(observed=None,
+ _dates=[])
+ else:
+ self.__dict__.update(_dates = [],
+ observed=None,
+ _fieldmask = nomask,
+ _hardmask = False,
+ fill_value = None,
+ _names = self.dtype.names
+ )
+ return
+
+
+ def _getdata(self):
+ "Returns the data as a recarray."
+ return self.view(recarray)
+ _data = property(fget=_getdata)
+
+ def _getseries(self):
+ "Returns the data as a MaskedRecord array."
+ return self.view(MaskedRecords)
+ _series = property(fget=_getseries)
+
+ #......................................................
+ def __getattribute__(self, attr):
+ getattribute = MaskedRecords.__getattribute__
+ _dict = getattribute(self,'__dict__')
+ if attr in _dict.get('_names',[]):
+ obj = getattribute(self,attr).view(TimeSeries)
+ obj._dates = _dict['_dates']
+ return obj
+ return getattribute(self,attr)
+
+
+ def __setattr__(self, attr, val):
+ newattr = attr not in self.__dict__
+ try:
+ # Is attr a generic attribute ?
+ ret = object.__setattr__(self, attr, val)
+ except:
+ # Not a generic attribute: exit if it's not a valid field
+ fielddict = self.dtype.names or {}
+ if attr not in fielddict:
+ exctype, value = sys.exc_info()[:2]
+ raise exctype, value
+ else:
+ if attr not in list(self.dtype.names) + ['_dates','_mask']:
+ return ret
+ if newattr: # We just added this one
+ try: # or this setattr worked on an internal
+ # attribute.
+ object.__delattr__(self, attr)
+ except:
+ return ret
+ # Case #1.: Basic field ............
+ base_fmask = self._fieldmask
+ _names = self.dtype.names
+ if attr in _names:
+ fval = filled(val)
+ mval = getmaskarray(val)
+ if self._hardmask:
+ mval = mask_or(mval, base_fmask.__getattr__(attr))
+ self._data.__setattr__(attr, fval)
+ base_fmask.__setattr__(attr, mval)
+ return
+ elif attr == '_mask':
+ if self._hardmask:
+ val = make_mask(val)
+ if val is not nomask:
+# mval = getmaskarray(val)
+ for k in _names:
+ m = mask_or(val, base_fmask.__getattr__(k))
+ base_fmask.__setattr__(k, m)
+ else:
+ mval = getmaskarray(val)
+ for k in _names:
+ base_fmask.__setattr__(k, mval)
+ return
+ #............................................
+ def __getitem__(self, indx):
+ """Returns all the fields sharing the same fieldname base.
+ The fieldname base is either `_data` or `_mask`."""
+ _localdict = self.__dict__
+ # We want a field ........
+ if indx in self.dtype.names:
+ obj = self._data[indx].view(TimeSeries)
+ obj._dates = _localdict['_dates']
+ obj._mask = make_mask(_localdict['_fieldmask'][indx])
+ return obj
+ # We want some elements ..
+ (sindx, dindx) = self._TimeSeries__checkindex(indx)
+# obj = numeric.array(self._data[sindx],
+# copy=False, subok=True).view(type(self))
+ obj = numeric.array(self._data[sindx], copy=False, subok=True)
+ obj = obj.view(type(self))
+ obj.__dict__.update(_dates=_localdict['_dates'][dindx],
+ _fieldmask=_localdict['_fieldmask'][sindx],
+ _fill_value=_localdict['_fill_value'])
+ return obj
+
+ def __getslice__(self, i, j):
+ """Returns the slice described by [i,j]."""
+ _localdict = self.__dict__
+ (si, di) = super(TimeSeriesRecords, self)._TimeSeries__checkindex(i)
+ (sj, dj) = super(TimeSeriesRecords, self)._TimeSeries__checkindex(j)
+ newdata = self._data[si:sj].view(type(self))
+ newdata.__dict__.update(_dates=_localdict['_dates'][di:dj],
+ _mask=_localdict['_fieldmask'][si:sj])
+ return newdata
+
+ def __setslice__(self, i, j, value):
+ """Sets the slice described by [i,j] to `value`."""
+ self.view(MaskedRecords).__setslice__(i,j,value)
+ return
+
+ #......................................................
+ def __str__(self):
+ """x.__str__() <==> str(x)
+Calculates the string representation, using masked for fill if it is enabled.
+Otherwise, fills with fill value.
+ """
+ if self.size > 1:
+ mstr = ["(%s)" % ",".join([str(i) for i in s])
+ for s in zip(*[getattr(self,f) for f in self.dtype.names])]
+ return "[%s]" % ", ".join(mstr)
+ else:
+ mstr = numeric.asarray(self._data.item(), dtype=object_)
+ mstr[list(self._fieldmask)] = masked_print_option
+ return str(mstr)
+
+ def __repr__(self):
+ """x.__repr__() <==> repr(x)
+Calculates the repr representation, using masked for fill if it is enabled.
+Otherwise fill with fill value.
+ """
+ _names = self.dtype.names
+ _dates = self._dates
+ if numeric.size(_dates) > 2 and self._dates.isvalid():
+ timestr = "[%s ... %s]" % (str(_dates[0]),str(_dates[-1]))
+ else:
+ timestr = str(_dates)
+ fmt = "%%%is : %%s" % (max([len(n) for n in _names])+4,)
+ reprstr = [fmt % (f,getattr(self,f)) for f in self.dtype.names]
+ reprstr.insert(0,'TimeSeriesRecords(')
+ reprstr.extend([fmt % ('dates', timestr),
+ fmt % (' fill_value', self._fill_value),
+ ' )'])
+ return str("\n".join(reprstr))
+ #.............................................
+ def copy(self):
+ "Returns a copy of the argument."
+ _localdict = self.__dict__
+ return TimeSeriesRecords(_localdict['_data'].copy(),
+ dates=_localdict['_dates'].copy(),
+ mask=_localdict['_fieldmask'].copy(),
+ dtype=self.dtype)
+
+
+#####---------------------------------------------------------------------------
+#---- --- Constructors ---
+#####---------------------------------------------------------------------------
+
+def fromarrays(arraylist, dates=None,
+ dtype=None, shape=None, formats=None,
+ names=None, titles=None, aligned=False, byteorder=None):
+ """Creates a mrecarray from a (flat) list of masked arrays.
+
+:Parameters:
+ - `arraylist` : Sequence
+ A list of (masked) arrays. Each element of the sequence is first converted
+ to a masked array if needed. If a 2D array is passed as argument, it is
+ processed line by line
+ - `dtype` : numeric.dtype
+ Data type descriptor.
+ - `shape` : Integer *[None]*
+ Number of records. If None, `shape` is defined from the shape of the first
+ array in the list.
+ - `formats` :
+ (Description to write)
+ - `names` :
+ (description to write)
+ - `titles`:
+ (Description to write)
+ - `aligned`: Boolen *[False]*
+ (Description to write, not used anyway)
+ - `byteorder`: Boolen *[None]*
+ (Description to write, not used anyway)
+
+
+ """
+ arraylist = [MA.asarray(x) for x in arraylist]
+ # Define/check the shape.....................
+ if shape is None or shape == 0:
+ shape = arraylist[0].shape
+ if isinstance(shape, int):
+ shape = (shape,)
+ # Define formats from scratch ...............
+ if formats is None and dtype is None:
+ formats = _getformats(arraylist)
+ # Define the dtype ..........................
+ if dtype is not None:
+ descr = numeric.dtype(dtype)
+ _names = descr.names
+ else:
+ parsed = format_parser(formats, names, titles, aligned, byteorder)
+ _names = parsed._names
+ descr = parsed._descr
+ # Determine shape from data-type.............
+ if len(descr) != len(arraylist):
+ msg = "Mismatch between the number of fields (%i) and the number of "\
+ "arrays (%i)"
+ raise ValueError, msg % (len(descr), len(arraylist))
+ d0 = descr[0].shape
+ nn = len(d0)
+ if nn > 0:
+ shape = shape[:-nn]
+ # Make sure the shape is the correct one ....
+ for k, obj in enumerate(arraylist):
+ nn = len(descr[k].shape)
+ testshape = obj.shape[:len(obj.shape)-nn]
+ if testshape != shape:
+ raise ValueError, "Array-shape mismatch in array %d" % k
+ # Reconstruct the descriptor, by creating a _data and _mask version
+ return TimeSeriesRecords(arraylist, dtype=descr)
+
+def __getdates(dates=None, newdates=None, length=None, freq=None,
+ start_date=None):
+ """Determines new dates (private function not meant to be used)."""
+ if dates is None:
+ if newdates is not None:
+ if not hasattr(newdates, 'freq'):
+ newdates = date_array(dlist=newdates, freq=freq)
+ else:
+ newdates = date_array(start_date=start_date, length=length,
+ freq=freq)
+ elif not hasattr(dates, 'freq'):
+ newdates = date_array(dlist=dates, freq=freq)
+ else:
+ newdates = dates
+ return newdates
+
+#..............................................................................
+def fromrecords(reclist, dates=None, freq=None, start_date=None,
+ dtype=None, shape=None, formats=None, names=None,
+ titles=None, aligned=False, byteorder=None):
+ """Creates a MaskedRecords from a list of records.
+
+ The data in the same field can be heterogeneous, they will be promoted
+ to the highest data type. This method is intended for creating
+ smaller record arrays. If used to create large array without formats
+ defined, it can be slow.
+
+ If formats is None, then this will auto-detect formats. Use a list of
+ tuples rather than a list of lists for faster processing.
+ """
+ # reclist is in fact a mrecarray .................
+ if isinstance(reclist, TimeSeriesRecords):
+ mdescr = reclist.dtype
+ shape = reclist.shape
+ return TimeSeriesRecords(reclist, dtype=mdescr)
+ # No format, no dtype: create from to arrays .....
+ _data = mrecfromrecords(reclist, dtype=dtype, shape=shape, formats=formats,
+ names=names, titles=titles, aligned=aligned,
+ byteorder=byteorder)
+ _dtype = _data.dtype
+ # Check the names for a '_dates' .................
+ newdates = None
+ _names = list(_dtype.names)
+ reserved = [n for n in _names if n.lower() in ['dates', '_dates']]
+ if len(reserved) > 0:
+ newdates = _data[reserved[-1]]
+ [_names.remove(n) for n in reserved]
+ _dtype = numeric.dtype([t for t in _dtype.descr \
+ if t[0] not in reserved ])
+ _data = [_data[n] for n in _names]
+ #
+ newdates = __getdates(dates=dates, newdates=newdates, length=len(_data),
+ freq=freq, start_date=start_date)
+ #
+ return TimeSeriesRecords(_data, dates=newdates, dtype=_dtype,
+ names=_names)
+
+
+def fromtextfile(fname, delimitor=None, commentchar='#', missingchar='',
+ dates_column=None, varnames=None, vartypes=None,
+ dates=None):
+ """Creates a TimeSeriesRecords from data stored in the file `filename`.
+
+:Parameters:
+ - `filename` : file name/handle
+ Handle of an opened file.
+ - `delimitor` : Character *None*
+ Alphanumeric character used to separate columns in the file.
+ If None, any (group of) white spacestring(s) will be used.
+ - `commentchar` : String *['#']*
+ Alphanumeric character used to mark the start of a comment.
+ - `missingchar` : String *['']*
+ String indicating missing data, and used to create the masks.
+ - `datescol` : Integer *[None]*
+ Position of the columns storing dates. If None, a position will be
+ estimated from the variable names.
+ - `varnames` : Sequence *[None]*
+ Sequence of the variable names. If None, a list will be created from
+ the first non empty line of the file.
+ - `vartypes` : Sequence *[None]*
+ Sequence of the variables dtypes. If None, the sequence will be estimated
+ from the first non-commented line.
+
+
+ Ultra simple: the varnames are in the header, one line"""
+ # Try to open the file ......................
+ f = openfile(fname)
+ # Get the first non-empty line as the varnames
+ while True:
+ line = f.readline()
+ firstline = line[:line.find(commentchar)].strip()
+ _varnames = firstline.split(delimitor)
+ if len(_varnames) > 1:
+ break
+ if varnames is None:
+ varnames = _varnames
+ # Get the data ..............................
+ _variables = MA.asarray([line.strip().split(delimitor) for line in f
+ if line[0] != commentchar and len(line) > 1])
+ (nvars, nfields) = _variables.shape
+ # Check if we need to get the dates..........
+ if dates_column is None:
+ dates_column = [i for (i,n) in enumerate(list(varnames))
+ if n.lower() in ['_dates','dates']]
+ elif isinstance(dates_column,(int,float)):
+ if dates_column > nfields:
+ raise ValueError,\
+ "Invalid column number: %i > %i" % (dates_column, nfields)
+ dates_column = [dates_column,]
+ if len(dates_column) > 0:
+ cols = range(nfields)
+ [cols.remove(i) for i in dates_column]
+ newdates = date_array(_variables[:,dates_column[-1]])
+ _variables = _variables[:,cols]
+ varnames = [varnames[i] for i in cols]
+ if vartypes is not None:
+ vartypes = [vartypes[i] for i in cols]
+ nfields -= len(dates_column)
+ else:
+ newdates = None
+ # Try to guess the dtype ....................
+ if vartypes is None:
+ vartypes = _guessvartypes(_variables[0])
+ else:
+ vartypes = [numeric.dtype(v) for v in vartypes]
+ if len(vartypes) != nfields:
+ msg = "Attempting to %i dtypes for %i fields!"
+ msg += " Reverting to default."
+ warnings.warn(msg % (len(vartypes), nfields))
+ vartypes = _guessvartypes(_variables[0])
+ # Construct the descriptor ..................
+ mdescr = [(n,f) for (n,f) in zip(varnames, vartypes)]
+ # Get the data and the mask .................
+ # We just need a list of masked_arrays. It's easier to create it like that:
+ _mask = (_variables.T == missingchar)
+ _datalist = [masked_array(a,mask=m,dtype=t)
+ for (a,m,t) in zip(_variables.T, _mask, vartypes)]
+ #
+ newdates = __getdates(dates=dates, newdates=newdates, length=nvars,
+ freq=None, start_date=None)
+ return TimeSeriesRecords(_datalist, dates=newdates, dtype=mdescr)
+
+
+
+################################################################################
+if __name__ == '__main__':
+ import numpy as N
+ from maskedarray.testutils import assert_equal
+ if 1:
+ d = N.arange(5)
+ m = MA.make_mask([1,0,0,1,1])
+ base_d = N.r_[d,d[::-1]].reshape(2,-1).T
+ base_m = N.r_[[m, m[::-1]]].T
+ base = MA.array(base_d, mask=base_m)
+ mrec = MR.fromarrays(base.T,)
+ dlist = ['2007-%02i' % (i+1) for i in d]
+ dates = date_array(dlist)
+ ts = time_series(mrec,dates)
+ mts = TimeSeriesRecords(mrec,dates)
+ self_data = [d, m, mrec, dlist, dates, ts, mts]
+
+ assert(isinstance(mts.f0, TimeSeries))
+ #
+ if 1:
+ recfirst = mts._data[0]
+ print recfirst, type(recfirst)
+ print mrec[0], type(mrec[0])
+
Modified: trunk/scipy/sandbox/timeseries/tseries.py
===================================================================
--- trunk/scipy/sandbox/timeseries/tseries.py 2007-09-18 23:22:21 UTC (rev 3326)
+++ trunk/scipy/sandbox/timeseries/tseries.py 2007-09-19 00:51:14 UTC (rev 3327)
@@ -33,16 +33,14 @@
import const as _c
-import tdates
-from tdates import DateError, InsufficientDateError
-from tdates import Date, isDate, DateArray, isDateArray, \
+import dates
+from dates import DateError, InsufficientDateError
+from dates import Date, isDate, DateArray, isDateArray, \
date_array, date_array_fromlist, date_array_fromrange, thisday, today, \
check_freq, check_freq_str
import cseries
-
-
__all__ = [
'TimeSeriesError','TimeSeriesCompatibilityError','TimeSeries','isTimeSeries',
'time_series', 'tsmasked',
More information about the Scipy-svn
mailing list