[Scipy-svn] r2675 - in trunk/Lib/sandbox/timeseries: . tests
scipy-svn@scip...
scipy-svn@scip...
Fri Feb 2 21:21:32 CST 2007
Author: pierregm
Date: 2007-02-02 21:21:30 -0600 (Fri, 02 Feb 2007)
New Revision: 2675
Modified:
trunk/Lib/sandbox/timeseries/tests/test_timeseries.py
trunk/Lib/sandbox/timeseries/tseries.py
Log:
tseries: add concatenate_series and compressed
Modified: trunk/Lib/sandbox/timeseries/tests/test_timeseries.py
===================================================================
--- trunk/Lib/sandbox/timeseries/tests/test_timeseries.py 2007-02-03 03:15:48 UTC (rev 2674)
+++ trunk/Lib/sandbox/timeseries/tests/test_timeseries.py 2007-02-03 03:21:30 UTC (rev 2675)
@@ -29,7 +29,7 @@
#reload(tseries)
from timeseries.tseries import Date, date_array_fromlist, date_array, thisday
from timeseries.tseries import time_series, TimeSeries, adjust_endpoints, \
- mask_period, align_series, fill_missing_dates, tsmasked
+ mask_period, align_series, fill_missing_dates, tsmasked, concatenate_series
class test_creation(NumpyTestCase):
"Base test class for MaskedArrays."
@@ -453,6 +453,54 @@
except:
exception = True
assert(exception)
+
+ def test_compressed(self):
+ "Tests compressed"
+ dlist = ['2007-01-%02i' % i for i in range(1,16)]
+ dates = date_array_fromlist(dlist)
+ data = masked_array(numeric.arange(15), mask=[1,0,0,0,0]*3, dtype=float_)
+ series = time_series(data, dlist)
+ #
+ keeper = N.array([0,1,1,1,1]*3, dtype=bool_)
+ c_series = series.compressed()
+ assert_equal(c_series._data, [1,2,3,4,6,7,8,9,11,12,13,14])
+ assert_equal(c_series._mask, nomask)
+ assert_equal(c_series._dates, dates[keeper])
+ #
+ series_st = time_series(MA.column_stack((data,data[::-1])),
+ dates=dates)
+ c_series = series_st.compressed()
+ d = [1,2,3,6,7,8,11,12,13]
+ assert_equal(c_series._data, N.c_[(d,list(reversed(d)))])
+ assert_equal(c_series._mask, nomask)
+ assert_equal(c_series._dates, dates[d])
+
+ def test_concatenate(self):
+ "Tests concatenate"
+ dlist = ['2007-%02i' % i for i in range(1,6)]
+ dates = date_array_fromlist(dlist)
+ data = masked_array(numeric.arange(5), mask=[1,0,0,0,0], dtype=float_)
+ #
+ ser_1 = time_series(data, dates)
+ ser_2 = time_series(data, dates=dates+10)
+ newseries = concatenate_series([ser_1, ser_2])
+ assert_equal(newseries._data,[0,1,2,3,4,0,0,0,0,0,0,1,2,3,4])
+ assert_equal(newseries._mask,[1,0,0,0,0]+[1]*5+[1,0,0,0,0])
+ #
+ ser_1 = time_series(data, dates)
+ ser_2 = time_series(data, dates=dates+10)
+ newseries = concatenate_series([ser_1, ser_2], keep_gap=False)
+ assert_equal(newseries._data,[0,1,2,3,4,0,1,2,3,4])
+ assert_equal(newseries._mask,[1,0,0,0,0]+[1,0,0,0,0])
+ assert newseries.has_missing_dates()
+ #
+ ser_2 = time_series(data, dates=dates+3)
+ newseries = concatenate_series([ser_1, ser_2])
+ assert_equal(newseries._data,[0,1,2,0,1,2,3,4])
+ assert_equal(newseries._mask,[1,0,0,1,0,0,0,0])
+ #
+
+
###############################################################################
Modified: trunk/Lib/sandbox/timeseries/tseries.py
===================================================================
--- trunk/Lib/sandbox/timeseries/tseries.py 2007-02-03 03:15:48 UTC (rev 2674)
+++ trunk/Lib/sandbox/timeseries/tseries.py 2007-02-03 03:21:30 UTC (rev 2675)
@@ -64,7 +64,7 @@
'day_of_week','day_of_year','day','month','quarter','year','hour','minute','second',
'tofile','asrecords','flatten','adjust_endpoints','align_series','aligned',
'mask_period','mask_inside_period','mask_outside_period',
-'convert','fill_missing_dates', 'stack', 'tsmasked'
+'convert','fill_missing_dates', 'stack'
]
#...............................................................................
@@ -148,7 +148,7 @@
if len(set(shapes)) > 1:
errItems = tuple(set(shapes))
raise TimeSeriesCompatibilityError('size', "1: %s" % str(errItems[0].shape),
- "2: %s" % str(errItems[1].shape))
+ "2: %s" % str(errItems[1].shape))
return True
@@ -175,6 +175,18 @@
return numeric.asarray(numeric.shape(data))[:-1].prod()
else:
return numeric.size(data)
+
+def _compare_frequencies(*series):
+ """Compares the frequencies of a sequence of series.
+ Returns the common frequency, or raises an exception if series have different
+ frequencies."""
+ unique_freqs = numpy.unique([x.freqstr for x in series])
+ try:
+ common_freq = unique_freqs.item()
+ except ValueError:
+ raise TimeSeriesError, \
+ "All series must have same frequency!"
+ return common_freq
##### --------------------------------------------------------------------------
##--- ... Time Series ...
@@ -374,7 +386,7 @@
if self is masked:
raise MAError, 'Cannot alter the masked element.'
(sindx, dindx) = self.__checkindex(indx)
- #....
+ #....
if value is tsmasked:
self._series[sindx] = masked
elif isinstance(value, TimeSeries):
@@ -1053,7 +1065,27 @@
"""Masks values falling outside a given range of dates."""
return mask_period(data, start_date=start_date, end_date=end_date,
inside=False, include_edges=include_edges, inplace=inplace)
-#..........................................................
+
+#...............................................................................
+def compressed(series):
+ """Suppresses missing values from a time series."""
+ if series._mask is nomask:
+ return series
+ if series.ndim == 1:
+ keeper = ~(series._mask)
+ elif series.ndim == 2:
+ # Both dates and data are 2D: ravel first
+ if series._dates.ndim == 2:
+ series = series.ravel()
+ keeper = ~(series._mask)
+ # a 2D series: suppress the rows (dates are in columns)
+ else:
+ keeper = ~(series._mask.any(-1))
+ else:
+ raise NotImplementedError
+ return series[keeper]
+TimeSeries.compressed = compressed
+#...............................................................................
def adjust_endpoints(a, start_date=None, end_date=None):
"""Returns a TimeSeries going from `start_date` to `end_date`.
If `start_date` and `end_date` both fall into the initial range of dates,
@@ -1139,14 +1171,7 @@
if len(series) < 2:
return series
unique_freqs = numpy.unique([x.freqstr for x in series])
- try:
- common_freq = unique_freqs.item()
- except ValueError:
- raise TimeSeriesError, \
- "All series must have same frequency!"
- if common_freq == 'U':
- raise TimeSeriesError, \
- "Cannot adjust a series with 'Undefined' frequency."
+ common_freq = _compare_frequencies(*series)
valid_states = [x.isvalid() for x in series]
if not numpy.all(valid_states):
raise TimeSeriesError, \
@@ -1272,11 +1297,7 @@
newseries = TimeSeries(newdata, series._dates, **options)
return newseries
TimeSeries.tshift = tshift
-#....................................................................
-
-
-
-#....................................................................
+#...............................................................................
def fill_missing_dates(data, dates=None, freq=None,fill_value=None):
"""Finds and fills the missing dates in a time series.
The data corresponding to the initially missing dates are masked, or filled to
@@ -1317,7 +1338,6 @@
n = len(dflat)
if not dflat.has_missing_dates():
return time_series(data, dflat)
-
# ...and now, fill it ! ......
(tstart, tend) = dflat[[0,-1]]
newdates = date_array(start_date=tstart, end_date=tend, include_last=True)
@@ -1367,8 +1387,7 @@
else:
nshp = tuple([-1,] + list(data.shape[1:]))
return time_series(newdata.reshape(nshp), newdates)
-
-#....................................................................
+#...............................................................................
def stack(*series):
"""performs a column_stack on the data from each series, and the
resulting series has the same dates as each individual series. All series
@@ -1380,65 +1399,48 @@
_timeseriescompat_multiple(*series)
return time_series(MA.column_stack(series), series[0]._dates,
**_attrib_dict(series[0]))
-
+#...............................................................................
+def concatenate_series(series, keep_gap=True):
+ """Concatenates a sequence of series, by chronological order.
+ Overlapping data are resolved in sequence order: the data from the first series
+ of the sequence will be overwritten by the data of the second series, and so forth.
+ If keep_gap is true, any gap between consecutive, non-overlapping series is
+ kept: the corresponding data are masked.
+ """
+ common_f = _compare_frequencies(*series)
+ start_date = min([s.start_date for s in series if s.start_date is not None])
+ end_date = max([s.end_date for s in series if s.end_date is not None])
+ newdtype = max([s.dtype for s in series])
+ whichone = numeric.zeros((end_date-start_date+1), dtype=int_)
+ newseries = time_series(numeric.empty((end_date-start_date+1), dtype=newdtype),
+ dates=date_array(start_date, end_date, freq=common_f),
+ mask=True)
+ newdata = newseries._data
+ newmask = newseries._mask
+ for (k,s) in enumerate(series):
+ start = s.start_date - start_date
+ end = start + len(s)
+ whichone[start:end] = k+1
+ newdata[start:end] = s._data
+ if s._mask is nomask:
+ newmask[start:end] = False
+ else:
+ newmask[start:end] = s._mask
+ keeper = whichone.astype(bool_)
+ if not keep_gap:
+ newseries = newseries[keeper]
+ else:
+ newdata[~keeper] = 0
+ return newseries
+
################################################################################
if __name__ == '__main__':
from maskedarray.testutils import assert_equal
import numpy as N
-# if 0:
-# dlist = ['2007-01-%02i' % i for i in range(1,16)]
-# dates = date_array(dlist)
-# data = masked_array(numeric.arange(15, dtype=float_), mask=[1,0,0,0,0]*3)
-## btseries = BaseTimeSeries(data._data, dates)
-# tseries = time_series(data, dlist)
-# dseries = numpy.log(tseries)
-# if 0:
-# mlist = ['2005-%02i' % i for i in range(1,13)]
-# mlist += ['2006-%02i' % i for i in range(1,13)]
-# mdata = numpy.arange(24)
-# mser1 = time_series(mdata, mlist, observed='SUMMED')
-# #
-# mlist2 = ['2004-%02i' % i for i in range(1,13)]
-# mlist2 += ['2005-%02i' % i for i in range(1,13)]
-# mser2 = time_series(mdata, mlist2, observed='SUMMED')
-# #
-# today = thisday('m')
-# (malg1,malg2) = aligned(mser1, mser2)
-#
-# C = convert(mser2,'A')
-# D = convert(mser2,'A',func=None)
-#
-# if 0:
-# dlist = ['2007-01-%02i' % i for i in range(1,16)]
-# dates = date_array(dlist)
-# print "."*50+"\ndata"
-# data = masked_array(numeric.arange(15)-6, mask=[1,0,0,0,0]*3)
-# print "."*50+"\nseries"
-# tseries = time_series(data, dlist)
-#
-# if 0:
-# dlist_1 = ['2007-01-%02i' % i for i in range(1,8)]
-# dlist_2 = ['2007-01-%02i' % i for i in numpy.arange(1,28)[::4]]
-# data = masked_array(numeric.arange(7), mask=[1,0,0,0,0,0,0])
-# tseries_1 = time_series(data, dlist_1)
-# tseries_2 = time_series(data, dlist_2)
-# tseries_3 = time_series(data[::-1], dlist_2)
-#
-# try:
-# tseries = tseries_1 + tseries_2
-# except TimeSeriesCompatibilityError:
-# print "I knew it!"
-# tseries = tseries_2 + tseries_3
-# assert_equal(tseries._dates, tseries_3._dates)
-# assert_equal(tseries._mask, [1,0,0,0,0,0,1])
-#
-# if 0:
-# mser3 = time_series(MA.mr_[malg1._series, 100+malg2._series].reshape(2,-1).T,
-# dates=malg1.dates)
-# data = mser3._series._data
+
if 1:
dlist = ['2007-01-%02i' % i for i in range(1,16)]
dates = date_array_fromlist(dlist)
More information about the Scipy-svn mailing list