[Scipy-svn] r3698 - in trunk/scipy/sandbox/timeseries: include lib lib/tests src

scipy-svn@scip... scipy-svn@scip...
Sun Dec 23 13:17:56 CST 2007


Author: mattknox_ca
Date: 2007-12-23 13:17:49 -0600 (Sun, 23 Dec 2007)
New Revision: 3698

Modified:
   trunk/scipy/sandbox/timeseries/include/c_tseries.h
   trunk/scipy/sandbox/timeseries/lib/moving_funcs.py
   trunk/scipy/sandbox/timeseries/lib/tests/test_moving_funcs.py
   trunk/scipy/sandbox/timeseries/src/c_tseries.c
   trunk/scipy/sandbox/timeseries/src/cseries.c
Log:
fixed problem with mov_covar and mov_corr functions

Modified: trunk/scipy/sandbox/timeseries/include/c_tseries.h
===================================================================
--- trunk/scipy/sandbox/timeseries/include/c_tseries.h	2007-12-23 17:42:51 UTC (rev 3697)
+++ trunk/scipy/sandbox/timeseries/include/c_tseries.h	2007-12-23 19:17:49 UTC (rev 3698)
@@ -9,8 +9,6 @@
 PyObject *MaskedArray_mov_median(PyObject *, PyObject *, PyObject *);
 PyObject *MaskedArray_mov_min(PyObject *, PyObject *, PyObject *);
 PyObject *MaskedArray_mov_max(PyObject *, PyObject *, PyObject *);
-PyObject *MaskedArray_mov_average(PyObject *, PyObject *, PyObject *);
-PyObject *MaskedArray_mov_stddev(PyObject *, PyObject *, PyObject *);
 
 void import_c_tseries(PyObject *);
 

Modified: trunk/scipy/sandbox/timeseries/lib/moving_funcs.py
===================================================================
--- trunk/scipy/sandbox/timeseries/lib/moving_funcs.py	2007-12-23 17:42:51 UTC (rev 3697)
+++ trunk/scipy/sandbox/timeseries/lib/moving_funcs.py	2007-12-23 19:17:49 UTC (rev 3698)
@@ -13,13 +13,12 @@
 
 __all__ = ['mov_sum', 'mov_median', 'mov_min', 'mov_max',
            'mov_average', 'mov_mean', 'mov_average_expw',
-           'mov_stddev', 'mov_var', 
-#           'mov_covar', 'mov_corr',
+           'mov_stddev', 'mov_var', 'mov_covar', 'mov_corr',
            'cmov_average', 'cmov_mean', 'cmov_window'
            ]
 
 import numpy as N
-from numpy import bool_, float_
+from numpy import bool_, float_, sqrt
 narray = N.array
 
 from scipy.signal import convolve, get_window
@@ -28,8 +27,8 @@
 from maskedarray import MaskedArray, nomask, getmask, getmaskarray, masked
 marray = MA.array
 
-from timeseries.cseries import MA_mov_stddev, MA_mov_sum, MA_mov_average, \
-                               MA_mov_median, MA_mov_min, MA_mov_max
+from timeseries.cseries import MA_mov_sum, MA_mov_median, MA_mov_min, \
+                               MA_mov_max
 
 def _process_result_dict(orig_data, result_dict):
     "process the results from the c function"
@@ -49,7 +48,6 @@
 
     if data.ndim == 1:
         kwargs['array'] = data
-
         result_dict = cfunc(**kwargs)
         return _process_result_dict(data, result_dict)
 
@@ -74,6 +72,14 @@
         raise ValueError, "Data should be at most 2D"
 
 #...............................................................................
+def _mov_sum(data, span, dtype=None, type_num_double=False):
+    """ helper function for calculating moving sum. Resulting dtype can be
+determined in one of two ways. See C-code for more details."""
+    kwargs = {'span':span, 'type_num_double':type_num_double}
+    if dtype is not None:
+        kwargs['dtype'] = dtype
+    return _moving_func(data, MA_mov_sum, kwargs)
+#...............................................................................
 def mov_sum(data, span, dtype=None):
     """Calculates the moving sum of a series.
 
@@ -82,11 +88,7 @@
     $$span$$
     $$dtype$$"""
 
-    kwargs = {'span':span}
-    if dtype is not None:
-        kwargs['dtype'] = dtype
- 
-    return _moving_func(data, MA_mov_sum, kwargs)
+    return _mov_sum(data, span, dtype=dtype)
 #...............................................................................
 def mov_median(data, span, dtype=None):
     """Calculates the moving median of a series.
@@ -137,25 +139,9 @@
     $$data$$
     $$span$$
     $$dtype$$"""
-    
-    kwargs = {'span':span}
-    if dtype is not None:
-        kwargs['dtype'] = dtype
-
-    return _moving_func(data, MA_mov_average, kwargs)
+    return _mov_sum(data, span, dtype=dtype, type_num_double=True)/span
 mov_mean = mov_average
 #...............................................................................
-def _mov_var_stddev(data, span, is_variance, bias, dtype):
-    "helper function for mov_var and mov_stddev functions"
-
-    kwargs = {'span':span,
-              'is_variance':is_variance,
-              'bias':bias}
-    if dtype is not None:
-        kwargs['dtype'] = dtype
-
-    return _moving_func(data, MA_mov_stddev, kwargs)
-#...............................................................................
 def mov_var(data, span, bias=False, dtype=None):
     """Calculates the moving variance of a 1-D array.
 
@@ -164,9 +150,7 @@
     $$span$$
     $$bias$$
     $$dtype$$"""
-    
-    return _mov_var_stddev(data=data, span=span,
-                           is_variance=1, bias=int(bias), dtype=dtype)
+    return mov_covar(data, data, span, bias=bias, dtype=dtype)
 #...............................................................................
 def mov_stddev(data, span, bias=False, dtype=None):
     """Calculates the moving standard deviation of a 1-D array.
@@ -176,42 +160,49 @@
     $$span$$
     $$bias$$
     $$dtype$$"""
+    return sqrt(mov_var(data, span, bias=bias, dtype=dtype))
+#...............................................................................
+def mov_covar(x, y, span, bias=False, dtype=None):
+    """Calculates the moving covariance of two 1-D arrays.
+
+*Parameters*:
+    $$x$$
+    $$y$$
+    $$span$$
+    $$bias$$
+    $$dtype$$"""
     
-    return _mov_var_stddev(data=data, span=span,
-                           is_variance=0, bias=int(bias), dtype=dtype)
+    if bias: denom = span
+    else: denom = span - 1
+    
+    sum_prod = _mov_sum(x*y, span, dtype=dtype, type_num_double=True)
+    sum_x = _mov_sum(x, span, dtype=dtype, type_num_double=True)
+    sum_y = _mov_sum(y, span, dtype=dtype, type_num_double=True)
+    
+    return sum_prod/denom - (sum_x * sum_y) / (span*denom)
 #...............................................................................
-#def mov_covar(x, y, span, bias=False, dtype=None):
-#    """Calculates the moving covariance of two 1-D arrays.
-#
-#*Parameters*:
-#    $$x$$
-#    $$y$$
-#    $$span$$
-#    $$bias$$
-#    $$dtype$$"""
-#    
-#    result = x - mov_average(x, span, dtype=dtype)
-#    result = result * (y - mov_average(y, span, dtype=dtype))
-#    
-#    if bias: denom = span
-#    else: denom = span - 1
-#    
-#    return result/denom
-##...............................................................................
-#def mov_corr(x, y, span, dtype=None):
-#    """Calculates the moving correlation of two 1-D arrays.
-#
-#*Parameters*:
-#    $$x$$
-#    $$y$$
-#    $$span$$
-#    $$dtype$$"""
-#
-#    result = mov_covar(x, y, span, bias=True, dtype=dtype)
-#    result = result / mov_stddev(x, span, bias=True, dtype=dtype)
-#    result = result / mov_stddev(y, span, bias=True, dtype=dtype)
-#   
-#    return result
+def mov_corr(x, y, span, dtype=None):
+    """Calculates the moving correlation of two 1-D arrays.
+
+*Parameters*:
+    $$x$$
+    $$y$$
+    $$span$$
+    $$dtype$$"""
+
+    sum_x = _mov_sum(x, span, dtype=dtype, type_num_double=True)
+    sum_y = _mov_sum(y, span, dtype=dtype, type_num_double=True)
+    
+    sum_prod = _mov_sum(x*y, span, dtype=dtype, type_num_double=True)
+    _covar = sum_prod/span - (sum_x * sum_y) / (span ** 2)
+    
+    sum_prod = _mov_sum(x**2, span, dtype=dtype, type_num_double=True)
+    _stddev_x = sqrt(sum_prod/span - (sum_x ** 2) / (span ** 2))
+
+    sum_prod = _mov_sum(y**2, span, dtype=dtype, type_num_double=True)
+    _stddev_y = sqrt(sum_prod/span - (sum_y ** 2) / (span ** 2))
+
+    return _covar / (_stddev_x * _stddev_y)
 #...............................................................................
 def mov_average_expw(data, span, tol=1e-6):
     """Calculates the exponentially weighted moving average of a series.

Modified: trunk/scipy/sandbox/timeseries/lib/tests/test_moving_funcs.py
===================================================================
--- trunk/scipy/sandbox/timeseries/lib/tests/test_moving_funcs.py	2007-12-23 17:42:51 UTC (rev 3697)
+++ trunk/scipy/sandbox/timeseries/lib/tests/test_moving_funcs.py	2007-12-23 19:17:49 UTC (rev 3698)
@@ -140,6 +140,13 @@
                 assert_equal(result._mask, result_mask)
                 assert_equal(result._dates, data._dates)
 
+    def test_covar(self):
+        # test that covariance of series with itself is equal to variance
+        data = self.maskeddata
+        for bias in [True, False]:
+            covar = MF.mov_covar(data, data, 3, bias=bias)
+            var = MF.mov_var(data, 3, bias=bias)
+            assert_equal(covar, var)
 
 #------------------------------------------------------------------------------
 if __name__ == "__main__":

Modified: trunk/scipy/sandbox/timeseries/src/c_tseries.c
===================================================================
--- trunk/scipy/sandbox/timeseries/src/c_tseries.c	2007-12-23 17:42:51 UTC (rev 3697)
+++ trunk/scipy/sandbox/timeseries/src/c_tseries.c	2007-12-23 19:17:49 UTC (rev 3698)
@@ -478,19 +478,26 @@
              *result_dict=NULL;
     PyArray_Descr *dtype=NULL;
 
-    int rtype, span;
+    int rtype, span, type_num_double;
 
-    static char *kwlist[] = {"array", "span", "dtype", NULL};
+    static char *kwlist[] = {"array", "span", "type_num_double", "dtype", NULL};
 
     if (!PyArg_ParseTupleAndKeywords(args, kwds,
-                "Oi|O&:mov_sum(array, span, dtype)", kwlist,
-                &orig_arrayobj, &span,
+                "Oii|O&:mov_sum(array, span, type_num_double , dtype)", kwlist,
+                &orig_arrayobj, &span, &type_num_double,
                 PyArray_DescrConverter2, &dtype)) return NULL;
 
     check_mov_args(orig_arrayobj, span, 1,
                    &orig_ndarray, &result_mask);
 
-    rtype = _get_type_num(((PyArrayObject*)orig_ndarray)->descr, dtype);
+	if (type_num_double) {
+		/* if the moving sum is being used as an intermediate step in something
+		like a standard deviation calculation, etc... then _get_type_num_double
+		should be used to determine the appropriate return type. */
+		rtype = _get_type_num_double(((PyArrayObject*)orig_ndarray)->descr, dtype);
+	} else {
+    	rtype = _get_type_num(((PyArrayObject*)orig_ndarray)->descr, dtype);
+	}
 
     result_ndarray = calc_mov_sum((PyArrayObject*)orig_ndarray,
                                   span, rtype);
@@ -506,54 +513,6 @@
     return result_dict;
 }
 
-PyObject *
-MaskedArray_mov_average(PyObject *self, PyObject *args, PyObject *kwds)
-{
-    PyObject *orig_arrayobj=NULL, *orig_ndarray=NULL,
-             *result_ndarray=NULL, *result_mask=NULL,
-             *result_dict=NULL,
-             *mov_sum=NULL, *denom=NULL;
-    PyArray_Descr *dtype=NULL;
-
-    int rtype, span;
-
-    static char *kwlist[] = {"array", "span", "dtype", NULL};
-
-    if (!PyArg_ParseTupleAndKeywords(args, kwds,
-                "Oi|O&:mov_average(array, span, dtype)", kwlist,
-                &orig_arrayobj, &span,
-                PyArray_DescrConverter2, &dtype)) return NULL;
-
-
-    check_mov_args(orig_arrayobj, span, 2,
-                   &orig_ndarray, &result_mask);
-
-    rtype = _get_type_num_double(((PyArrayObject*)orig_ndarray)->descr, dtype);
-
-    mov_sum = calc_mov_sum((PyArrayObject*)orig_ndarray, span, rtype);
-    ERR_CHECK(mov_sum)
-
-    denom = PyFloat_FromDouble(1.0/(double)(span));
-
-    result_ndarray = np_multiply(mov_sum, denom);
-    ERR_CHECK(result_ndarray)
-
-    Py_DECREF(mov_sum);
-    Py_DECREF(denom);
-
-    result_dict = PyDict_New();
-    MEM_CHECK(result_dict)
-    PyDict_SetItemString(result_dict, "array", result_ndarray);
-    PyDict_SetItemString(result_dict, "mask", result_mask);
-
-    Py_DECREF(result_ndarray);
-    Py_DECREF(result_mask);
-    return result_dict;
-}
-
-
-
-//calc_mov_median(PyArrayObject *orig_ndarray, int span, int rtype)
 PyObject*
 calc_mov_ranked(PyArrayObject *orig_ndarray, int span, int rtype, char rank_type)
 {
@@ -844,94 +803,4 @@
     return result_dict;
 }
 
-PyObject *
-MaskedArray_mov_stddev(PyObject *self, PyObject *args, PyObject *kwds)
-{
-
-    PyObject *orig_ndarray=NULL, *orig_arrayobj=NULL,
-             *result_ndarray=NULL, *result_mask=NULL,
-             *result_dict=NULL,
-             *result_temp1=NULL, *result_temp2=NULL, *result_temp3=NULL,
-             *mov_sum=NULL, *mov_sum_sq=NULL,
-             *denom1=NULL, *denom2=NULL;
-
-    PyArray_Descr *dtype=NULL;
-
-    int rtype, span, is_variance, bias;
-
-    static char *kwlist[] = {"array", "span", "is_variance", "bias", "dtype", NULL};
-
-    if (!PyArg_ParseTupleAndKeywords(args, kwds,
-                "Oiii|O&:mov_stddev(array, span, is_variance, bias, dtype)",
-                kwlist, &orig_arrayobj, &span, &is_variance, &bias,
-                PyArray_DescrConverter2, &dtype)) return NULL;
-
-
-    check_mov_args(orig_arrayobj, span, 2,
-                   &orig_ndarray, &result_mask);
-
-    rtype = _get_type_num_double(((PyArrayObject*)orig_ndarray)->descr, dtype);
-
-    mov_sum = calc_mov_sum((PyArrayObject*)orig_ndarray, span, rtype);
-    ERR_CHECK(mov_sum)
-
-    result_temp1 = np_multiply(orig_ndarray, orig_ndarray);
-    ERR_CHECK(result_temp1)
-
-    mov_sum_sq = calc_mov_sum((PyArrayObject*)result_temp1, span, rtype);
-    Py_DECREF(result_temp1);
-    ERR_CHECK(mov_sum_sq)
-
-
-    /*
-      formulas from:
-      http://en.wikipedia.org/wiki/Standard_deviation#Rapid_calculation_methods
-     */
-    if (bias == 0) {
-        denom1 = PyFloat_FromDouble(1.0/(double)(span-1));
-        denom2 = PyFloat_FromDouble(1.0/(double)(span*(span-1)));
-    } else {
-        denom1 = PyFloat_FromDouble(1.0/(double)span);
-        denom2 = PyFloat_FromDouble(1.0/(double)(span*span));
-    }
-
-    result_temp1 = np_multiply(mov_sum_sq, denom1);
-    ERR_CHECK(result_temp1)
-    Py_DECREF(mov_sum_sq);
-    Py_DECREF(denom1);
-
-    result_temp3 = np_multiply(mov_sum, mov_sum);
-    ERR_CHECK(result_temp3)
-    Py_DECREF(mov_sum);
-
-    result_temp2 = np_multiply(result_temp3, denom2);
-    ERR_CHECK(result_temp2)
-    Py_DECREF(result_temp3);
-    Py_DECREF(denom2);
-
-    result_temp3 = np_subtract(result_temp1, result_temp2);
-    ERR_CHECK(result_temp3)
-    Py_DECREF(result_temp1);
-    Py_DECREF(result_temp2);
-
-    if (is_variance) {
-        result_ndarray = result_temp3;
-    } else {
-        result_temp1 = np_sqrt(result_temp3);
-        ERR_CHECK(result_temp1)
-        Py_DECREF(result_temp3);
-        result_ndarray = result_temp1;
-    }
-
-    result_dict = PyDict_New();
-    MEM_CHECK(result_dict)
-    PyDict_SetItemString(result_dict, "array", result_ndarray);
-    PyDict_SetItemString(result_dict, "mask", result_mask);
-
-    Py_DECREF(result_ndarray);
-    Py_DECREF(result_mask);
-    return result_dict;
-
-}
-
 void import_c_tseries(PyObject *m) { import_array(); }

Modified: trunk/scipy/sandbox/timeseries/src/cseries.c
===================================================================
--- trunk/scipy/sandbox/timeseries/src/cseries.c	2007-12-23 17:42:51 UTC (rev 3697)
+++ trunk/scipy/sandbox/timeseries/src/cseries.c	2007-12-23 19:17:49 UTC (rev 3698)
@@ -12,10 +12,6 @@
      METH_VARARGS | METH_KEYWORDS, ""},
     {"MA_mov_max", (PyCFunction)MaskedArray_mov_max,
      METH_VARARGS | METH_KEYWORDS, ""},
-    {"MA_mov_average", (PyCFunction)MaskedArray_mov_average,
-     METH_VARARGS | METH_KEYWORDS, ""},
-    {"MA_mov_stddev", (PyCFunction)MaskedArray_mov_stddev,
-     METH_VARARGS | METH_KEYWORDS, ""},
 
     {"TS_convert", (PyCFunction)TimeSeries_convert,
      METH_VARARGS, ""},



More information about the Scipy-svn mailing list