[Scipy-svn] r3133 - in trunk/Lib/sandbox/pyem: . examples tests
scipy-svn@scip...
scipy-svn@scip...
Mon Jul 2 03:06:28 CDT 2007
Author: cdavid
Date: 2007-07-02 03:06:13 -0500 (Mon, 02 Jul 2007)
New Revision: 3133
Modified:
trunk/Lib/sandbox/pyem/examples/examples.py
trunk/Lib/sandbox/pyem/examples/pdfestimation.py
trunk/Lib/sandbox/pyem/examples/pdfestimation1d.py
trunk/Lib/sandbox/pyem/examples/utils.py
trunk/Lib/sandbox/pyem/gmm_em.py
trunk/Lib/sandbox/pyem/tests/test_examples.py
Log:
Clean up some examples, and add an example for regularized EM
Modified: trunk/Lib/sandbox/pyem/examples/examples.py
===================================================================
--- trunk/Lib/sandbox/pyem/examples/examples.py 2007-07-02 08:04:38 UTC (rev 3132)
+++ trunk/Lib/sandbox/pyem/examples/examples.py 2007-07-02 08:06:13 UTC (rev 3133)
@@ -7,6 +7,12 @@
def ex3():
import basic_example3
+def pdfestim():
+ import pdfestimation
+
+def pdfestim1d():
+ import pdfestimation1d
+
if __name__ == '__main__':
ex1()
ex2()
Modified: trunk/Lib/sandbox/pyem/examples/pdfestimation.py
===================================================================
--- trunk/Lib/sandbox/pyem/examples/pdfestimation.py 2007-07-02 08:04:38 UTC (rev 3132)
+++ trunk/Lib/sandbox/pyem/examples/pdfestimation.py 2007-07-02 08:06:13 UTC (rev 3133)
@@ -1,5 +1,5 @@
#! /usr/bin/env python
-# Last Change: Sat Jun 09 07:00 PM 2007 J
+# Last Change: Mon Jul 02 03:00 PM 2007 J
# Example of doing pdf estimation with EM algorithm. Requires matplotlib.
import numpy as N
@@ -32,6 +32,7 @@
# bc will contain a list of BIC values for each model trained
bc = []
mode = 'full'
+P.figure()
for k in range(1, 5):
# Train a model of k component, and plots isodensity curve
P.subplot(2, 2, k)
@@ -45,4 +46,3 @@
P.ylabel('waiting time (scaled)')
print "According to the BIC, model with %d components is better" % (N.argmax(bc) + 1)
-P.show()
Modified: trunk/Lib/sandbox/pyem/examples/pdfestimation1d.py
===================================================================
--- trunk/Lib/sandbox/pyem/examples/pdfestimation1d.py 2007-07-02 08:04:38 UTC (rev 3132)
+++ trunk/Lib/sandbox/pyem/examples/pdfestimation1d.py 2007-07-02 08:06:13 UTC (rev 3133)
@@ -1,6 +1,13 @@
#! /usr/bin/env python
-# Last Change: Sat Jun 09 04:00 PM 2007 J
+# Last Change: Mon Jul 02 03:00 PM 2007 J
+__doc__ = """This example shows how to do pdfestimation for one dimensional
+data. It estimates a Gaussian mixture model for several number of components,
+and it determines the "best" one according to the Bayesian Information
+Criterion.
+
+It uses old faitfhul waiting time as the one dimension data, and plots the best
+model as well as the BIC as a function of the number of component."""
# Example of doing pdf estimation with EM algorithm. Requires matplotlib.
import numpy as N
from numpy.testing import set_package_path, restore_path
@@ -66,4 +73,3 @@
P.xlabel("number of components")
P.ylabel("BIC")
print "According to the BIC, model with %d components is better" % (N.argmax(bc) + 1)
-P.show()
Modified: trunk/Lib/sandbox/pyem/examples/utils.py
===================================================================
--- trunk/Lib/sandbox/pyem/examples/utils.py 2007-07-02 08:04:38 UTC (rev 3132)
+++ trunk/Lib/sandbox/pyem/examples/utils.py 2007-07-02 08:06:13 UTC (rev 3133)
@@ -1,5 +1,5 @@
#! /usr/bin/env python
-# Last Change: Wed Jun 27 05:00 PM 2007 J
+# Last Change: Mon Jul 02 02:00 PM 2007 J
# Various utilities for examples
@@ -8,7 +8,7 @@
# XXX: Bouah, hackish... Will go away once scipydata found its way
set_package_path()
-from pyem.data import oldfaithful, pendigits
+from scikits.learn.datasets import oldfaithful, pendigits, iris
restore_path()
def get_faithful():
Modified: trunk/Lib/sandbox/pyem/gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/gmm_em.py 2007-07-02 08:04:38 UTC (rev 3132)
+++ trunk/Lib/sandbox/pyem/gmm_em.py 2007-07-02 08:06:13 UTC (rev 3133)
@@ -1,5 +1,5 @@
# /usr/bin/python
-# Last Change: Sun Jul 01 06:00 PM 2007 J
+# Last Change: Mon Jul 02 04:00 PM 2007 J
"""Module implementing GMM, a class to estimate Gaussian mixture models using
EM, and EM, a class which use GMM instances to estimate models parameters using
@@ -11,15 +11,13 @@
# - which methods to avoid va shrinking to 0 ? There are several options,
# not sure which ones are appropriates
# - improve EM trainer
-
import numpy as N
-#import numpy.linalg as lin
from numpy.random import randn
#import _c_densities as densities
import densities
-#from kmean import kmean
from scipy.cluster.vq import kmeans2 as kmean
from gauss_mix import GmParamError
+from misc import curry
#from misc import _DEF_ALPHA, _MIN_DBL_DELTA, _MIN_INV_COND
@@ -197,7 +195,7 @@
"""Computes update of the Gaussian Mixture Model (M step) from the
responsabilities gamma and normalized responsabilities ngamma, for
diagonal models."""
- #XXX: caching SS may decrease memory consumption
+ #XXX: caching SS may decrease memory consumption, but is this possible ?
k = self.gm.k
d = self.gm.d
n = data.shape[0]
@@ -422,23 +420,38 @@
self.pval = pval
def train(self, data, model, maxiter = 20, thresh = 1e-5):
+ mode = model.gm.mode
+
+ # Build regularizer
+ if mode == 'diag':
+ regularize = curry(regularize_diag, np = self.pcnt, prior =
+ self.pval * N.ones(model.gm.d))
+ elif mode == 'full':
+ regularize = curry(regularize_full, np = self.pcnt, prior =
+ self.pval * N.eye(model.gm.d))
+ else:
+ raise ValueError("unknown variance mode")
+
model.init(data)
- regularize_full(model.gm.va, self.pcnt, self.pval * N.eye(model.gm.d))
+ regularize(model.gm.va)
+
# Likelihood is kept
like = N.empty(maxiter, N.float)
# Em computation, with computation of the likelihood
g, tgd = model.compute_log_responsabilities(data)
g = N.exp(g)
+ model.update_em(data, g)
+ regularize(model.gm.va)
+
like[0] = N.sum(densities.logsumexp(tgd), axis = 0)
- model.update_em(data, g)
- regularize_full(model.gm.va, self.pcnt, self.pval * N.eye(model.gm.d))
for i in range(1, maxiter):
g, tgd = model.compute_log_responsabilities(data)
g = N.exp(g)
+ model.update_em(data, g)
+ regularize(model.gm.va)
+
like[i] = N.sum(densities.logsumexp(tgd), axis = 0)
- model.update_em(data, g)
- regularize_full(model.gm.va, self.pcnt, self.pval * N.eye(model.gm.d))
if has_em_converged(like[i], like[i-1], thresh):
return like[0:i]
@@ -458,6 +471,18 @@
else:
return False
+def regularize_diag(va, np, prior):
+ """np * n is the number of prior counts (np is a proportion, and n is the
+ number of point).
+
+ diagonal variance version"""
+ d = va.shape[1]
+ k = va.shape[0]
+
+ for i in range(k):
+ va[i] *= 1. / (1 + np)
+ va[i] += np / (1. + np) * prior
+
def regularize_full(va, np, prior):
"""np * n is the number of prior counts (np is a proportion, and n is the
number of point)."""
@@ -470,19 +495,3 @@
if __name__ == "__main__":
pass
- ## # #++++++++++++++++++
- ## # # Export the figure
- ## # #++++++++++++++++++
- ## # F = P.gcf()
- ## # DPI = F.get_dpi()
- ## # DefaultSize = F.get_size_inches()
- ## # # the default is 100dpi for savefig:
- ## # F.savefig("example1.png")
-
- ## # # Now make the image twice as big, while keeping the fonts and all the
- ## # # same size
- ## # F.set_figsize_inches( (DefaultSize[0]*2, DefaultSize[1]*2) )
- ## # Size = F.get_size_inches()
- ## # print "Size in Inches", Size
- ## # F.savefig("example2.png")
- ## P.show()
Modified: trunk/Lib/sandbox/pyem/tests/test_examples.py
===================================================================
--- trunk/Lib/sandbox/pyem/tests/test_examples.py 2007-07-02 08:04:38 UTC (rev 3132)
+++ trunk/Lib/sandbox/pyem/tests/test_examples.py 2007-07-02 08:06:13 UTC (rev 3133)
@@ -1,10 +1,10 @@
#! /usr/bin/env python
-# Last Change: Mon May 28 04:00 PM 2007 J
+# Last Change: Mon Jul 02 03:00 PM 2007 J
from numpy.testing import *
set_package_path()
-from examples.examples import ex1, ex2, ex3
+from examples.examples import ex1, ex2, ex3, pdfestim, pdfestim1d
restore_path()
# #Optional:
@@ -13,14 +13,20 @@
# restore_path()
class test_examples(NumpyTestCase):
- def check_ex1(self, level = 5):
+ def test_ex1(self, level = 3):
ex1()
- def check_ex2(self, level = 5):
+ def test_ex2(self, level = 3):
ex2()
- def check_ex3(self, level = 5):
+ def test_ex3(self, level = 3):
ex3()
+ def test_pdfestim(self, level = 5):
+ pdfestim()
+
+ def test_pdfestim1d(self, level = 5):
+ pdfestim1d()
+
if __name__ == "__main__":
NumpyTest().run()
More information about the Scipy-svn
mailing list