[Scipy-svn] r3078 - in trunk/Lib/sandbox/pyem: . data/oldfaithful
scipy-svn@scip...
scipy-svn@scip...
Fri Jun 8 06:15:47 CDT 2007
Author: cdavid
Date: 2007-06-08 06:15:39 -0500 (Fri, 08 Jun 2007)
New Revision: 3078
Modified:
trunk/Lib/sandbox/pyem/data/oldfaithful/__init__.py
trunk/Lib/sandbox/pyem/densities.py
trunk/Lib/sandbox/pyem/gauss_mix.py
trunk/Lib/sandbox/pyem/gmm_em.py
trunk/Lib/sandbox/pyem/online_em.py
Log:
Add function to plot density contours in GM.
Modified: trunk/Lib/sandbox/pyem/data/oldfaithful/__init__.py
===================================================================
--- trunk/Lib/sandbox/pyem/data/oldfaithful/__init__.py 2007-06-08 04:42:23 UTC (rev 3077)
+++ trunk/Lib/sandbox/pyem/data/oldfaithful/__init__.py 2007-06-08 11:15:39 UTC (rev 3078)
@@ -1,6 +1,6 @@
#! /usr/bin/env python
-# Last Change: Wed Apr 25 06:00 PM 2007 J
-import faith as _faith
+# Last Change: Fri Jun 08 12:00 PM 2007 J
+import data as _faith
__doc__ = _faith.DESCRSHORT
copyright = _faith.COPYRIGHT
source = _faith.SOURCE
Modified: trunk/Lib/sandbox/pyem/densities.py
===================================================================
--- trunk/Lib/sandbox/pyem/densities.py 2007-06-08 04:42:23 UTC (rev 3077)
+++ trunk/Lib/sandbox/pyem/densities.py 2007-06-08 11:15:39 UTC (rev 3078)
@@ -1,7 +1,7 @@
#! /usr/bin/python
#
# Copyrighted David Cournapeau
-# Last Change: Fri Nov 10 10:00 AM 2006 J
+# Last Change: Fri Jun 08 07:00 PM 2007 J
import numpy as N
import numpy.linalg as lin
@@ -188,6 +188,10 @@
else:
raise DenError("mean and variance are not dim conformant")
+ # When X is a sample from multivariante N(mu, sigma), (X-mu)Sigma^-1(X-mu)
+ # follows a Chi2(d) law. Here, we only take 2 dimension, so Chi2 with 2
+ # degree of freedom (See Wasserman. This is easy to see with characteristic
+ # functions)
chi22d = chi2(2)
mahal = N.sqrt(chi22d.ppf(level))
@@ -218,6 +222,26 @@
return elps[0, :], elps[1, :]
+def multiple_gauss_den(data, mu, va):
+ """Helper function to generate several Gaussian
+ pdf (different parameters) from the same data"""
+ mu = N.atleast_2d(mu)
+ va = N.atleast_2d(va)
+
+ K = mu.shape[0]
+ n = data.shape[0]
+ d = mu.shape[1]
+
+ y = N.zeros((K, n))
+ if mu.size == va.size:
+ for i in range(K):
+ y[i] = gauss_den(data, mu[i, :], va[i, :])
+ return y.T
+ else:
+ for i in range(K):
+ y[i] = gauss_den(data, mu[i, :], va[d*i:d*i+d, :])
+ return y.T
+
if __name__ == "__main__":
import pylab
Modified: trunk/Lib/sandbox/pyem/gauss_mix.py
===================================================================
--- trunk/Lib/sandbox/pyem/gauss_mix.py 2007-06-08 04:42:23 UTC (rev 3077)
+++ trunk/Lib/sandbox/pyem/gauss_mix.py 2007-06-08 11:15:39 UTC (rev 3078)
@@ -1,5 +1,5 @@
# /usr/bin/python
-# Last Change: Mon Jun 04 07:00 PM 2007 J
+# Last Change: Fri Jun 08 07:00 PM 2007 J
# Module to implement GaussianMixture class.
@@ -344,6 +344,8 @@
def _get_component_pdf(self, x):
"""Returns a list of pdf, one for each component. Summing them gives
the pdf of the mixture."""
+ # XXX: have a public function to compute the pdf at given points
+ # instead...
std = N.sqrt(self.va[:,0])
retval = N.empty((x.size, self.k))
for c in range(self.k):
@@ -352,6 +354,47 @@
return retval
+ def density_on_grid(self, nx = 50, ny = 50, maxlevel = 0.95):
+ """Do all the necessary computation for contour plot of mixture's density.
+
+ Returns X, Y, Z and V as expected by mpl contour function."""
+
+ # Ok, it is a bit gory. Basically, we want to compute the size of the
+ # grid. We use conf_ellipse, which will return a couple of points for
+ # each component, and we can find a grid size which then is just big
+ # enough to contain all ellipses. This won't work well if two
+ # ellipsoids are crossing each other a lot (because this assumes that
+ # at a given point, one component is largely dominant for its
+ # contribution to the pdf).
+
+ # XXX: we need log pdf, not the pdf... this can save some computing
+ Xe, Ye = self.conf_ellipses(level = maxlevel)
+ ax = [N.min(Xe), N.max(Xe), N.min(Ye), N.max(Ye)]
+
+ w = ax[1] - ax[0]
+ h = ax[3] - ax[2]
+ X, Y, den = self._densityctr(N.linspace(ax[0]-0.2*w, ax[1]+0.2*w, nx), \
+ N.linspace(ax[2]-0.2*h, ax[3]+0.2*h, ny))
+ lden = N.log(den)
+ V = [-5, -3, -1, -0.5, ]
+ V.extend(N.linspace(0, N.max(lden), 4).tolist())
+ return X, Y, lden, N.array(V)
+
+ def _densityctr(self, xrange, yrange):
+ """Helper function to compute density contours on a grid."""
+ gr = N.meshgrid(xrange, yrange)
+ X = gr[0].flatten()
+ Y = gr[1].flatten()
+ xdata = N.concatenate((X[:, N.newaxis], Y[:, N.newaxis]), axis = 1)
+ # XXX refactor computing pdf
+ d = densities.multiple_gauss_den(xdata, self.mu, self.va) * self.w
+ d = N.sum(d, 1)
+ d = d.reshape(len(yrange), len(xrange))
+
+ X = gr[0]
+ Y = gr[1]
+ return X, Y, d
+
# Syntactic sugar
def __repr__(self):
repr = ""
Modified: trunk/Lib/sandbox/pyem/gmm_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/gmm_em.py 2007-06-08 04:42:23 UTC (rev 3077)
+++ trunk/Lib/sandbox/pyem/gmm_em.py 2007-06-08 11:15:39 UTC (rev 3078)
@@ -1,5 +1,5 @@
# /usr/bin/python
-# Last Change: Fri Jun 01 05:00 PM 2007 J
+# Last Change: Fri Jun 08 08:00 PM 2007 J
# TODO:
# - which methods to avoid va shrinking to 0 ? There are several options,
@@ -65,7 +65,8 @@
d = self.gm.d
init = data[0:k, :]
- (code, label) = kmean(data, init, niter)
+ # XXX: This is bogus: should do better (in kmean or here, do not know yet)
+ (code, label) = kmean(data, init, niter, minit = 'matrix')
w = N.ones(k) / k
mu = code.copy()
@@ -135,7 +136,7 @@
n = data.shape[0]
# compute the gaussian pdf
- tgd = multiple_gauss_den(data, self.gm.mu, self.gm.va)
+ tgd = densities.multiple_gauss_den(data, self.gm.mu, self.gm.va)
# multiply by the weight
tgd *= self.gm.w
# Normalize to get a pdf
@@ -202,7 +203,7 @@
the data """
assert(self.isinit)
# compute the gaussian pdf
- tgd = multiple_gauss_den(data, self.gm.mu, self.gm.va)
+ tgd = densities.multiple_gauss_den(data, self.gm.mu, self.gm.va)
# multiply by the weight
tgd *= self.gm.w
@@ -367,27 +368,6 @@
else:
return False
-def multiple_gauss_den(data, mu, va):
- """Helper function to generate several Gaussian
- pdf (different parameters) from the same data"""
- mu = N.atleast_2d(mu)
- va = N.atleast_2d(va)
-
- K = mu.shape[0]
- n = data.shape[0]
- d = mu.shape[1]
-
- y = N.zeros((K, n))
- if mu.size == va.size:
- for i in range(K):
- y[i] = densities.gauss_den(data, mu[i, :], va[i, :])
- return y.T
- else:
- for i in range(K):
- y[i] = densities.gauss_den(data, mu[i, :],
- va[d*i:d*i+d, :])
- return y.T
-
if __name__ == "__main__":
import copy
#=============================
Modified: trunk/Lib/sandbox/pyem/online_em.py
===================================================================
--- trunk/Lib/sandbox/pyem/online_em.py 2007-06-08 04:42:23 UTC (rev 3077)
+++ trunk/Lib/sandbox/pyem/online_em.py 2007-06-08 11:15:39 UTC (rev 3078)
@@ -1,5 +1,5 @@
# /usr/bin/python
-# Last Change: Fri Jun 01 05:00 PM 2007 J
+# Last Change: Fri Jun 08 08:00 PM 2007 J
#---------------------------------------------
# This is not meant to be used yet !!!! I am
@@ -67,7 +67,7 @@
self.cxx = N.outer(w, mean(init_data ** 2, 0))
# w, mu and va init is the same that in the standard case
- (code, label) = kmean(init_data, init_data[0:k, :], niter)
+ (code, label) = kmean(init_data, init_data[0:k, :], iter = niter, minit = 'matrix')
mu = code.copy()
va = N.zeros((k, d))
for i in range(k):
@@ -102,7 +102,7 @@
self.cxx = N.outer(w, mean(init_data ** 2, 0))
# w, mu and va init is the same that in the standard case
- (code, label) = kmean(init_data, init_data[0:k, :], niter)
+ (code, label) = kmean(init_data, init_data[0:k, :], iter = niter, minit = 'matrix')
mu = code.copy()
va = N.zeros((k, d))
for i in range(k):
@@ -176,7 +176,7 @@
# w, mu and va init is the same that in the standard case
(code, label) = kmean(init_data[:, N.newaxis], \
- init_data[0:k, N.newaxis], niter)
+ init_data[0:k, N.newaxis], iter = niter)
mu = code.copy()
va = N.zeros((k, 1))
for i in range(k):
More information about the Scipy-svn
mailing list