Skip to content

Commit

Permalink
made all python examples use data generator class
Browse files Browse the repository at this point in the history
  • Loading branch information
karlnapf committed Jul 23, 2012
1 parent 1cbdee8 commit 3f6906d
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 66 deletions.
31 changes: 8 additions & 23 deletions examples/undocumented/python_modular/statistics_linear_time_mmd.py
Expand Up @@ -7,12 +7,10 @@
# Written (C) 2012 Heiko Strathmann
#
from numpy import *
from tools.two_distributions_data import TwoDistributionsData

gen_data=TwoDistributionsData()

def statistics_linear_time_mmd():
from shogun.Features import RealFeatures
from shogun.Features import DataGenerator
from shogun.Kernel import GaussianKernel
from shogun.Statistics import LinearTimeMMD
from shogun.Statistics import BOOTSTRAP, MMD1_GAUSSIAN
Expand All @@ -22,42 +20,29 @@ def statistics_linear_time_mmd():
dim=2
difference=0.5

# data is standard normal distributed. only one dimension of Y has a mean
# shift of difference
# use data generator class to produce example data
# in practice, this data-generating function could be replaced by a method
# that obtains data from a stream
(X,Y)=gen_data.create_mean_data(n,dim,difference)

print "dimension means of X", [mean(x) for x in X]
print "dimension means of Y", [mean(x) for x in Y]
data=DataGenerator.generate_mean_data(n,dim,difference)
print "dimension means of X", mean(data.T[0:n].T)
print "dimension means of Y", mean(data.T[n:2*n+1].T)

# create shogun feature representation
features_x=RealFeatures(X)
features_y=RealFeatures(Y)
features=RealFeatures(data)

# use a kernel width of sigma=2, which is 8 in SHOGUN's parametrization
# which is k(x,y)=exp(-||x-y||^2 / tau), in contrast to the standard
# k(x,y)=exp(-||x-y||^2 / (2*sigma^2)), so tau=2*sigma^2
kernel=GaussianKernel(10,8)

mmd=LinearTimeMMD(kernel,features_x, features_y)
mmd=LinearTimeMMD(kernel,features, n)

# perform test: compute p-value and test if null-hypothesis is rejected for
# a test level of 0.05
# for the linear time mmd, the statistic has to be computed on different
# data than the p-value, so first, compute statistic, and then compute
# p-value on other data
# this annoying property is since the null-distribution should stay normal
# which is not the case if "training/test" data would be the same
statistic=mmd.compute_statistic()
print "test statistic:", statistic

# generate new data (same distributions as old) and new statistic object
(X,Y)=gen_data.create_mean_data(n,dim,difference)
features_x=RealFeatures(X)
features_y=RealFeatures(Y)
mmd=LinearTimeMMD(kernel,features_x, features_y)

# do the same thing using two different ways to approximate the null-distribution:
# bootstrapping and gaussian approximation (only for really large samples)
alpha=0.05
Expand Down
Expand Up @@ -8,14 +8,12 @@
#
from numpy import *
#from matplotlib import pyplot
from tools.two_distributions_data import TwoDistributionsData

gen_data=TwoDistributionsData()

# performs learning of optimal non-negative kernel weights for a linear time
# two sample test using the linear time Maximum Mean Discrepancy
def statistics_linear_time_mmd_kernel_choice():
from shogun.Features import RealFeatures, CombinedFeatures
from shogun.Features import DataGenerator
from shogun.Kernel import GaussianKernel, CombinedKernel
from shogun.Statistics import LinearTimeMMD
from shogun.Statistics import BOOTSTRAP, MMD1_GAUSSIAN
Expand All @@ -25,15 +23,13 @@ def statistics_linear_time_mmd_kernel_choice():
dim=5
difference=2

# data is standard normal distributed. only one dimension of Y has a mean
# shift of difference
(X,Y)=gen_data.create_mean_data(n,dim,difference)
# use data generator class to produce example data
# in practice, this data-generating function could be replaced by a method
# that obtains data from a stream
data=DataGenerator.generate_mean_data(n,dim,difference)

# concatenate since MMD class takes data as one feature object
# (it is possible to give two, but then data is copied)
Z=concatenate((X,Y), axis=1)
print "dimension means of X", [mean(x) for x in X]
print "dimension means of Y", [mean(x) for x in Y]
print "dimension means of X", mean(data.T[0:n].T)
print "dimension means of Y", mean(data.T[n:2*n+1].T)

# create kernels/features to choose from
# here: just a bunch of Gaussian Kernels with different widths
Expand All @@ -52,7 +48,7 @@ def statistics_linear_time_mmd_kernel_choice():
# all kernels work on same features
for i in range(len(sigmas)):
kernel.append_kernel(GaussianKernel(10, shogun_sigmas[i]))
features.append_feature_obj(RealFeatures(Z))
features.append_feature_obj(RealFeatures(data))

mmd=LinearTimeMMD(kernel,features, n)

Expand Down
Expand Up @@ -7,12 +7,10 @@
# Written (C) 2012 Heiko Strathmann
#
from numpy import *
from tools.two_distributions_data import TwoDistributionsData

gen_data=TwoDistributionsData()

def statistics_linear_time_mmd():
from shogun.Features import RealFeatures
from shogun.Features import DataGenerator
from shogun.Kernel import GaussianKernel
from shogun.Statistics import QuadraticTimeMMD
from shogun.Statistics import BOOTSTRAP, MMD2_SPECTRUM, MMD2_GAMMA, BIASED, UNBIASED
Expand All @@ -23,23 +21,21 @@ def statistics_linear_time_mmd():
dim=2
difference=0.5

# data is standard normal distributed. only one dimension of Y has a mean
# shift of difference
(X,Y)=gen_data.create_mean_data(n,dim,difference)
# use data generator class to produce example data
data=DataGenerator.generate_mean_data(n,dim,difference)

print "dimension means of X", [mean(x) for x in X]
print "dimension means of Y", [mean(x) for x in Y]
print "dimension means of X", mean(data.T[0:n].T)
print "dimension means of Y", mean(data.T[n:2*n+1].T)

# create shogun feature representation
features_x=RealFeatures(X)
features_y=RealFeatures(Y)
features=RealFeatures(data)

# use a kernel width of sigma=2, which is 8 in SHOGUN's parametrization
# which is k(x,y)=exp(-||x-y||^2 / tau), in contrast to the standard
# k(x,y)=exp(-||x-y||^2 / (2*sigma^2)), so tau=2*sigma^2
kernel=GaussianKernel(10,8)

mmd=QuadraticTimeMMD(kernel,features_x, features_y)
mmd=QuadraticTimeMMD(kernel,features, n)

# perform test: compute p-value and test if null-hypothesis is rejected for
# a test level of 0.05 using different methods to approximate
Expand Down

This file was deleted.

0 comments on commit 3f6906d

Please sign in to comment.