Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
renamed file
  • Loading branch information
karlnapf committed Aug 13, 2012
1 parent df80e5c commit 15cb24b
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 155 deletions.
Expand Up @@ -16,12 +16,15 @@
from shogun.Statistics import LinearTimeMMD
from shogun.Statistics import BOOTSTRAP, MMD1_GAUSSIAN

# for nice plotting that fits into our shogun tutorial
import latex_plot_inits

# parameters, change to get different results
m=10000
m=1000 # set to 10000 for a good test result
dim=2

# setting the difference of the first dimension smaller makes a harder test
difference=0.4
difference=1

# number of samples taken from null and alternative distribution
num_null_samples=500
Expand Down Expand Up @@ -63,23 +66,29 @@

# plot data of p and q
subplot(2,3,1)
grid(True)
gca().xaxis.set_major_locator( MaxNLocator(nbins = 3) ) # reduce number of x-ticks
gca().yaxis.set_major_locator( MaxNLocator(nbins = 3) ) # reduce number of x-ticks
plot(data[0][0:m], data[1][0:m], 'ro', label='$x$')
plot(data[0][m+1:2*m], data[1][m+1:2*m], 'bo', label='$x$', alpha=0.2)
legend()
plot(data[0][m+1:2*m], data[1][m+1:2*m], 'bo', label='$x$', alpha=0.5)
title('Data, shift in $x_1$='+str(difference)+'\nm='+str(m))
xlabel('$x_1, y_1$')
ylabel('$x_2, y_2$')
grid(True)


# histogram of first data dimension and pdf
subplot(2,3,2)
grid(True)
gca().xaxis.set_major_locator( MaxNLocator(nbins = 3) ) # reduce number of x-ticks
gca().yaxis.set_major_locator( MaxNLocator(nbins = 3) ) # reduce number of x-ticks
hist(data[0], bins=50, alpha=0.5, facecolor='r', normed=True)
hist(data[1], bins=50, alpha=0.5, facecolor='b', normed=True)
xs=linspace(min(data[0])-1,max(data[0])+1, 50)
plot(xs,normpdf( xs, 0, 1), 'r', linewidth=3)
plot(xs,normpdf( xs, difference, 1), 'b', linewidth=3)
title('Data PDF in $x_1$')
grid(True)
xlabel('$x_1, y_1$')
ylabel('$p(x_1), p(y_1)$')
title('Data PDF in $x_1, y_1$')

# compute threshold for test level
alpha=0.05
Expand All @@ -93,30 +102,37 @@

# plot alternative distribution with threshold
subplot(2,3,4)
grid(True)
gca().xaxis.set_major_locator( MaxNLocator(nbins = 3) ) # reduce number of x-ticks
gca().yaxis.set_major_locator( MaxNLocator(nbins = 3) ) # reduce number of x-ticks
hist(alt_samples, 20, normed=True);
axvline(thresh_boot, 0, 1, linewidth=2, color='red')
type_two_error=sum(alt_samples<thresh_boot)/float(num_null_samples)
title('Alternative Dist.\n' + 'Type II error is ' + str(type_two_error))
grid(True)

# compute range for all null distribution histograms
hist_range=[min([min(null_samples_boot), min(null_samples_gaussian)]), max([max(null_samples_boot), max(null_samples_gaussian)])]

# plot null distribution with threshold
subplot(2,3,3)
grid(True)
gca().xaxis.set_major_locator( MaxNLocator(nbins = 3) ) # reduce number of x-ticks
gca().yaxis.set_major_locator( MaxNLocator(nbins = 3) ) # reduce number of x-ticks
hist(null_samples_boot, 20, range=hist_range, normed=True);
axvline(thresh_boot, 0, 1, linewidth=2, color='red')
title('Bootstrapped Null Dist.\n' + 'Type I error is ' + str(type_one_error_boot))
grid(True)

# plot null distribution gaussian
subplot(2,3,5)
grid(True)
gca().xaxis.set_major_locator( MaxNLocator(nbins = 3) ) # reduce number of x-ticks
gca().yaxis.set_major_locator( MaxNLocator(nbins = 3) ) # reduce number of x-ticks
hist(null_samples_gaussian, 20, range=hist_range, normed=True);
axvline(thresh_gaussian, 0, 1, linewidth=2, color='red')
title('Null Dist. Gaussian\nType I error is ' + str(type_one_error_gaussian))
grid(True)

# pull plots a bit apart
subplots_adjust(hspace=0.5)
subplots_adjust(wspace=0.5)
show()

This file was deleted.

0 comments on commit 15cb24b

Please sign in to comment.