Consider a standard multivariate Gaussian distribution for →x in n dimensions centered around →μ
f(→x|→μ)=n∏i=11√2πexp(−(xi−μi)22).We want to compare the mean squared error E[||ˉ→x−→μ||2])
of the sample mean (an unbiased estimator) \begin{equation} \overline xi = \frac{1}{m} \sum{j=1}^m x_{ij} \end{equation}
to the James-Stein estimator xJS=(1−n−2||ˉx||2)ˉx
(where i is the component of the vector and j is an index over the samples).
%pylab inline
Populating the interactive namespace from numpy and matplotlib
def jamesSteinEstimator(x):
return x*(1. - (x.size-2.)/sum(x*x))
def RMS(x,mean):
return sqrt(sum((x-mean)*(x-mean)))
def doRun(nDim,nSamp=1000):
mean=zeros(nDim)
data = zeros(nDim*nSamp).reshape(nDim,nSamp)
for i in range(nDim):
mean[i] = 0.5 #set value of the mean's components here
data[i,:] = random.normal(mean[i],1,nSamp).T
data=data.T
avRMS_js = 0
avRMS_mle = 0
for x in data:
avRMS_js += RMS(jamesSteinEstimator(x),mean)
avRMS_mle += RMS(x,mean)
avRMS_js /= nSamp
avRMS_mle /= nSamp
return (avRMS_js, avRMS_mle)
nDimToTest = linspace(2,20)
av_js = zeros(nDimToTest.size)
av_mle = zeros(nDimToTest.size)
for i,nDim in enumerate(nDimToTest):
[av_js[i], av_mle[i]] = doRun(int(nDim))
plt.plot(nDimToTest,av_js,c='b') #James Stein in blue
plt.plot(nDimToTest,av_mle,c='r') #MLE in red
plt.ylabel('Mean Squared Error')
plt.xlabel('Dimensionality')
<matplotlib.text.Text at 0x10dd2b650>