Problem 3

Question: Suppose you have a detector response that has a bias $f(x) = \sqrt{0.01^2 + (0.01 \exp(-x/4))^2}$. Adjust the "smear" function to be a Gaussian with width 0.03, but with varying mean according to f(x). Given a "true" input Gaussian distribution with mean 0.4 and width 0.1, generate the appropriate measured distribution given the above bias and width. Then unfold that measured distribution and compare to the truth. In effect, you are smearing the true distribution and then unsmearing it to get back where you started ("closure test").

  • Part a: Plot the "true" distribution and the "measured" distribution after the detector smearing is applied as described above.
  • Part b (Undergrads: 10 points. Grads : 15 points): Unfold the measured distribution with regularization. Show the regularization optimization, and compare the "best" result to the measured and true values in a plot.
In [1]:
#!/usr/bin/env python
import numpy.linalg as la
import scipy.sparse.linalg as sla
import math
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
import numpy as np
import copy

# From https://www.researchgate.net/publication/274138835_NumPy_SciPy_Recipes_for_Data_Science_Regularized_Least_Squares_Optimization
def rlsq_solution_V1(X, y, l):
    n, m = X.shape
    I = np.identity(m)
    w = np.dot(np.dot(la.inv(np.dot(X.T, X) + l*I), X.T), y)
    return w

def trivial_invert(X, y) :
    w = np.dot( np.dot( la.inv( np.dot(X.T,X) ), X.T), y )
    return w

### Detector response smearing function
def smear(x, mu, s ):
    return s * np.random.randn( len(x) ) + mu + x

# We can write sqrt(0.01**2 + (0.01*exp(-x/4))**2) as 0.01*sqrt(1 + exp(-x/2))
def detector_bias(x, a, b):
    #return np.array([x[i] + a * math.sqrt( 1 + math.exp(-x[i]*b*100)) for i in range(len(x))])
    return np.sqrt(1 + np.exp(-x*b*2.)) * a
In [2]:
####### Parameters #######

# Mean and width for true distribution
true_val,true_width = 0.4,0.1


# Bin width for matrix:
binwidth = 0.1
# Number of pseudoexperiments to generate response matrix
N_pe = 100000
# Bias (a * (1+exp(-x*b*2.))) and width for Gaussian smearing
a,b = 0.01, 0.25
sigma = 0.03
# Regulate?
regulate = True




# True values for response matrix
xtrue = np.random.rand(N_pe)
# Smeared values for response matrix... be sure to include the bias. 
xbiased = xtrue + detector_bias(xtrue,a,b)
xreco =  smear( xbiased, mu=0., s=sigma) #smear( xtrue + detector( xtrue,a,b), mu=0., s=sigma )
# Weights to create response matrix... matplotlib's histograms have unity *integrals*, not response
w = np.ones_like( xtrue )
w /= N_pe * binwidth
# Values for the x axis ranges
xvals = np.arange(0,1.0,binwidth)
xbins = np.linspace(0,1.0, num=int(1./binwidth) + 1)


# Draw the response matrix    
plt.figure(1)
G, xbins2d, ybins2d, patches2d = plt.hist2d(xreco,xtrue, bins=[xbins,xbins], weights=w, cmap='Blues')
plt.xlabel('True Length (cm)')
plt.ylabel('Reconstructed Length (cm)')
plt.colorbar()
plt.show()

Part a

In [3]:
# Create a "truth" distribution
truth = np.random.normal( loc=true_val, scale=true_width, size=10000 )
# Create a "measured" distribution
data = truth + detector_bias(truth,a,b)
data =  smear( data, mu=0., s=sigma) #smear( xtrue + detector( xtrue,a,b), mu=0., s=sigma )

# Draw the true and smeared distributions    
plt.figure(2)
returnvals,xbinsout,patches = plt.hist([truth,data],bins=xbins-binwidth*0.5, histtype='step')
t = returnvals[0]
d = returnvals[1]
plt.xlabel('Length (cm)')
plt.ylabel('Number')
plt.show()

Part b

In [4]:
toplot = []
if regulate:
    regparams = np.arange( 0.0, 0.1, 0.01 )
else :
    regparams = [0.0]
regtitles = [ 'Reco, $\lambda=$ %6.2f' % (iv) for iv in regparams]

chi2vals = []
for l in regparams:
    if regulate : 
        m = rlsq_solution_V1(G, d, l)
    else :
        m = trivial_invert(G,d)
    
    toplot.append( m )
    val = math.log( np.sum( (t-m)**2 ) )
    chi2vals.append( val )
In [5]:
# Draw the chi2 as a function of regularization parameter
plt.figure(3)
minchi2 = min( chi2vals )
for ichi2,chi2 in enumerate(chi2vals):
    chi2vals[ichi2] -= minchi2
    
optval = np.argmin(chi2vals)
plt.plot( regparams, chi2vals)
plt.xlabel('Regularization parameter')
plt.ylabel(r'$\log(\Delta \chi^2)$', fontsize=15)
plt.show()
In [6]:
# Draw the true, measured, and reconstructed distributions
plt.figure(4)
truthplot, = plt.plot(xvals, t)
meas, = plt.plot(xvals, d)
reco, = plt.plot(xvals, toplot[optval], '--')
plt.legend( [meas,truthplot,reco], ['Measured', 'Truth'] + [regtitles[optval]], loc='center left', bbox_to_anchor=(1, 0.5))
plt.xlabel('Length (cm)')
plt.ylabel('Number')
plt.show()
In [ ]: