#/bin/python

####Disclaimer: this macro is pretty messy, as it's all in one single file and it's quite long
####It also does not put the plots in the final formatting, but if you translate the macro TPadExample.C 
####from .C to .py you'll get the right layout. 

####Anything else is therefore left as an exercise to the reader 
####(including cleaning the macro up and fully use python's OO capabilities!)

####Various includes

from ROOT import Math, TH1D, TCanvas, THStack, gPad, TLegend, gStyle
from ROOT import kRed, kBlack, kWhite, kBlue, kYellow
from array import array
from math import sqrt

####Functions

def GetBinLowHighEdges(BinCenters) :
  
  #the lists that will be returned later
  BinLowEdges = []
  BinHighEdges = []
  
  #note that here we have included histogram underflows and overflows
  #we will need to remove the under/overflow to do everything in a loop
  for iBin in xrange(1,len(BinCenters)-1) :
    binCenter = BinCenters[iBin]
    #special cases: overflow and underflow
    #if (iBin != len(BinCenters)-1) :
    #assuming equal bin errors on each side, we will have 
    binXError = (BinCenters[iBin+1]-BinCenters[iBin])/2
    #now just subtract (add) this to bin edges to have low (high) edges
    binLowEdge = BinCenters[iBin]-binXError
    binHighEdge = BinCenters[iBin]+binXError
    BinLowEdges.append(binLowEdge)
    BinHighEdges.append(binHighEdge)
    
    #bin center [n + 1] - bin center [n]
    #print BinCenters[iBin] - (
    #for the underflow it doesn't matter what is the low edge, so let's leave it 
  return BinLowEdges, BinHighEdges
  
####Function to prepare the data and bkg histograms

def prepareHistogramFromTextFile (name, title, textFileName) :
  
  #let's make some ROOT arrays with which to eventually fill the histograms:
  #BinContent, BinLowEdges, BinContentError
  BinContent = array('d')
  BinLowEdges = array('d')
  BinCenters = array('d')
  BinContentError = array('d')

  infile = open(textFileName, "r")

  for line in infile:
    #remove all whitespace and newlines
    linestripped = line.strip()
    tokens = linestripped.split(", ")
    #print tokens 
    #--> ['fSumw[0]=0', 'x=-12.5', 'error=0']
    #here we recognise the syntax of TH1::Print("all"):
    #binContent, binCenter, binContentError
    
    #BinContent and BinContentError don't need any extra operations
    #beyond some further stripping...
    binContentTokens = tokens[0].split("=")
    binContentErrorTokens = tokens[2].split("=")
    binCenterTokens = tokens[1].split("=")
    #print binContentTokens, binContentErrorTokens, binCenterTokens
    #--> ['fSumw[0]', '0'] ['error', '0'] ['x', '-12.5']
    #remember strong typing: we want integers (number of events)
    BinContent.append(float(binContentTokens[1]))
    BinContentError.append(float(binContentErrorTokens[1]))
    BinCenters.append(float(binCenterTokens[1]))

  #end of the loop that reads the file line by line

  #we have the bin centers and we need to calculate the edges. 
  #Let's do it in a helper function that returns the two lists of bin edges given bin centers
  BinLowEdgesList, BinHighEdgesList = GetBinLowHighEdges(BinCenters)

  #now a small trick to instantiate the histogram in one go with arrays: 
  #remember that the array containing the bins needs to include the high edge of the last bin
  #(this is actually the only reason why we keep the BinHighEdges)

  BinLowEdges = array('d', BinLowEdgesList)
  BinLowEdges.append(BinHighEdgesList[len(BinHighEdgesList)-1])
      
  #print BinContent, BinContentError, BinCenters, BinLowEdges

  #now we can instantiate the histograms in one line!
  histogram = TH1D(name, title, len(BinLowEdges)-1, BinLowEdges)
  #and in a small loop we fill it (remember to remove over/underflow, for consistency)
  for iBin in xrange(1, histogram.GetNbinsX()) :
    histogram.SetBinContent(iBin, BinContent[iBin])
    histogram.SetBinError(iBin, BinContentError[iBin])

  return histogram  

#stat functions

def GetZVal (p, excess) : 
  #the function normal_quantile converts a p-value into a significance,
  #i.e. the number of standard deviations corresponding to the right-tail of 
  #a Gaussian
  if excess : 
    zval = Math.normal_quantile(1-p,1);
  else :
    zval = Math.normal_quantile(p,1);

  return zval

def GetPValPoisson (nObs, nExp) :
  #Poisson p-value
  #Explanation from D. Casadei:

    #p-value for Poisson distribution, no uncertainty on the parameter

    #-----------------------------------------------------------------

    #Diego Casadei <casadei@cern.ch>   Oct 2011
    #Last update: 4 Nov 2011 (using incomplete Gamma from ROOT)

    #-----------------------------------------------------------------

    #Consider Poi(k|nExp) and compute the p-value which corresponds to
    #the observation of nObs counts.

    #When nObs > nExp there is an excess of observed events and

      #p-value = P(n>=nObs|nExp) = \sum_{n=nObs}^{\infty} Poi(n|nExp)
              #= 1 - \sum_{n=0}^{nObs-1} Poi(n|nExp)
              #= 1 - e^{-nExp} \sum_{n=0}^{nObs-1} nExp^n / n!

    #Otherwise (nObs <= nExp) there is a deficit and

      #p-value = P(n<=nObs|nExp) = \sum_{n=0}^{nObs} Poi(n|nExp)
              #= e^{-nExp} \sum_{n=0}^{nObs} nExp^n / n!
  #everything is provided by ROOT, in terms of the gamma function
  
  #so if we have an excess:
  if nObs>nExp :
    pval = 1-Math.inc_gamma_c(nObs,nExp)
  #and if we have a deficit
  else :
    pval = Math.inc_gamma_c(nObs+1,nExp)
    
  return pval

###########################################################
# Main piece of code
###########################################################

######Step 0: Play with strings to prepare histograms from a text file

#First of all, prepare the two histograms of data and bkg from the two text files
dataHistogram = prepareHistogramFromTextFile ("data", "Data", "data.txt")
bkgHistogram = prepareHistogramFromTextFile ("bkg", "Background", "bkg.txt")

######Step 1: Overlay the two histograms using a THStack

#format the two histograms
dataHistogram.SetMarkerStyle(20)#filled circle
dataHistogram.SetLineColor(kBlack)
dataHistogram.SetMarkerColor(kBlack)
bkgHistogram.SetLineColor(kRed)
bkgHistogram.SetMarkerColor(kRed)

#make a THStack to hold both histograms
stack = THStack("dataAndBkg", "Data and bkg overlaid");
stack.Add(dataHistogram);
stack.Add(bkgHistogram);

#make a TLegend for both
l = TLegend(0.6, 0.6, 0.8, 0.8);
#let's make the legend background white
l.SetFillColor(kWhite);
#arguments: pointer to histogram, text, options: draw line (L)
l.AddEntry(dataHistogram, "Data", "P");
l.AddEntry(bkgHistogram, "Background", "L");

##Optional Step 1: plot 
c=TCanvas()

#we need to Draw() to get the axis
stack.Draw("A");
stack.GetYaxis().SetTitle("Entries");
stack.GetXaxis().SetTitle("Some units");
stack.Draw("Lnostack")
l.Draw("same");

#set log y
gPad.SetLogy()

c.SaveAs("DataAndBackground.png")

######Step 2: Obtain D-B/B and D-B/sqrt(B)

#let's take the relative difference 
#of the two histograms to have an impression of data vs MC

#first of all, clone the numerator
#as Add()/Divide() will overwrite the numerator
relativeDifferenceHistogram = dataHistogram.Clone("relativeDifference")
relativeDifferenceHistogram.SetTitle("Relative difference data/bkg")
#subtract using the Add() function
relativeDifferenceHistogram.Add(bkgHistogram, -1);
#normalise by MC using the Divide() function
relativeDifferenceHistogram.Divide(bkgHistogram);

##Optional Step 2: plot 
c=TCanvas()

#no need for stat box
gStyle.SetOptStat(0)

#we don't need to Draw() to get the axis this time...
relativeDifferenceHistogram.GetYaxis().SetTitle("(D-B)/B");
relativeDifferenceHistogram.GetYaxis().SetRangeUser(-2,2);
relativeDifferenceHistogram.GetXaxis().SetTitle("Some units");
relativeDifferenceHistogram.Draw("P")

c.SaveAs("RelativeDifferenceDataBkg.png")

######Step 3: Obtain D-B/sqrt(B)

#clone and reset histogram from data, so to keep only bins (not necessary...)
DMinusBOverSqrtBHistogram=dataHistogram.Clone("DMinusBOverSqrtB")
DMinusBOverSqrtBHistogram.Reset("ICE")
DMinusBOverSqrtBHistogram.SetTitle("(D-B)/#sqrt{B}")

#this time, we do it bin by bin
for iBin in xrange(1, dataHistogram.GetNbinsX()) :
  dataEntries = dataHistogram.GetBinContent(iBin)
  bkgEntries = bkgHistogram.GetBinContent(iBin)
  dMinusBOverSqrtB = (dataEntries-bkgEntries) / sqrt(bkgEntries)
  DMinusBOverSqrtBHistogram.SetBinContent(iBin, dMinusBOverSqrtB)

##Optional Step 3: plot 
c=TCanvas()

#no need for stat box
gStyle.SetOptStat(0)

#we don't need to Draw() to get the axis this time...
DMinusBOverSqrtBHistogram.GetYaxis().SetTitle("(D-B)/sqrt(B)");
DMinusBOverSqrtBHistogram.SetFillColor(kYellow)
#DMinusBOverSqrtBHistogram.GetYaxis().SetRangeUser(-2,2);
DMinusBOverSqrtBHistogram.GetXaxis().SetTitle("Some units");
DMinusBOverSqrtBHistogram.Draw()

c.SaveAs("DMinusBOverSqrtB.png")

######Step 4: Formatting 

#////**Changing titles and labels

#ratio->GetYaxis()->SetTitle("[Data-MC]/MC");
#ratio->GetXaxis()->SetTitle("fake units");

#//ROOT will have titles and labels of size proportional
#//to the size of the pad  
#//all units below are proportional to size of pad
#//(but I've been lazy and didn't use this information)
#stack->GetYaxis()->SetLabelSize(0.06); 
#stack->GetYaxis()->SetTitleSize(0.07); 
#//Needed to display non-overlapping axis labels
#stack->GetYaxis()->SetTitleOffset(0.7); 
#//   stack->SetMaximum(400); 
#//   stack->SetMinimum(-10); 

#ratio->GetXaxis()->SetLabelSize(0.15);
#ratio->GetYaxis()->SetLabelSize(0.15); 
#ratio->GetYaxis()->SetTitleSize(0.17); 
#//Needed to display non-overlapping axis labels
#ratio->GetYaxis()->SetTitleOffset(0.27); 
#ratio->GetYaxis()->SetRangeUser(-3.5,3.5); 
#ratio->GetXaxis()->SetTitleOffset(0.7); 
#ratio->GetXaxis()->SetTitleSize(0.2); 

#//get a smaller number of labels for lack of space
#//see: http://root.cern.ch/root/html/TAttAxis.html
#//n = n1 + 100*n2 + 10000*n3
#// n1 is the number of primary divisions,
#// n2 is the number of second order divisions and
#// n3 is the number of third order divisions.
#ratio->GetYaxis()->SetNdivisions(504); 

#////**Cleaning up

#//Clean up the canvas for the two pads
#cv->Clear();

#//remove all stat boxes/titles 
#//using the global variable gStyle
#gStyle->SetOptStat(0);
#gStyle->SetOptTitle(0);

#//snip: prepare two histograms to be displayed:
#//- stack (containing the fake data and fake MC)
#//- ratio (containing the ratio (data-MC)/MC)
#// and a TCanvas called cv

#////**Making the pads

#//Set the coordinates of the current pad
#//xLow, yLow, xHigh, yHigh 
#pad1 = new TPad("pad1","pad1",0.05,0.30,1,1);
#pad2 = new TPad("pad2","pad2",0.05,0.05,1,0.30);
#pad1->SetTopMargin(0.02);
#pad1->SetLogy();
#pad2->SetTopMargin(0.0);
#pad1->SetBottomMargin(0.0);
#pad2->SetBottomMargin(0.30);
#pad1->Draw();
#pad2->Draw();
#pad1->cd();

#//now draw the histograms
#stack.Draw("nostack");
#//Update() is used to make the 
#//canvas realise something has happened
#cv->Update();
#pad2->cd();
#ratio->Draw();
#cv->Update();

#////**Preparing the legend
#//draw it on the first pad
#pad1->cd();
#TLegend *l = new TLegend(0.4, 0.4, 0.6, 0.6);
#//let's make the legend background white
#l.SetFillColor(kWhite);
#//arguments: pointer to histogram, text, options: draw line (L)
#l.AddEntry(fakedata, "Fake data", "L");
#l.AddEntry(fakeMC, "Fake MC", "L");
#l.Draw("same");

######Step 5/6: Plotting significance 

#clone and reset histogram from data, so to keep only bins (not necessary...)
SignificanceHistogram=dataHistogram.Clone("Significance")
SignificanceHistogram.Reset("ICE")
SignificanceHistogram.SetTitle("Significance")

SignificanceHistogramPLessThan50=dataHistogram.Clone("SignificancePLessThan50")
SignificanceHistogramPLessThan50.Reset("ICE")
SignificanceHistogramPLessThan50.SetTitle("Significance, only bins with p-val < 0.5")

#bin by bin
for iBin in xrange(1, dataHistogram.GetNbinsX()) :
  dataEntries = dataHistogram.GetBinContent(iBin)
  bkgEntries = bkgHistogram.GetBinContent(iBin)
  pValue = GetPValPoisson(dataEntries, bkgEntries)
  zValue = GetZVal(pValue, dataEntries>bkgEntries)
  SignificanceHistogram.SetBinContent(iBin, zValue)
  if pValue > 0.5 :
    SignificanceHistogramPLessThan50.SetBinContent(iBin, 0)
  else :
    SignificanceHistogramPLessThan50.SetBinContent(iBin, zValue)

##Optional Step 3: plot 
c=TCanvas()

#no need for stat box
gStyle.SetOptStat(0)

#we don't need to Draw() to get the axis this time...
SignificanceHistogram.GetYaxis().SetTitle("Significance");
SignificanceHistogram.SetFillColor(kBlue)
#SignificanceHistogram.GetYaxis().SetRangeUser(-2,2);
SignificanceHistogram.GetXaxis().SetTitle("Some units");
SignificanceHistogram.Draw()

c.SaveAs("Significance.png")

c.Clear()
#we don't need to Draw() to get the axis this time...
SignificanceHistogramPLessThan50.SetFillColor(kRed)
SignificanceHistogramPLessThan50.GetYaxis().SetTitle("Significance");
#SignificanceHistogram.GetYaxis().SetRangeUser(-2,2);
SignificanceHistogramPLessThan50.GetXaxis().SetTitle("Some units");
SignificanceHistogramPLessThan50.Draw()

c.SaveAs("SignificancePLessThan50.png")

  #for (int i=1; i<=Nbins; ++i) { // SKIP UNDER- AND OVER-FLOWS
    #// if (hObs->GetBinContent(i)<=0) continue;

    #unsigned nObs = (int) hObs->GetBinContent(i);
    #float nExp = hExp->GetBinContent(i);
    #float vrnc = hExp->GetBinError(i);
    #vrnc *= vrnc; // variance
    #float zValue = 0;
    #float pValue = 1;
    #if (vrnc>0 && !neglectUncertainty) {
      #// account for systematic uncertainty
      #pValue = pValuePoissonError(nObs, nExp, vrnc);
    #} else {
      #// assume perfect knowledge of Poisson parameter
      #pValue = pValuePoisson(nObs,nExp);
    #}
    #zValue = pValueToSignificance(pValue, (nObs>nExp));
    #if (pValue<0.5) hOut->SetBinContent(i, zValue);
    #if (hPull) hPull->Fill(zValue);
  #}

  #return hOut;

#}