// @(#)root/roostats:$Id$
// Authors: Kevin Belasco 17/06/2009
// Authors: Kyle Cranmer 17/06/2009
/*************************************************************************
* Copyright (C) 1995-2008, Rene Brun and Fons Rademakers. *
* All rights reserved. *
* *
* For the licensing terms see $ROOTSYS/LICENSE. *
* For the list of contributors see $ROOTSYS/README/CREDITS. *
*************************************************************************/
//_________________________________________________
/*
BEGIN_HTML
MCMCInterval is a concrete implementation of the RooStats::ConfInterval
interface. It takes as input Markov Chain of data points in the parameter
space generated by Monte Carlo using the Metropolis algorithm. From the Markov
Chain, the confidence interval can be determined in two ways:
Using a Kernel-Estimated PDF: (not the default method)
A RooNDKeysPdf is constructed from the data set using adaptive kernel width.
With this RooNDKeysPdf F, we then integrate over the most likely domain in the
parameter space (tallest points in the posterior RooNDKeysPdf) until the target
confidence level is reached within an acceptable neighborhood as defined by
SetEpsilon(). More specifically: we calculate the following for different
cutoff values C until we reach the target confidence level: \int_{ F >= C } F
d{normset}.
Important note: this is not the default method because of a bug in constructing
the RooNDKeysPdf from a weighted data set. Configure to use this method by
calling SetUseKeys(true), and the data set will be interpreted without weights.
Using a binned data set: (the default method)
This is the binned analog of the continuous integrative method that uses the
kernel-estimated PDF. The points in the Markov Chain are put into a binned
data set and the interval is then calculated by adding the heights of the bins
in decreasing order until the desired level of confidence has been reached.
Note that this means the actual confidence level is >= the confidence level
prescribed by the client (unless the user calls SetHistStrict(kFALSE)). This
method is the default but may not remain as such in future releases, so you may
wish to explicitly configure to use this method by calling SetUseKeys(false)
These are not the only ways for the confidence interval to be determined, and
other possibilities are being considered being added, especially for the
1-dimensional case.
One can ask an MCMCInterval for the lower and upper limits on a specific
parameter of interest in the interval. Note that this works better for some
distributions (ones with exactly one local maximum) than others, and sometimes
has little value.
END_HTML
*/
//_________________________________________________
#ifndef ROOT_Rtypes
#include "Rtypes.h"
#endif
#ifndef ROOT_TMath
#include "TMath.h"
#endif
#ifndef RooStats_MCMCInterval
#include "RooStats/MCMCInterval.h"
#endif
#ifndef ROOSTATS_MarkovChain
#include "RooStats/MarkovChain.h"
#endif
#ifndef RooStats_Heaviside
#include "RooStats/Heaviside.h"
#endif
#ifndef ROO_DATA_HIST
#include "RooDataHist.h"
#endif
#ifndef ROO_KEYS_PDF
#include "RooNDKeysPdf.h"
#endif
#ifndef ROO_PRODUCT
#include "RooProduct.h"
#endif
#ifndef RooStats_RooStatsUtils
#include "RooStats/RooStatsUtils.h"
#endif
#ifndef ROO_REAL_VAR
#include "RooRealVar.h"
#endif
#ifndef ROO_ARG_LIST
#include "RooArgList.h"
#endif
#ifndef ROOT_TIterator
#include "TIterator.h"
#endif
#ifndef ROOT_TH1
#include "TH1.h"
#endif
#ifndef ROOT_TH1F
#include "TH1F.h"
#endif
#ifndef ROOT_TH2F
#include "TH2F.h"
#endif
#ifndef ROOT_TH3F
#include "TH3F.h"
#endif
#ifndef ROO_MSG_SERVICE
#include "RooMsgService.h"
#endif
#ifndef ROO_GLOBAL_FUNC
#include "RooGlobalFunc.h"
#endif
#ifndef ROOT_TObject
#include "TObject.h"
#endif
#ifndef ROOT_THnSparse
#include "THnSparse.h"
#endif
#ifndef ROO_NUMBER
#include "RooNumber.h"
#endif
//#ifndef ROOT_TFile
//#include "TFile.h"
//#endif
#include
#include
#include
ClassImp(RooStats::MCMCInterval);
using namespace RooFit;
using namespace RooStats;
using namespace std;
static const Double_t DEFAULT_EPSILON = 0.01;
static const Double_t DEFAULT_DELTA = 10e-6;
MCMCInterval::MCMCInterval(const char* name)
: ConfInterval(name)
{
fConfidenceLevel = 0.0;
fHistConfLevel = 0.0;
fKeysConfLevel = 0.0;
fTFConfLevel = 0.0;
fFull = 0.0;
fChain = NULL;
fAxes = NULL;
fDataHist = NULL;
fSparseHist = NULL;
fVector.clear();
fKeysPdf = NULL;
fProduct = NULL;
fHeaviside = NULL;
fKeysDataHist = NULL;
fCutoffVar = NULL;
fHist = NULL;
fNumBurnInSteps = 0;
fHistCutoff = -1;
fKeysCutoff = -1;
fDimension = 1;
fUseKeys = kFALSE;
fUseSparseHist = kFALSE;
fIsHistStrict = kTRUE;
fEpsilon = DEFAULT_EPSILON;
fDelta = DEFAULT_DELTA;
fIntervalType = kShortest;
fTFLower = -1.0 * RooNumber::infinity();
fTFUpper = RooNumber::infinity();
fVecWeight = 0;
fLeftSideTF = -1;
}
MCMCInterval::MCMCInterval(const char* name,
const RooArgSet& parameters, MarkovChain& chain) : ConfInterval(name)
{
fNumBurnInSteps = 0;
fConfidenceLevel = 0.0;
fHistConfLevel = 0.0;
fKeysConfLevel = 0.0;
fTFConfLevel = 0.0;
fFull = 0.0;
fAxes = NULL;
fChain = &chain;
fDataHist = NULL;
fSparseHist = NULL;
fVector.clear();
fKeysPdf = NULL;
fProduct = NULL;
fHeaviside = NULL;
fKeysDataHist = NULL;
fCutoffVar = NULL;
fHist = NULL;
fHistCutoff = -1;
fKeysCutoff = -1;
fUseKeys = kFALSE;
fUseSparseHist = kFALSE;
fIsHistStrict = kTRUE;
fEpsilon = DEFAULT_EPSILON;
SetParameters(parameters);
fDelta = DEFAULT_DELTA;
fIntervalType = kShortest;
fTFLower = -1.0 * RooNumber::infinity();
fTFUpper = RooNumber::infinity();
fVecWeight = 0;
fLeftSideTF = -1;
}
MCMCInterval::~MCMCInterval()
{
// destructor
delete[] fAxes;
delete fHist;
delete fChain;
// kbelasco: check here for memory management errors
delete fDataHist;
delete fSparseHist;
delete fKeysPdf;
delete fProduct;
delete fHeaviside;
delete fKeysDataHist;
delete fCutoffVar;
}
struct CompareDataHistBins {
CompareDataHistBins(RooDataHist* hist) : fDataHist(hist) {}
bool operator() (Int_t bin1 , Int_t bin2) {
fDataHist->get(bin1);
Double_t n1 = fDataHist->weight();
fDataHist->get(bin2);
Double_t n2 = fDataHist->weight();
return (n1 < n2);
}
RooDataHist* fDataHist;
};
struct CompareSparseHistBins {
CompareSparseHistBins(THnSparse* hist) : fSparseHist(hist) {}
bool operator() (Long_t bin1, Long_t bin2) {
Double_t n1 = fSparseHist->GetBinContent(bin1);
Double_t n2 = fSparseHist->GetBinContent(bin2);
return (n1 < n2);
}
THnSparse* fSparseHist;
};
struct CompareVectorIndices {
CompareVectorIndices(MarkovChain* chain, RooRealVar* param) :
fChain(chain), fParam(param) {}
bool operator() (Int_t i, Int_t j) {
Double_t xi = fChain->Get(i)->getRealValue(fParam->GetName());
Double_t xj = fChain->Get(j)->getRealValue(fParam->GetName());
return (xi < xj);
}
MarkovChain* fChain;
RooRealVar* fParam;
};
// kbelasco: for this method, consider running DetermineInterval() if
// fKeysPdf==NULL, fSparseHist==NULL, fDataHist==NULL, or fVector.size()==0
// rather than just returning false. Though this should not be an issue
// because nobody should be able to get an MCMCInterval that has their interval
// or posterior representation NULL/empty since they should only get this
// through the MCMCCalculator
Bool_t MCMCInterval::IsInInterval(const RooArgSet& point) const
{
if (fIntervalType == kShortest) {
if (fUseKeys) {
if (fKeysPdf == NULL)
return false;
// evaluate keyspdf at point and return whether >= cutoff
RooStats::SetParameters(&point, const_cast(&fParameters));
return fKeysPdf->getVal(&fParameters) >= fKeysCutoff;
} else {
if (fUseSparseHist) {
if (fSparseHist == NULL)
return false;
// evalute sparse hist at bin where point lies and return
// whether >= cutoff
RooStats::SetParameters(&point,
const_cast(&fParameters));
Long_t bin;
// kbelasco: consider making x static
Double_t* x = new Double_t[fDimension];
for (Int_t i = 0; i < fDimension; i++)
x[i] = fAxes[i]->getVal();
bin = fSparseHist->GetBin(x, kFALSE);
Double_t weight = fSparseHist->GetBinContent((Long64_t)bin);
delete[] x;
return (weight >= (Double_t)fHistCutoff);
} else {
if (fDataHist == NULL)
return false;
// evaluate data hist at bin where point lies and return whether
// >= cutoff
Int_t bin;
bin = fDataHist->getIndex(point);
fDataHist->get(bin);
return (fDataHist->weight() >= (Double_t)fHistCutoff);
}
}
} else if (fIntervalType == kTailFraction) {
if (fVector.size() == 0)
return false;
// return whether value of point is within the range
Double_t x = point.getRealValue(fAxes[0]->GetName());
if (fTFLower <= x && x <= fTFUpper)
return true;
return false;
}
coutE(InputArguments) << "Error in MCMCInterval::IsInInterval: "
<< "Interval type not set. Returning false." << endl;
return false;
}
void MCMCInterval::SetConfidenceLevel(Double_t cl)
{
fConfidenceLevel = cl;
DetermineInterval();
}
// kbelasco: update this or just take it out
// kbelasco: consider keeping this around but changing the implementation
// to set the number of bins for each RooRealVar and then reacreating the
// histograms
//void MCMCInterval::SetNumBins(Int_t numBins)
//{
// if (numBins > 0) {
// fPreferredNumBins = numBins;
// for (Int_t d = 0; d < fDimension; d++)
// fNumBins[d] = numBins;
// }
// else {
// coutE(Eval) << "* Error in MCMCInterval::SetNumBins: " <<
// "Negative number of bins given: " << numBins << endl;
// return;
// }
//
// // If the histogram already exists, recreate it with the new bin numbers
// if (fHist != NULL)
// CreateHist();
//}
void MCMCInterval::SetAxes(RooArgList& axes)
{
Int_t size = axes.getSize();
if (size != fDimension) {
coutE(InputArguments) << "* Error in MCMCInterval::SetAxes: " <<
"number of variables in axes (" << size <<
") doesn't match number of parameters ("
<< fDimension << ")" << endl;
return;
}
for (Int_t i = 0; i < size; i++)
fAxes[i] = (RooRealVar*)axes.at(i);
}
void MCMCInterval::CreateKeysPdf()
{
// kbelasco: check here for memory leak. does RooNDKeysPdf use
// the RooArgList passed to it or does it make a clone?
// also check for memory leak from chain, does RooNDKeysPdf clone that?
if (fAxes == NULL || fParameters.getSize() == 0) {
coutE(InputArguments) << "Error in MCMCInterval::CreateKeysPdf: "
<< "parameters have not been set." << endl;
return;
}
if (fNumBurnInSteps >= fChain->Size()) {
coutE(InputArguments) <<
"MCMCInterval::CreateKeysPdf: creation of Keys PDF failed: " <<
"Number of burn-in steps (num steps to ignore) >= number of steps " <<
"in Markov chain." << endl;
delete fKeysPdf;
delete fCutoffVar;
delete fHeaviside;
delete fProduct;
fKeysPdf = NULL;
fCutoffVar = NULL;
fHeaviside = NULL;
fProduct = NULL;
return;
}
RooDataSet* chain = fChain->GetAsDataSet(SelectVars(fParameters),
EventRange(fNumBurnInSteps, fChain->Size()));
RooArgList* paramsList = new RooArgList();
for (Int_t i = 0; i < fDimension; i++)
paramsList->add(*fAxes[i]);
// kbelasco: check for memory leaks with chain. who owns it? does
// RooNDKeysPdf take ownership?
fKeysPdf = new RooNDKeysPdf("keysPDF", "Keys PDF", *paramsList, *chain, "a");
fCutoffVar = new RooRealVar("cutoff", "cutoff", 0);
fHeaviside = new Heaviside("heaviside", "Heaviside", *fKeysPdf, *fCutoffVar);
fProduct = new RooProduct("product", "Keys PDF & Heaviside Product",
RooArgSet(*fKeysPdf, *fHeaviside));
}
void MCMCInterval::CreateHist()
{
if (fAxes == NULL || fChain == NULL) {
coutE(Eval) << "* Error in MCMCInterval::CreateHist(): " <<
"Crucial data member was NULL." << endl;
coutE(Eval) << "Make sure to fully construct/initialize." << endl;
return;
}
if (fHist != NULL)
delete fHist;
if (fNumBurnInSteps >= fChain->Size()) {
coutE(InputArguments) <<
"MCMCInterval::CreateHist: creation of histogram failed: " <<
"Number of burn-in steps (num steps to ignore) >= number of steps " <<
"in Markov chain." << endl;
fHist = NULL;
return;
}
if (fDimension == 1)
fHist = new TH1F("posterior", "MCMC Posterior Histogram",
fAxes[0]->numBins(), fAxes[0]->getMin(), fAxes[0]->getMax());
else if (fDimension == 2)
fHist = new TH2F("posterior", "MCMC Posterior Histogram",
fAxes[0]->numBins(), fAxes[0]->getMin(), fAxes[0]->getMax(),
fAxes[1]->numBins(), fAxes[1]->getMin(), fAxes[1]->getMax());
else if (fDimension == 3)
fHist = new TH3F("posterior", "MCMC Posterior Histogram",
fAxes[0]->numBins(), fAxes[0]->getMin(), fAxes[0]->getMax(),
fAxes[1]->numBins(), fAxes[1]->getMin(), fAxes[1]->getMax(),
fAxes[2]->numBins(), fAxes[2]->getMin(), fAxes[2]->getMax());
else {
coutE(Eval) << "* Error in MCMCInterval::CreateHist() : " <<
"TH1* couldn't handle dimension: " << fDimension << endl;
return;
}
// Fill histogram
Int_t size = fChain->Size();
const RooArgSet* entry;
for (Int_t i = fNumBurnInSteps; i < size; i++) {
entry = fChain->Get(i);
if (fDimension == 1)
((TH1F*)fHist)->Fill(entry->getRealValue(fAxes[0]->GetName()),
fChain->Weight());
else if (fDimension == 2)
((TH2F*)fHist)->Fill(entry->getRealValue(fAxes[0]->GetName()),
entry->getRealValue(fAxes[1]->GetName()),
fChain->Weight());
else
((TH3F*)fHist)->Fill(entry->getRealValue(fAxes[0]->GetName()),
entry->getRealValue(fAxes[1]->GetName()),
entry->getRealValue(fAxes[2]->GetName()),
fChain->Weight());
}
if (fDimension >= 1)
fHist->GetXaxis()->SetTitle(fAxes[0]->GetName());
if (fDimension >= 2)
fHist->GetYaxis()->SetTitle(fAxes[1]->GetName());
if (fDimension >= 3)
fHist->GetZaxis()->SetTitle(fAxes[2]->GetName());
}
void MCMCInterval::CreateSparseHist()
{
if (fAxes == NULL || fChain == NULL) {
coutE(InputArguments) << "* Error in MCMCInterval::CreateSparseHist(): "
<< "Crucial data member was NULL." << endl;
coutE(InputArguments) << "Make sure to fully construct/initialize."
<< endl;
return;
}
if (fSparseHist != NULL)
delete fSparseHist;
Double_t* min = new Double_t[fDimension];
Double_t* max = new Double_t[fDimension];
Int_t* bins = new Int_t[fDimension];
for (Int_t i = 0; i < fDimension; i++) {
min[i] = fAxes[i]->getMin();
max[i] = fAxes[i]->getMax();
bins[i] = fAxes[i]->numBins();
}
fSparseHist = new THnSparseF("posterior", "MCMC Posterior Histogram",
fDimension, bins, min, max);
delete[] min;
delete[] max;
delete[] bins;
// kbelasco: it appears we need to call Sumw2() just to get the
// histogram to keep a running total of the weight so that Getsumw doesn't
// just return 0
fSparseHist->Sumw2();
if (fNumBurnInSteps >= fChain->Size()) {
coutE(InputArguments) <<
"MCMCInterval::CreateSparseHist: creation of histogram failed: " <<
"Number of burn-in steps (num steps to ignore) >= number of steps " <<
"in Markov chain." << endl;
}
// Fill histogram
Int_t size = fChain->Size();
const RooArgSet* entry;
Double_t* x = new Double_t[fDimension];
for (Int_t i = fNumBurnInSteps; i < size; i++) {
entry = fChain->Get(i);
for (Int_t ii = 0; ii < fDimension; ii++)
x[ii] = entry->getRealValue(fAxes[ii]->GetName());
fSparseHist->Fill(x, fChain->Weight());
}
delete[] x;
}
void MCMCInterval::CreateDataHist()
{
if (fParameters.getSize() == 0 || fChain == NULL) {
coutE(Eval) << "* Error in MCMCInterval::CreateDataHist(): " <<
"Crucial data member was NULL or empty." << endl;
coutE(Eval) << "Make sure to fully construct/initialize." << endl;
return;
}
if (fNumBurnInSteps >= fChain->Size()) {
coutE(InputArguments) <<
"MCMCInterval::CreateDataHist: creation of histogram failed: " <<
"Number of burn-in steps (num steps to ignore) >= number of steps " <<
"in Markov chain." << endl;
fDataHist = NULL;
return;
}
fDataHist = fChain->GetAsDataHist(SelectVars(fParameters),
EventRange(fNumBurnInSteps, fChain->Size()));
}
void MCMCInterval::CreateVector(RooRealVar* param)
{
fVector.clear();
fVecWeight = 0;
if (fChain == NULL) {
coutE(InputArguments) << "* Error in MCMCInterval::CreateVector(): " <<
"Crucial data member (Markov chain) was NULL." << endl;
coutE(InputArguments) << "Make sure to fully construct/initialize."
<< endl;
return;
}
if (fNumBurnInSteps >= fChain->Size()) {
coutE(InputArguments) <<
"MCMCInterval::CreateVector: creation of vector failed: " <<
"Number of burn-in steps (num steps to ignore) >= number of steps " <<
"in Markov chain." << endl;
}
// Fill vector
Int_t size = fChain->Size() - fNumBurnInSteps;
fVector.resize(size);
Int_t i;
Int_t chainIndex;
for (i = 0; i < size; i++) {
chainIndex = i + fNumBurnInSteps;
fVector[i] = chainIndex;
fVecWeight += fChain->Weight(chainIndex);
}
stable_sort(fVector.begin(), fVector.end(),
CompareVectorIndices(fChain, param));
}
void MCMCInterval::SetParameters(const RooArgSet& parameters)
{
fParameters.removeAll();
fParameters.add(parameters);
fDimension = fParameters.getSize();
if (fAxes != NULL)
delete[] fAxes;
fAxes = new RooRealVar*[fDimension];
TIterator* it = fParameters.createIterator();
Int_t n = 0;
TObject* obj;
while ((obj = it->Next()) != NULL) {
if (dynamic_cast(obj) != NULL)
fAxes[n] = (RooRealVar*)obj;
else
coutE(Eval) << "* Error in MCMCInterval::SetParameters: " <<
obj->GetName() << " not a RooRealVar*" << std::endl;
n++;
}
delete it;
}
void MCMCInterval::DetermineInterval()
{
switch (fIntervalType) {
case kShortest:
DetermineShortestInterval();
break;
case kTailFraction:
DetermineTailFractionInterval();
break;
default:
coutE(InputArguments) << "MCMCInterval::DetermineInterval(): " <<
"Error: Interval type not set" << endl;
break;
}
}
void MCMCInterval::DetermineShortestInterval()
{
if (fUseKeys)
DetermineByKeys();
else
DetermineByHist();
}
void MCMCInterval::DetermineTailFractionInterval()
{
if (fLeftSideTF < 0 || fLeftSideTF > 1) {
coutE(InputArguments) << "MCMCInterval::DetermineTailFractionInterval: "
<< "Fraction must be in the range [0, 1]. "
<< fLeftSideTF << "is not allowed." << endl;
return;
}
if (fDimension != 1) {
coutE(InputArguments) << "MCMCInterval::DetermineTailFractionInterval(): "
<< "Error: Can only find a tail-fraction interval for 1-D intervals"
<< endl;
return;
}
if (fAxes == NULL) {
coutE(InputArguments) << "MCMCInterval::DetermineTailFractionInterval(): "
<< "Crucial data member was NULL." << endl;
coutE(InputArguments) << "Make sure to fully construct/initialize."
<< endl;
return;
}
// kbelasco: fill in code here to find interval
//
// also make changes so that calling GetPosterior...() returns NULL
// when fIntervalType == kTailFraction, since there really
// is no posterior for this type of interval determination
if (fVector.size() == 0)
CreateVector(fAxes[0]);
if (fVector.size() == 0 || fVecWeight == 0) {
// if size is still 0, then creation failed.
// if fVecWeight == 0, then there are no entries (indicates the same
// error as fVector.size() == 0 because that only happens when
// fNumBurnInSteps >= fChain->Size())
// either way, reset and return
fVector.clear();
fTFLower = -1.0 * RooNumber::infinity();
fTFUpper = RooNumber::infinity();
fTFConfLevel = 0.0;
fVecWeight = 0;
return;
}
RooRealVar* param = fAxes[0];
Double_t c = fConfidenceLevel;
Double_t leftTailCutoff = fVecWeight * (1 - c) * fLeftSideTF;
Double_t rightTailCutoff = fVecWeight * (1 - c) * (1 - fLeftSideTF);
Double_t leftTailSum = 0;
Double_t rightTailSum = 0;
// kbelasco: consider changing these values to +infinty and -infinity
Double_t ll = param->getMin();
Double_t ul = param->getMax();
Double_t x;
Double_t w;
// save a lot of GetName() calls if compiler does not already optimize this
const char* name = param->GetName();
// find lower limit
Int_t i;
for (i = 0; i < (Int_t)fVector.size(); i++) {
x = fChain->Get(fVector[i])->getRealValue(name);
w = fChain->Weight();
if (TMath::Abs(leftTailSum + w - leftTailCutoff) <
TMath::Abs(leftTailSum - leftTailCutoff)) {
// moving the lower limit to x would bring us closer to the desired
// left tail size
ll = x;
leftTailSum += w;
} else
break;
}
// find upper limit
for (i = (Int_t)fVector.size() - 1; i >= 0; i--) {
x = fChain->Get(fVector[i])->getRealValue(name);
w = fChain->Weight();
if (TMath::Abs(rightTailSum + w - rightTailCutoff) <
TMath::Abs(rightTailSum - rightTailCutoff)) {
// moving the lower limit to x would bring us closer to the desired
// left tail size
ul = x;
rightTailSum += w;
} else
break;
}
fTFLower = ll;
fTFUpper = ul;
fTFConfLevel = 1 - (leftTailSum + rightTailSum) / fVecWeight;
}
void MCMCInterval::DetermineByKeys()
{
if (fKeysPdf == NULL)
CreateKeysPdf();
if (fKeysPdf == NULL) {
// if fKeysPdf is still NULL, then it means CreateKeysPdf failed
// so clear all the data members this function would normally determine
// and return
fFull = 0.0;
fKeysCutoff = -1;
fKeysConfLevel = 0.0;
return;
}
// now we have a keys pdf of the posterior
Double_t cutoff = 0.0;
fCutoffVar->setVal(cutoff);
RooAbsReal* integral = fProduct->createIntegral(fParameters, NormSet(fParameters));
Double_t full = integral->getVal(fParameters);
fFull = full;
delete integral;
if (full < 0.98) {
coutW(Eval) << "Warning: Integral of Keys PDF came out to " << full
<< " intead of expected value 1. Will continue using this "
<< "factor to normalize further integrals of this PDF." << endl;
}
// kbelasco: Is there a better way to set the search range?
// from 0 to max value of Keys
// kbelasco: how to get max value?
//Double_t max = product.maxVal(product.getMaxVal(fParameters));
Double_t volume = 1.0;
TIterator* it = fParameters.createIterator();
RooRealVar* var;
while ((var = (RooRealVar*)it->Next()) != NULL)
volume *= (var->getMax() - var->getMin());
delete it;
Double_t topCutoff = full / volume;
Double_t bottomCutoff = topCutoff;
Double_t confLevel = CalcConfLevel(topCutoff, full);
if (AcceptableConfLevel(confLevel)) {
fKeysConfLevel = confLevel;
fKeysCutoff = topCutoff;
return;
}
Bool_t changed = kFALSE;
// find high end of range
while (confLevel > fConfidenceLevel) {
topCutoff *= 2.0;
confLevel = CalcConfLevel(topCutoff, full);
if (AcceptableConfLevel(confLevel)) {
fKeysConfLevel = confLevel;
fKeysCutoff = topCutoff;
return;
}
changed = kTRUE;
}
if (changed) {
bottomCutoff = topCutoff / 2.0;
} else {
changed = kFALSE;
bottomCutoff /= 2.0;
confLevel = CalcConfLevel(bottomCutoff, full);
if (AcceptableConfLevel(confLevel)) {
fKeysConfLevel = confLevel;
fKeysCutoff = bottomCutoff;
return;
}
while (confLevel < fConfidenceLevel) {
bottomCutoff /= 2.0;
confLevel = CalcConfLevel(bottomCutoff, full);
if (AcceptableConfLevel(confLevel)) {
fKeysConfLevel = confLevel;
fKeysCutoff = bottomCutoff;
return;
}
changed = kTRUE;
}
if (changed) {
topCutoff = bottomCutoff * 2.0;
}
}
coutI(Eval) << "range set: [" << bottomCutoff << ", " << topCutoff << "]"
<< endl;
cutoff = (topCutoff + bottomCutoff) / 2.0;
confLevel = CalcConfLevel(cutoff, full);
// need to use WithinDeltaFraction() because sometimes the integrating the
// posterior in this binary search seems to not have enough granularity to
// find an acceptable conf level (small no. of strange cases).
// WithinDeltaFraction causes the search to terminate when
// topCutoff is essentially equal to bottomCutoff (compared to the magnitude
// of their mean).
while (!AcceptableConfLevel(confLevel) &&
!WithinDeltaFraction(topCutoff, bottomCutoff)) {
if (confLevel > fConfidenceLevel)
bottomCutoff = cutoff;
else if (confLevel < fConfidenceLevel)
topCutoff = cutoff;
cutoff = (topCutoff + bottomCutoff) / 2.0;
coutI(Eval) << "cutoff range: [" << bottomCutoff << ", "
<< topCutoff << "]" << endl;
confLevel = CalcConfLevel(cutoff, full);
}
fKeysCutoff = cutoff;
fKeysConfLevel = confLevel;
}
void MCMCInterval::DetermineByHist()
{
if (fUseSparseHist)
DetermineBySparseHist();
else
DetermineByDataHist();
}
void MCMCInterval::DetermineBySparseHist()
{
Long_t numBins;
if (fSparseHist == NULL)
CreateSparseHist();
if (fSparseHist == NULL) {
// if fSparseHist is still NULL, then CreateSparseHist failed
fHistCutoff = -1;
fHistConfLevel = 0.0;
return;
}
numBins = (Long_t)fSparseHist->GetNbins();
std::vector bins(numBins);
for (Int_t ibin = 0; ibin < numBins; ibin++)
bins[ibin] = (Long_t)ibin;
std::stable_sort(bins.begin(), bins.end(), CompareSparseHistBins(fSparseHist));
Double_t nEntries = fSparseHist->GetSumw();
Double_t sum = 0;
Double_t content;
Int_t i;
// see above note on indexing to understand numBins - 3
for (i = numBins - 1; i >= 0; i--) {
content = fSparseHist->GetBinContent(bins[i]);
if ((sum + content) / nEntries >= fConfidenceLevel) {
fHistCutoff = content;
if (fIsHistStrict) {
sum += content;
i--;
break;
} else {
i++;
break;
}
}
sum += content;
}
if (fIsHistStrict) {
// keep going to find the sum
for ( ; i >= 0; i--) {
content = fSparseHist->GetBinContent(bins[i]);
if (content == fHistCutoff)
sum += content;
else
break; // content must be < fHistCutoff
}
} else {
// backtrack to find the cutoff and sum
for ( ; i < numBins; i++) {
content = fSparseHist->GetBinContent(bins[i]);
if (content > fHistCutoff) {
fHistCutoff = content;
break;
} else // content == fHistCutoff
sum -= content;
if (i == numBins - 1)
// still haven't set fHistCutoff correctly yet, and we have no bins
// left, so set fHistCutoff to something higher than the tallest bin
fHistCutoff = content + 1.0;
}
}
fHistConfLevel = sum / nEntries;
}
void MCMCInterval::DetermineByDataHist()
{
Int_t numBins;
if (fDataHist == NULL)
CreateDataHist();
if (fDataHist == NULL) {
// if fDataHist is still NULL, then CreateDataHist failed
fHistCutoff = -1;
fHistConfLevel = 0.0;
return;
}
numBins = fDataHist->numEntries();
std::vector bins(numBins);
for (Int_t ibin = 0; ibin < numBins; ibin++)
bins[ibin] = ibin;
std::stable_sort(bins.begin(), bins.end(), CompareDataHistBins(fDataHist));
Double_t nEntries = fDataHist->sum(kFALSE);
Double_t sum = 0;
Double_t content;
Int_t i;
for (i = numBins - 1; i >= 0; i--) {
fDataHist->get(bins[i]);
content = fDataHist->weight();
if ((sum + content) / nEntries >= fConfidenceLevel) {
fHistCutoff = content;
if (fIsHistStrict) {
sum += content;
i--;
break;
} else {
i++;
break;
}
}
sum += content;
}
if (fIsHistStrict) {
// keep going to find the sum
for ( ; i >= 0; i--) {
fDataHist->get(bins[i]);
content = fDataHist->weight();
if (content == fHistCutoff)
sum += content;
else
break; // content must be < fHistCutoff
}
} else {
// backtrack to find the cutoff and sum
for ( ; i < numBins; i++) {
fDataHist->get(bins[i]);
content = fDataHist->weight();
if (content > fHistCutoff) {
fHistCutoff = content;
break;
} else // content == fHistCutoff
sum -= content;
if (i == numBins - 1)
// still haven't set fHistCutoff correctly yet, and we have no bins
// left, so set fHistCutoff to something higher than the tallest bin
fHistCutoff = content + 1.0;
}
}
fHistConfLevel = sum / nEntries;
}
Double_t MCMCInterval::GetActualConfidenceLevel()
{
if (fIntervalType == kShortest) {
if (fUseKeys)
return fKeysConfLevel;
else
return fHistConfLevel;
} else if (fIntervalType == kTailFraction) {
return fTFConfLevel;
} else {
coutE(InputArguments) << "MCMCInterval::GetActualConfidenceLevel: "
<< "not implemented for this type of interval. Returning 0." << endl;
return 0;
}
}
Double_t MCMCInterval::LowerLimit(RooRealVar& param)
{
switch (fIntervalType) {
case kShortest:
return LowerLimitShortest(param);
case kTailFraction:
return LowerLimitTailFraction(param);
default:
coutE(InputArguments) << "MCMCInterval::LowerLimit(): " <<
"Error: Interval type not set" << endl;
return RooNumber::infinity();
}
}
Double_t MCMCInterval::UpperLimit(RooRealVar& param)
{
switch (fIntervalType) {
case kShortest:
return UpperLimitShortest(param);
case kTailFraction:
return UpperLimitTailFraction(param);
default:
coutE(InputArguments) << "MCMCInterval::UpperLimit(): " <<
"Error: Interval type not set" << endl;
return RooNumber::infinity();
}
}
Double_t MCMCInterval::LowerLimitTailFraction(RooRealVar& /*param*/)
{
if (fTFLower == -1.0 * RooNumber::infinity())
DetermineTailFractionInterval();
return fTFLower;
}
Double_t MCMCInterval::UpperLimitTailFraction(RooRealVar& /*param*/)
{
if (fTFUpper == RooNumber::infinity())
DetermineTailFractionInterval();
return fTFUpper;
}
Double_t MCMCInterval::LowerLimitShortest(RooRealVar& param)
{
if (fUseKeys)
return LowerLimitByKeys(param);
else
return LowerLimitByHist(param);
}
Double_t MCMCInterval::UpperLimitShortest(RooRealVar& param)
{
if (fUseKeys)
return UpperLimitByKeys(param);
else
return UpperLimitByHist(param);
}
// Determine the lower limit for param on this interval
// using the binned data set
Double_t MCMCInterval::LowerLimitByHist(RooRealVar& param)
{
if (fUseSparseHist)
return LowerLimitBySparseHist(param);
else
return LowerLimitByDataHist(param);
}
// Determine the upper limit for param on this interval
// using the binned data set
Double_t MCMCInterval::UpperLimitByHist(RooRealVar& param)
{
if (fUseSparseHist)
return UpperLimitBySparseHist(param);
else
return UpperLimitByDataHist(param);
}
// Determine the lower limit for param on this interval
// using the binned data set
Double_t MCMCInterval::LowerLimitBySparseHist(RooRealVar& param)
{
if (fDimension != 1) {
coutE(InputArguments) << "In MCMCInterval::LowerLimitBySparseHist: "
<< "Sorry, will not compute lower limit unless dimension == 1" << endl;
return param.getMin();
}
if (fHistCutoff < 0)
DetermineBySparseHist(); // this initializes fSparseHist
if (fHistCutoff < 0) {
// if fHistCutoff < 0 still, then determination of interval failed
coutE(Eval) << "In MCMCInterval::LowerLimitBySparseHist: "
<< "couldn't determine cutoff. Check that num burn in steps < num "
<< "steps in the Markov chain. Returning param.getMin()." << endl;
return param.getMin();
}
std::vector coord(fDimension);
for (Int_t d = 0; d < fDimension; d++) {
if (strcmp(fAxes[d]->GetName(), param.GetName()) == 0) {
Long_t numBins = (Long_t)fSparseHist->GetNbins();
Double_t lowerLimit = param.getMax();
Double_t val;
for (Long_t i = 0; i < numBins; i++) {
if (fSparseHist->GetBinContent(i, &coord[0]) >= fHistCutoff) {
val = fSparseHist->GetAxis(d)->GetBinCenter(coord[d]);
if (val < lowerLimit)
lowerLimit = val;
}
}
return lowerLimit;
}
}
return param.getMin();
}
// Determine the lower limit for param on this interval
// using the binned data set
Double_t MCMCInterval::LowerLimitByDataHist(RooRealVar& param)
{
if (fHistCutoff < 0)
DetermineByDataHist(); // this initializes fDataHist
if (fHistCutoff < 0) {
// if fHistCutoff < 0 still, then determination of interval failed
coutE(Eval) << "In MCMCInterval::LowerLimitByDataHist: "
<< "couldn't determine cutoff. Check that num burn in steps < num "
<< "steps in the Markov chain. Returning param.getMin()." << endl;
return param.getMin();
}
for (Int_t d = 0; d < fDimension; d++) {
if (strcmp(fAxes[d]->GetName(), param.GetName()) == 0) {
Int_t numBins = fDataHist->numEntries();
Double_t lowerLimit = param.getMax();
Double_t val;
for (Int_t i = 0; i < numBins; i++) {
fDataHist->get(i);
if (fDataHist->weight() >= fHistCutoff) {
val = fDataHist->get()->getRealValue(param.GetName());
if (val < lowerLimit)
lowerLimit = val;
}
}
return lowerLimit;
}
}
return param.getMin();
}
// Determine the upper limit for param on this interval
// using the binned data set
Double_t MCMCInterval::UpperLimitBySparseHist(RooRealVar& param)
{
if (fDimension != 1) {
coutE(InputArguments) << "In MCMCInterval::UpperLimitBySparseHist: "
<< "Sorry, will not compute upper limit unless dimension == 1" << endl;
return param.getMax();
}
if (fHistCutoff < 0)
DetermineBySparseHist(); // this initializes fSparseHist
if (fHistCutoff < 0) {
// if fHistCutoff < 0 still, then determination of interval failed
coutE(Eval) << "In MCMCInterval::UpperLimitBySparseHist: "
<< "couldn't determine cutoff. Check that num burn in steps < num "
<< "steps in the Markov chain. Returning param.getMax()." << endl;
return param.getMax();
}
std::vector coord(fDimension);
for (Int_t d = 0; d < fDimension; d++) {
if (strcmp(fAxes[d]->GetName(), param.GetName()) == 0) {
Long_t numBins = (Long_t)fSparseHist->GetNbins();
Double_t upperLimit = param.getMin();
Double_t val;
for (Long_t i = 0; i < numBins; i++) {
if (fSparseHist->GetBinContent(i, &coord[0]) >= fHistCutoff) {
val = fSparseHist->GetAxis(d)->GetBinCenter(coord[d]);
if (val > upperLimit)
upperLimit = val;
}
}
return upperLimit;
}
}
return param.getMax();
}
// Determine the upper limit for param on this interval
// using the binned data set
Double_t MCMCInterval::UpperLimitByDataHist(RooRealVar& param)
{
if (fHistCutoff < 0)
DetermineByDataHist(); // this initializes fDataHist
if (fHistCutoff < 0) {
// if fHistCutoff < 0 still, then determination of interval failed
coutE(Eval) << "In MCMCInterval::UpperLimitByDataHist: "
<< "couldn't determine cutoff. Check that num burn in steps < num "
<< "steps in the Markov chain. Returning param.getMax()." << endl;
return param.getMax();
}
for (Int_t d = 0; d < fDimension; d++) {
if (strcmp(fAxes[d]->GetName(), param.GetName()) == 0) {
Int_t numBins = fDataHist->numEntries();
Double_t upperLimit = param.getMin();
Double_t val;
for (Int_t i = 0; i < numBins; i++) {
fDataHist->get(i);
if (fDataHist->weight() >= fHistCutoff) {
val = fDataHist->get()->getRealValue(param.GetName());
if (val > upperLimit)
upperLimit = val;
}
}
return upperLimit;
}
}
return param.getMax();
}
// Determine the lower limit for param on this interval
// using the keys pdf
Double_t MCMCInterval::LowerLimitByKeys(RooRealVar& param)
{
if (fKeysCutoff < 0)
DetermineByKeys();
if (fKeysDataHist == NULL)
CreateKeysDataHist();
if (fKeysCutoff < 0 || fKeysDataHist == NULL) {
// failure in determination of cutoff and/or creation of histogram
coutE(Eval) << "in MCMCInterval::LowerLimitByKeys(): "
<< "couldn't find lower limit, check that the number of burn in "
<< "steps < number of total steps in the Markov chain. Returning "
<< "param.getMin()" << endl;
return param.getMin();
}
for (Int_t d = 0; d < fDimension; d++) {
if (strcmp(fAxes[d]->GetName(), param.GetName()) == 0) {
Int_t numBins = fKeysDataHist->numEntries();
Double_t lowerLimit = param.getMax();
Double_t val;
for (Int_t i = 0; i < numBins; i++) {
fKeysDataHist->get(i);
if (fKeysDataHist->weight() >= fKeysCutoff) {
val = fKeysDataHist->get()->getRealValue(param.GetName());
if (val < lowerLimit)
lowerLimit = val;
}
}
return lowerLimit;
}
}
return param.getMin();
}
// Determine the upper limit for param on this interval
// using the keys pdf
Double_t MCMCInterval::UpperLimitByKeys(RooRealVar& param)
{
if (fKeysCutoff < 0)
DetermineByKeys();
if (fKeysDataHist == NULL)
CreateKeysDataHist();
if (fKeysCutoff < 0 || fKeysDataHist == NULL) {
// failure in determination of cutoff and/or creation of histogram
coutE(Eval) << "in MCMCInterval::UpperLimitByKeys(): "
<< "couldn't find upper limit, check that the number of burn in "
<< "steps < number of total steps in the Markov chain. Returning "
<< "param.getMax()" << endl;
return param.getMax();
}
for (Int_t d = 0; d < fDimension; d++) {
if (strcmp(fAxes[d]->GetName(), param.GetName()) == 0) {
Int_t numBins = fKeysDataHist->numEntries();
Double_t upperLimit = param.getMin();
Double_t val;
for (Int_t i = 0; i < numBins; i++) {
fKeysDataHist->get(i);
if (fKeysDataHist->weight() >= fKeysCutoff) {
val = fKeysDataHist->get()->getRealValue(param.GetName());
if (val > upperLimit)
upperLimit = val;
}
}
return upperLimit;
}
}
return param.getMax();
}
// Determine the approximate maximum value of the Keys PDF
Double_t MCMCInterval::GetKeysMax()
{
if (fKeysCutoff < 0)
DetermineByKeys();
if (fKeysDataHist == NULL)
CreateKeysDataHist();
if (fKeysDataHist == NULL) {
// failure in determination of cutoff and/or creation of histogram
coutE(Eval) << "in MCMCInterval::KeysMax(): "
<< "couldn't find Keys max value, check that the number of burn in "
<< "steps < number of total steps in the Markov chain. Returning 0"
<< endl;
return 0;
}
Int_t numBins = fKeysDataHist->numEntries();
Double_t max = 0;
Double_t w;
for (Int_t i = 0; i < numBins; i++) {
fKeysDataHist->get(i);
w = fKeysDataHist->weight();
if (w > max)
max = w;
}
return max;
}
Double_t MCMCInterval::GetHistCutoff()
{
if (fHistCutoff < 0)
DetermineByHist();
return fHistCutoff;
}
Double_t MCMCInterval::GetKeysPdfCutoff()
{
if (fKeysCutoff < 0)
DetermineByKeys();
// kbelasco: if fFull hasn't been set (because Keys creation failed because
// fNumBurnInSteps >= fChain->Size()) then this will return infinity, which
// seems ok to me since it will indicate error
return fKeysCutoff / fFull;
}
Double_t MCMCInterval::CalcConfLevel(Double_t cutoff, Double_t full)
{
RooAbsReal* integral;
Double_t confLevel;
fCutoffVar->setVal(cutoff);
integral = fProduct->createIntegral(fParameters, NormSet(fParameters));
confLevel = integral->getVal(fParameters) / full;
coutI(Eval) << "cutoff = " << cutoff << ", conf = " << confLevel << endl;
//cout << "tmp: cutoff = " << cutoff << ", conf = " << confLevel << endl;
delete integral;
return confLevel;
}
TH1* MCMCInterval::GetPosteriorHist()
{
if(fConfidenceLevel == 0)
coutE(InputArguments) << "Error in MCMCInterval::GetPosteriorHist: "
<< "confidence level not set " << endl;
if (fHist == NULL)
CreateHist();
if (fHist == NULL)
// if fHist is still NULL, then CreateHist failed
return NULL;
return (TH1*) fHist->Clone("MCMCposterior_hist");
}
RooNDKeysPdf* MCMCInterval::GetPosteriorKeysPdf()
{
if (fConfidenceLevel == 0)
coutE(InputArguments) << "Error in MCMCInterval::GetPosteriorKeysPdf: "
<< "confidence level not set " << endl;
if (fKeysPdf == NULL)
CreateKeysPdf();
if (fKeysPdf == NULL)
// if fKeysPdf is still NULL, then it means CreateKeysPdf failed
return NULL;
return (RooNDKeysPdf*) fKeysPdf->Clone("MCMCPosterior_keys");
}
RooProduct* MCMCInterval::GetPosteriorKeysProduct()
{
if (fConfidenceLevel == 0)
coutE(InputArguments) << "MCMCInterval::GetPosteriorKeysProduct: "
<< "confidence level not set " << endl;
if (fProduct == NULL) {
CreateKeysPdf();
DetermineByKeys();
}
if (fProduct == NULL)
// if fProduct is still NULL, then it means CreateKeysPdf failed
return NULL;
return (RooProduct*) fProduct->Clone("MCMCPosterior_keysproduct");
}
RooArgSet* MCMCInterval::GetParameters() const
{
// returns list of parameters
return new RooArgSet(fParameters);
}
Bool_t MCMCInterval::AcceptableConfLevel(Double_t confLevel)
{
return (TMath::Abs(confLevel - fConfidenceLevel) < fEpsilon);
}
Bool_t MCMCInterval::WithinDeltaFraction(Double_t a, Double_t b)
{
return (TMath::Abs(a - b) < TMath::Abs(fDelta * (a + b)/2));
}
void MCMCInterval::CreateKeysDataHist()
{
if (fAxes == NULL)
return;
if (fProduct == NULL)
DetermineByKeys();
if (fProduct == NULL)
// if fProduct still NULL, then creation failed
return;
//RooAbsBinning** savedBinning = new RooAbsBinning*[fDimension];
Int_t* savedBins = new Int_t[fDimension];
Int_t i;
Double_t numBins;
RooRealVar* var;
// kbelasco: Note - the accuracy is only increased here if the binning for
// each RooRealVar is uniform
// kbelasco: look into why saving the binnings and replacing them doesn't
// work (replaces with 1 bin always).
// Note: this code modifies the binning for the parameters (if they are
// uniform) and sets them back to what they were. If the binnings are not
// uniform, this code does nothing.
// first scan through fAxes to make sure all binnings are uniform, or else
// we can't change the number of bins because there seems to be an error
// when setting the binning itself rather than just the number of bins
Bool_t tempChangeBinning = true;
for (i = 0; i < fDimension; i++) {
if (!fAxes[i]->getBinning(NULL, false, false).isUniform()) {
tempChangeBinning = false;
break;
}
}
// kbelasco: for 1 dimension this should be fine, but for more dimensions
// the total nubmer of bins in the histogram increases exponentially with
// the dimension, so don't do this above 1-D for now.
if (fDimension >= 2)
tempChangeBinning = false;
if (tempChangeBinning) {
// set high nubmer of bins for high accuracy on lower/upper limit by keys
for (i = 0; i < fDimension; i++) {
var = fAxes[i];
//savedBinning[i] = &var->getBinning("__binning_clone", false, true);
savedBins[i] = var->getBinning(NULL, false, false).numBins();
numBins = (var->getMax() - var->getMin()) / fEpsilon;
var->setBins((Int_t)numBins);
}
}
fKeysDataHist = new RooDataHist("_productDataHist",
"Keys PDF & Heaviside Product Data Hist", fParameters);
fKeysDataHist = fProduct->fillDataHist(fKeysDataHist, &fParameters, 1.);
if (tempChangeBinning) {
// set the binning back to normal
for (i = 0; i < fDimension; i++)
//fAxes[i]->setBinning(*savedBinning[i], NULL);
//fAxes[i]->setBins(savedBinning[i]->numBins(), NULL);
fAxes[i]->setBins(savedBins[i], NULL);
}
//delete[] savedBinning;
delete[] savedBins;
}
Bool_t MCMCInterval::CheckParameters(const RooArgSet& parameterPoint) const
{
// check that the parameters are correct
if (parameterPoint.getSize() != fParameters.getSize() ) {
coutE(Eval) << "MCMCInterval: size is wrong, parameters don't match" << std::endl;
return kFALSE;
}
if ( ! parameterPoint.equals( fParameters ) ) {
coutE(Eval) << "MCMCInterval: size is ok, but parameters don't match" << std::endl;
return kFALSE;
}
return kTRUE;
}