// @(#)root/roostats:$Id$
// Author: Kyle Cranmer 28/07/2008
/*************************************************************************
* Copyright (C) 1995-2008, Rene Brun and Fons Rademakers. *
* All rights reserved. *
* *
* For the licensing terms see $ROOTSYS/LICENSE. *
* For the list of contributors see $ROOTSYS/README/CREDITS. *
*************************************************************************/
//_________________________________________________
/*
BEGIN_HTML
BernsteinCorrection is a utility in RooStats to augment a nominal PDF with a polynomial
correction term. This is useful for incorporating systematic variations to the nominal PDF.
The Bernstein basis polynomails are particularly appropriate because they are positive definite.
This tool was inspired by the work of Glen Cowan together with Stephan Horner, Sascha Caron,
Eilam Gross, and others.
The initial implementation is independent work. The major step forward in the approach was
to provide a well defined algorithm that specifies the order of polynomial to be included
in the correction. This is an emperical algorithm, so in addition to the nominal model it
needs either a real data set or a simulated one. In the early work, the nominal model was taken
to be a histogram from Monte Carlo simulations, but in this implementation it is generalized to an
arbitrary PDF (which includes a RooHistPdf). The algorithm basically consists of a
hypothesis test of an nth-order correction (null) against a n+1-th order correction (alternate).
The quantity q = -2 log LR is used to determine whether the n+1-th order correction is a major
improvement to the n-th order correction. The distribution of q is expected to be roughly
\chi^2 with one degree of freedom if the n-th order correction is a good model for the data.
Thus, one only moves to the n+1-th order correction of q is relatively large. The chance that
one moves from the n-th to the n+1-th order correction when the n-th order correction
(eg. a type 1 error) is sufficient is given by the Prob(\chi^2_1 > threshold). The constructor
of this class allows you to directly set this tolerance (in terms of probability that the n+1-th
term is added unnecessarily).
To do:
Add another method to the utility that will make the sampling distribution for -2 log lambda
for various m vs. m+1 order corrections using a nominal model and perhaps having two ways of
generating the toys (either via a histogram or via an independent model that is supposed to
reflect reality). That will allow one to make plots like Glen has at the end of his DRAFT
very easily.
END_HTML
*/
//
#ifndef RooStats_BernsteinCorrection
#include "RooStats/BernsteinCorrection.h"
#endif
#include "RooGlobalFunc.h"
#include "RooDataSet.h"
#include "RooRealVar.h"
#include "RooAbsPdf.h"
#include "RooFit.h"
#include "RooFitResult.h"
#include "TMath.h"
#include
#include
#include
#include
#include
#include "RooProdPdf.h"
#include "RooNLLVar.h"
#include "RooWorkspace.h"
#include "RooDataHist.h"
#include "RooHistPdf.h"
#include "RooBernstein.h"
ClassImp(RooStats::BernsteinCorrection) ;
using namespace RooFit;
using namespace RooStats;
using namespace std;
//____________________________________
BernsteinCorrection::BernsteinCorrection(Double_t tolerance):
fMaxCorrection(100), fTolerance(tolerance){
}
//____________________________________
Int_t BernsteinCorrection::ImportCorrectedPdf(RooWorkspace* wks,
const char* nominalName,
const char* varName,
const char* dataName){
// Main method for Bernstein correction.
// get ingredients out of workspace
RooRealVar* x = wks->var(varName);
RooAbsPdf* nominal = wks->pdf(nominalName);
RooAbsData* data = wks->data(dataName);
if (!x || !nominal || !data) {
cout << "Error: wrong name for pdf or variable or dataset - return -1 " << std::endl;
return -1;
}
// initialize alg, by checking how well nominal model fits
RooFitResult* nominalResult = nominal->fitTo(*data,Save(),Minos(kFALSE), Hesse(kFALSE),PrintLevel(-1));
Double_t lastNll= nominalResult->minNll();
// setup a log
std::stringstream log;
log << "------ Begin Bernstein Correction Log --------" << endl;
// Local variables that we want to keep in scope after loop
RooArgList coeff;
vector coefficients;
Double_t q = 1E6;
Int_t degree = -1;
// The while loop
bool keepGoing = true;
while( keepGoing) {
degree++;
// we need to generate names for vars on the fly
std::stringstream str;
str<<"_"<fitTo(*data,Save(),Minos(kFALSE), Hesse(kFALSE),PrintLevel(-1));
// Hypothesis test between previous correction (null)
// and this one (alternate). Use -2 log LR for test statistic
q = 2*(lastNll - result->minNll()); // -2 log lambda, goes like significance^2
// check if we should keep going based on rate of Type I error
keepGoing = (degree < 1 || TMath::Prob(q,1) < fTolerance);
if(!keepGoing){
// terminate loop, import corrected PDF
RooMsgService::instance().setGlobalKillBelow(RooFit::FATAL) ;
wks->import(*corrected);
RooMsgService::instance().setGlobalKillBelow(RooFit::DEBUG) ;
} else {
// memory management
delete corrected;
delete poly;
}
// for the log
if(degree != 0){
log << "degree = " << degree
<< " -log L("<minNll()
<< " q = " << q
<< " P(chi^2_1 > q) = " << TMath::Prob(q,1) << endl;;
}
// update last result for next iteration in loop
lastNll = result->minNll();
delete result;
}
log << "------ End Bernstein Correction Log --------" << endl;
cout << log.str();
return degree;
}
//____________________________________
void BernsteinCorrection::CreateQSamplingDist(RooWorkspace* wks,
const char* nominalName,
const char* varName,
const char* dataName,
TH1F* samplingDist,
TH1F* samplingDistExtra,
Int_t degree,
Int_t nToys){
// Create sampling distribution for q given degree-1 vs. degree corrections
// get ingredients out of workspace
RooRealVar* x = wks->var(varName);
RooAbsPdf* nominal = wks->pdf(nominalName);
RooAbsData* data = wks->data(dataName);
if (!x || !nominal || !data) {
cout << "Error: wrong name for pdf or variable or dataset ! " << std::endl;
return;
}
// setup a log
std::stringstream log;
log << "------ Begin Bernstein Correction Log --------" << endl;
// Local variables that we want to keep in scope after loop
RooArgList coeff; // n-th degree correction
RooArgList coeffNull; // n-1 correction
RooArgList coeffExtra; // n+1 correction
vector coefficients;
cout << "make coefs" << endl;
for(int i = 0; i<=degree+1; ++i) {
// we need to generate names for vars on the fly
std::stringstream str;
str<<"_"<numEntries());
// check to see how well this correction fits
RooFitResult* result
= corrected->fitTo(*tmpData,Save(),Minos(kFALSE),
Hesse(kFALSE),PrintLevel(-1));
RooFitResult* resultNull
= correctedNull->fitTo(*tmpData,Save(),Minos(kFALSE),
Hesse(kFALSE),PrintLevel(-1));
RooFitResult* resultExtra
= correctedExtra->fitTo(*tmpData,Save(),Minos(kFALSE),
Hesse(kFALSE),PrintLevel(-1));
// Hypothesis test between previous correction (null)
// and this one (alternate). Use -2 log LR for test statistic
q = 2*(resultNull->minNll() - result->minNll());
qExtra = 2*(result->minNll() - resultExtra->minNll());
samplingDist->Fill(q);
samplingDistExtra->Fill(qExtra);
cout << resultNull->minNll() << " " << result->minNll() << " " << q << endl;
delete tmpData;
delete result;
delete resultNull;
delete resultExtra;
}
// return samplingDist;
}