// @(#)root/roostats:$Id$
// Author: Kyle Cranmer January 2009
/*************************************************************************
* Copyright (C) 1995-2008, Rene Brun and Fons Rademakers. *
* All rights reserved. *
* *
* For the licensing terms see $ROOTSYS/LICENSE. *
* For the list of contributors see $ROOTSYS/README/CREDITS. *
*************************************************************************/
//_________________________________________________
/*
BEGIN_HTML
NeymanConstruction is a concrete implementation of the IntervalCalculator interface that, as the name suggests,
performs a Neyman construction.
It produces a RooStats::PointSetInterval, which is a concrete implementation of the ConfInterval interface.

The Neyman construction is not a uniquely defined statistical technique; it requires that one specify an ordering rule
or ordering principle, which is usually encoded by choosing a specific test statistic and limits of integration
(corresponding to upper/lower/central limits). As a result, this class must be configured with the corresponding
information before it can produce an interval. Common configurations, such as the Feldman-Cousins approach, can be
enforced by other lightweight classes.

The Neyman construction considers every point in the parameter space independently; no assumption is made
that the interval is connected or of a particular shape. As a result, the PointSetInterval class is used to
represent the result. The user indicates which points in the parameter space to include in the construction by
providing a RooDataSet (or any RooAbsData) of parameter points via SetParameterPointsToTest.

This class is fairly lightweight, because the choice of parameter points to be considered is factorized, and so is the
creation of the sampling distribution of the test statistic (which is done by a concrete class implementing the TestStatSampler interface). As a result, this class essentially just drives the construction by:
- using a TestStatSampler to create the SamplingDistribution of a user-defined test statistic for each parameter point of interest,
- defining the acceptance region in the data by finding the thresholds on the test statistic such that the integral of the sampling distribution is of the appropriate size and consistent with the limits of integration (e.g. upper/lower/central limits),
- and finally updating the PointSetInterval based on whether the value of the test statistic evaluated on the data is in the acceptance region.
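
A minimal usage sketch is shown below. The concrete choices (the profile likelihood test statistic, 500 toys
per point, a 95% central interval, and the data set "parameterScan" holding the parameter points) are
illustrative placeholders, not requirements of the class:
<pre>
// assumed to exist already: RooAbsData& data, ModelConfig& modelConfig,
// and a RooDataSet* parameterScan with the parameter points to test
// (all classes are in namespace RooStats)
ProfileLikelihoodTestStat testStat(*modelConfig.GetPdf());
ToyMCSampler sampler(testStat, 500);        // 500 toys per scanned point (illustrative)
NeymanConstruction nc(data, modelConfig);
nc.SetConfidenceLevel(0.95);                // size = 0.05
nc.SetLeftSideTailFraction(0.5);            // central interval
nc.SetTestStatSampler(sampler);
nc.SetParameterPointsToTest(*parameterScan);
PointSetInterval* interval = nc.GetInterval();
</pre>
The lightweight FeldmanCousins class drives this same machinery with a Feldman-Cousins-style configuration.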
END_HTML
*/
//
#ifndef RooStats_NeymanConstruction
#include "RooStats/NeymanConstruction.h"
#endif
#ifndef RooStats_RooStatsUtils
#include "RooStats/RooStatsUtils.h"
#endif
#ifndef RooStats_PointSetInterval
#include "RooStats/PointSetInterval.h"
#endif
#include "RooStats/SamplingDistribution.h"
#include "RooStats/ToyMCSampler.h"
#include "RooStats/ModelConfig.h"
#include "RooMsgService.h"
#include "RooGlobalFunc.h"
#include "RooDataSet.h"
#include "TFile.h"
#include "TTree.h"
#include "TMath.h"
#include "TH1F.h"
ClassImp(RooStats::NeymanConstruction) ;
using namespace RooFit;
using namespace RooStats;
using namespace std;
//_______________________________________________________
NeymanConstruction::NeymanConstruction(RooAbsData& data, ModelConfig& model):
fSize(0.05),
fData(data),
fModel(model),
fTestStatSampler(0),
fPointsToTest(0),
fLeftSideFraction(0),
fConfBelt(0), // constructed with tree data
fAdaptiveSampling(false),
fAdditionalNToysFactor(1.),
fSaveBeltToFile(false),
fCreateBelt(false)
{
// constructor taking the data and the model configuration
// fWS = new RooWorkspace();
// fOwnsWorkspace = true;
// fDataName = "";
// fPdfName = "";
}
//_______________________________________________________
NeymanConstruction::~NeymanConstruction() {
// destructor
// if(fOwnsWorkspace && fWS) delete fWS;
// if(fConfBelt) delete fConfBelt;
}
//_______________________________________________________
PointSetInterval* NeymanConstruction::GetInterval() const {
// Main interface to get a RooStats::ConfInterval.
// It constructs a RooStats::PointSetInterval.
TFile* f=0;
if(fSaveBeltToFile){
//coverity[FORWARD_NULL]
oocoutI(f,Contents) << "NeymanConstruction saving ConfidenceBelt to file SamplingDistributions.root" << endl;
f = new TFile("SamplingDistributions.root","recreate");
}
if(!&fData) {
oocoutF((TObject*)0,Contents) << "Neyman Construction: data is not set, can't get interval" << endl;
return 0;
}
Int_t npass = 0;
RooArgSet* point;
// strange problems when using snapshots.
// RooArgSet* fPOI = (RooArgSet*) fModel.GetParametersOfInterest()->snapshot();
RooArgSet* fPOI = new RooArgSet(*fModel.GetParametersOfInterest());
RooDataSet* pointsInInterval = new RooDataSet("pointsInInterval",
"points in interval",
*(fPointsToTest->get(0)) );
// loop over points to test
for(Int_t i=0; i<fPointsToTest->numEntries(); ++i){
// get a parameter point from the list of points to test.
point = (RooArgSet*) fPointsToTest->get(i);//->clone("temp");
// set parameters of interest to current point
*fPOI = *point;
// set test stat sampler to use this point
fTestStatSampler->SetParametersForTestStat(*fPOI);
// get the value of the test statistic for this data set
Double_t thisTestStatistic = fTestStatSampler->EvaluateTestStatistic(fData, *fPOI );
/*
cout << "NC CHECK: " << i << endl;
point->Print();
fPOI->Print("v");
fData.Print();
cout <<"thisTestStatistic = " << thisTestStatistic << endl;
*/
// find the lower & upper thresholds on the test statistic that
// define the acceptance region in the data
SamplingDistribution* samplingDist=0;
Double_t sigma;
Double_t upperEdgeOfAcceptance, upperEdgeMinusSigma, upperEdgePlusSigma;
Double_t lowerEdgeOfAcceptance, lowerEdgeMinusSigma, lowerEdgePlusSigma;
Int_t additionalMC=0;
// the adaptive sampling algorithm wants at least one toy event to be outside
// of the requested p-value including the sampling variation. That leads to the conditions
// N-1 = (1-alpha)N + Z sqrt(N - (1-alpha)N) // for the upper limit and
// 1 = alpha N - Z sqrt(alpha N) // for the lower limit
//
// both reduce to alpha N - Z sqrt(alpha N) - 1 = 0, and solving for N gives:
// N = (3 + sqrt(5)) / (2 alpha) ~ 2.6/alpha for Z = 1 (which is used currently)
// thus, a good target for the first batch of toys is N ~ 4/alpha, obtained below by
// starting from 2/alpha and doubling before the first call.
// alpha is replaced here by the smaller tail probability: alpha*Min(leftSideFrac, 1.-leftSideFrac)
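// e.g. for fSize = 0.05 and fLeftSideFraction = 0.5 the smaller tail is 0.025,
// so totalMC starts at 2/0.025 = 80 and the first (doubled) batch is 160 toys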
Int_t totalMC = (Int_t) (2./fSize/TMath::Min(fLeftSideFraction,1.-fLeftSideFraction));
if(fLeftSideFraction==0. || fLeftSideFraction ==1.){
totalMC = (Int_t) (2./fSize);
}
// scale the number of toys by the user-controlled factor
Double_t tmc = Double_t(totalMC)*fAdditionalNToysFactor;
totalMC = (Int_t) tmc;
ToyMCSampler* toyMCSampler = dynamic_cast<ToyMCSampler*>(fTestStatSampler);
if(fAdaptiveSampling && toyMCSampler) {
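// adaptive mode: keep doubling the number of toys until the observed test statistic
// is more than one statistical uncertainty away from both edges of the acceptance
// region, or until the toy budget of 100/fSize is exhausted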
do{
// this will be executed first, then the while condition is checked
// as an exit condition for the loop.
// the next line is where most of the time will be spent
// generating the sampling dist of the test statistic.
additionalMC = 2*totalMC; // grow by a factor of two
samplingDist =
toyMCSampler->AppendSamplingDistribution(*point,
samplingDist,
additionalMC);
if (!samplingDist) {
oocoutE((TObject*)0,Eval) << "Neyman Construction: error generating sampling distribution" << endl;
return 0;
}
totalMC=samplingDist->GetSize();
//cout << "without sigma upper = " <<
//samplingDist->InverseCDF( 1. - ((1.-fLeftSideFraction) * fSize) ) << endl;
sigma = 1;
upperEdgeOfAcceptance =
samplingDist->InverseCDF( 1. - ((1.-fLeftSideFraction) * fSize) ,
sigma, upperEdgePlusSigma);
sigma = -1;
samplingDist->InverseCDF( 1. - ((1.-fLeftSideFraction) * fSize) ,
sigma, upperEdgeMinusSigma);
sigma = 1;
lowerEdgeOfAcceptance =
samplingDist->InverseCDF( fLeftSideFraction * fSize ,
sigma, lowerEdgePlusSigma);
sigma = -1;
samplingDist->InverseCDF( fLeftSideFraction * fSize ,
sigma, lowerEdgeMinusSigma);
ooccoutD(samplingDist,Eval) << "NeymanConstruction: "
<< "total MC = " << totalMC
<< " this test stat = " << thisTestStatistic << endl
<< " upper edge -1sigma = " << upperEdgeMinusSigma
<< ", upperEdge = " << upperEdgeOfAcceptance
<< ", upperEdge +1sigma = " << upperEdgePlusSigma << endl
<< " lower edge -1sigma = " << lowerEdgeMinusSigma
<< ", lowerEdge = " << lowerEdgeOfAcceptance
<< ", lowerEdge +1sigma = " << lowerEdgePlusSigma << endl;
} while((
(thisTestStatistic <= upperEdgeOfAcceptance &&
thisTestStatistic > upperEdgeMinusSigma)
|| (thisTestStatistic >= upperEdgeOfAcceptance &&
thisTestStatistic < upperEdgePlusSigma)
|| (thisTestStatistic <= lowerEdgeOfAcceptance &&
thisTestStatistic > lowerEdgeMinusSigma)
|| (thisTestStatistic >= lowerEdgeOfAcceptance &&
thisTestStatistic < lowerEdgePlusSigma)
) && (totalMC < 100./fSize)
) ; // need ; here
} else {
// the next line is where most of the time will be spent
// generating the sampling dist of the test statistic.
samplingDist = fTestStatSampler->GetSamplingDistribution(*point);
if (!samplingDist) {
oocoutE((TObject*)0,Eval) << "Neyman Construction: error generating sampling distribution" << endl;
return 0;
}
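// fixed-size sampling: take the acceptance region directly from the quantiles of the
// sampling distribution, e.g. a 95% central interval (fSize = 0.05, fLeftSideFraction = 0.5)
// uses the 2.5% and 97.5% quantiles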
lowerEdgeOfAcceptance =
samplingDist->InverseCDF( fLeftSideFraction * fSize );
upperEdgeOfAcceptance =
samplingDist->InverseCDF( 1. - ((1.-fLeftSideFraction) * fSize) );
}
// add acceptance region to ConfidenceBelt
if(fConfBelt && fCreateBelt){
// cout << "conf belt set " << fConfBelt << endl;
fConfBelt->AddAcceptanceRegion(*point, i,
lowerEdgeOfAcceptance,
upperEdgeOfAcceptance);
}
// printout some debug info
TIter itr = point->createIterator();
RooRealVar* myarg;
ooccoutP(samplingDist,Eval) << "NeymanConstruction: Prog: "<< i+1<<"/"<numEntries()
<< " total MC = " << samplingDist->GetSize()
<< " this test stat = " << thisTestStatistic << endl;
ooccoutP(samplingDist,Eval) << " ";
while ((myarg = (RooRealVar *)itr.Next())) {
ooccoutP(samplingDist,Eval) << myarg->GetName() << "=" << myarg->getVal() << " ";
}
ooccoutP(samplingDist,Eval) << "[" << lowerEdgeOfAcceptance << ", "
<< upperEdgeOfAcceptance << "] " << " in interval = " <<
(thisTestStatistic >= lowerEdgeOfAcceptance && thisTestStatistic <= upperEdgeOfAcceptance)
<< endl << endl;
// Check if this data is in the acceptance region
if(thisTestStatistic >= lowerEdgeOfAcceptance && thisTestStatistic <= upperEdgeOfAcceptance) {
// if so, set this point to true
// fPointsToTest->add(*point, 1.); // this behaves differently for Hist and DataSet
pointsInInterval->add(*point);
++npass;
}
if(fSaveBeltToFile){
//write to file
samplingDist->Write();
string tmpName = "hist_";
tmpName+=samplingDist->GetName();
TH1F* h = new TH1F(tmpName.c_str(),"",500,0.,5.);
for(int ii=0; ii<samplingDist->GetSize(); ++ii){
h->Fill(samplingDist->GetSamplingDistribution().at(ii) );
}
h->Write();
delete h;
}
delete samplingDist;
// delete point; // from dataset
}
oocoutI(pointsInInterval,Eval) << npass << " points in interval" << endl;
// create an interval based pointsInInterval
PointSetInterval* interval
= new PointSetInterval("ClassicalConfidenceInterval", *pointsInInterval);
if(fSaveBeltToFile){
// write belt to file
fConfBelt->Write();
f->Close();
}
delete f;
//delete data;
return interval;
}