// @(#)root/roostats:$Id$ // Author: Sven Kreiss January 2012 // Author: Kyle Cranmer, Lorenzo Moneta, Gregory Schott, Wouter Verkerke /************************************************************************* * Copyright (C) 1995-2008, Rene Brun and Fons Rademakers. * * All rights reserved. * * * * For the licensing terms see $ROOTSYS/LICENSE. * * For the list of contributors see $ROOTSYS/README/CREDITS. * *************************************************************************/ #include "RooStats/ToyMCImportanceSampler.h" #ifndef ROO_MSG_SERVICE #include "RooMsgService.h" #endif #include "RooCategory.h" #include "TMath.h" using namespace RooFit; using namespace std; ClassImp(RooStats::ToyMCImportanceSampler) namespace RooStats { ToyMCImportanceSampler::~ToyMCImportanceSampler() { for( unsigned int i=0; i < fImportanceSnapshots.size(); i++ ) if(fImportanceSnapshots[i]) delete fImportanceSnapshots[i]; for( unsigned int i=0; i < fNullSnapshots.size(); i++ ) if(fNullSnapshots[i]) delete fNullSnapshots[i]; } void ToyMCImportanceSampler::ClearCache(void) { ToyMCSampler::ClearCache(); for( unsigned int i=0; i < fImpNLLs.size(); i++ ) if(fImpNLLs[i]) { delete fImpNLLs[i]; fImpNLLs[i] = NULL; } for( unsigned int i=0; i < fNullNLLs.size(); i++ ) if(fNullNLLs[i]) { delete fNullNLLs[i]; fNullNLLs[i] = NULL; } } RooDataSet* ToyMCImportanceSampler::GetSamplingDistributionsSingleWorker(RooArgSet& paramPoint) { if( fNToys == 0 ) return NULL; // remember original #toys, but overwrite it temporarily with the #toys per distribution Int_t allToys = fNToys; // to keep track of which dataset entry comes form which density, define a roocategory as a label RooCategory densityLabel( "densityLabel", "densityLabel" ); densityLabel.defineType( "null", -1 ); for( unsigned int i=0; i < fImportanceDensities.size(); i++ ) densityLabel.defineType( TString::Format( "impDens_%d", i ), i ); RooDataSet* fullResult = NULL; // generate null (negative i) and imp densities (0 and positive i) for( int i = -1; i < (int)fImportanceDensities.size(); i++ ) { if( i < 0 ) { // generate null toys oocoutP((TObject*)0,Generation) << endl << endl << " GENERATING FROM NULL DENSITY " << endl << endl; SetDensityToGenerateFromByIndex( 0, true ); // true = generate from null }else{ oocoutP((TObject*)0,Generation) << endl << endl << " GENERATING IMP DENS/SNAP "<get()->getSize() > Int_t(fTestStatistics.size())) { // add label densityLabel.setIndex( i ); result->addColumn( densityLabel ); result->addColumn( reweight ); } if( !fullResult ) { RooArgSet columns( *result->get() ); RooRealVar weightVar ( "weight", "weight", 1.0 ); columns.add( weightVar ); // cout << endl << endl << "Reweighted data columns: " << endl; // columns.Print("v"); // cout << endl; fullResult = new RooDataSet( result->GetName(), result->GetTitle(), columns, "weight" ); } for( int j=0; j < result->numEntries(); j++ ) { // cout << "entry: " << j << endl; // result->get(j)->Print(); // cout << "weight: " << result->weight() << endl; // cout << "reweight: " << reweight.getVal() << endl; const RooArgSet* row = result->get(j); fullResult->add( *row, result->weight()*reweight.getVal() ); } delete result; } // restore #toys fNToys = allToys; return fullResult; } RooAbsData* ToyMCImportanceSampler::GenerateToyData( RooArgSet& paramPoint, double& weight ) const { if( fNullDensities.size() > 1 ) { ooccoutI((TObject*)NULL,InputArguments) << "Null Densities:" << endl; for( unsigned int i=0; i < fNullDensities.size(); i++) { ooccoutI((TObject*)NULL,InputArguments) << " null density["< weights; weights.push_back( weight ); vector impNLLs; for( unsigned int i=0; i < fImportanceDensities.size(); i++ ) impNLLs.push_back( 0.0 ); vector nullNLLs; for( unsigned int i=0; i < fNullDensities.size(); i++ ) nullNLLs.push_back( 0.0 ); RooAbsData *d = GenerateToyData( weights, impNLLs, nullNLLs ); weight = weights[0]; return d; } RooAbsData* ToyMCImportanceSampler::GenerateToyData( RooArgSet& paramPoint, double& weight, vector& impNLLs, double& nullNLL ) const { if( fNullDensities.size() > 1 ) { ooccoutI((TObject*)NULL,InputArguments) << "Null Densities:" << endl; for( unsigned int i=0; i < fNullDensities.size(); i++) { ooccoutI((TObject*)NULL,InputArguments) << " null density["< weights; weights.push_back( weight ); vector nullNLLs; nullNLLs.push_back( nullNLL ); RooAbsData *d = GenerateToyData( weights, impNLLs, nullNLLs ); weight = weights[0]; nullNLL = nullNLLs[0]; return d; } RooAbsData* ToyMCImportanceSampler::GenerateToyData( vector& weights ) const { if( fNullDensities.size() != weights.size() ) { ooccoutI((TObject*)NULL,InputArguments) << "weights.size() != nullDesnities.size(). You need to provide a vector with the correct size." << endl; //AddNullDensity( fPdf, ¶mPoint ); } vector impNLLs; for( unsigned int i=0; i < fImportanceDensities.size(); i++ ) impNLLs.push_back( 0.0 ); vector nullNLLs; for( unsigned int i=0; i < fNullDensities.size(); i++ ) nullNLLs.push_back( 0.0 ); RooAbsData *d = GenerateToyData( weights, impNLLs, nullNLLs ); return d; } RooAbsData* ToyMCImportanceSampler::GenerateToyData( vector& weights, vector& impNLLVals, vector& nullNLLVals ) const { // This method generates a toy data set for importance sampling for the given parameter point taking // global observables into account. // The values of the generated global observables remain in the pdf's variables. // They have to have those values for the subsequent evaluation of the // test statistics. ooccoutD((TObject*)0,InputArguments) << endl; ooccoutD((TObject*)0,InputArguments) << "GenerateToyDataImportanceSampling" << endl; if(!fObservables) { ooccoutE((TObject*)NULL,InputArguments) << "Observables not set." << endl; return NULL; } if( fNullDensities.size() == 0 ) { oocoutE((TObject*)NULL,InputArguments) << "ToyMCImportanceSampler: Need to specify the null density explicitly." << endl; return NULL; } // catch the case when NLLs are not created (e.g. when ToyMCSampler was streamed for Proof) if( fNullNLLs.size() == 0 && fNullDensities.size() > 0 ) { for( unsigned int i = 0; i < fNullDensities.size(); i++ ) fNullNLLs.push_back( NULL ); } if( fImpNLLs.size() == 0 && fImportanceDensities.size() > 0 ) { for( unsigned int i = 0; i < fImportanceDensities.size(); i++ ) fImpNLLs.push_back( NULL ); } if( fNullDensities.size() != fNullNLLs.size() ) { oocoutE((TObject*)NULL,InputArguments) << "ToyMCImportanceSampler: Something wrong. NullNLLs must be of same size as null densities." << endl; return NULL; } if( (!fGenerateFromNull && fIndexGenDensity >= fImportanceDensities.size()) || (fGenerateFromNull && fIndexGenDensity >= fNullDensities.size()) ) { oocoutE((TObject*)NULL,InputArguments) << "ToyMCImportanceSampler: no importance density given or index out of range." << endl; return NULL; } // paramPoint used to be given as parameter // situation is clear when there is only one null. // WHAT TO DO FOR MANY NULL DENSITIES? RooArgSet paramPoint( *fNullSnapshots[0] ); //cout << "paramPoint: " << endl; //paramPoint.Print("v"); // assign input paramPoint RooArgSet* allVars = fPdf->getVariables(); *allVars = paramPoint; // create nuisance parameter points if(!fNuisanceParametersSampler && fPriorNuisance && fNuisancePars) fNuisanceParametersSampler = new NuisanceParametersSampler(fPriorNuisance, fNuisancePars, fNToys, fExpectedNuisancePar); // generate global observables RooArgSet observables(*fObservables); if(fGlobalObservables && fGlobalObservables->getSize()) { observables.remove(*fGlobalObservables); // WHAT TODO FOR MANY NULL DENSITIES? GenerateGlobalObservables(*fNullDensities[0]); } // save values to restore later. // but this must remain after(!) generating global observables if( !fGenerateFromNull ) { RooArgSet* allVarsImpDens = fImportanceDensities[fIndexGenDensity]->getVariables(); allVars->add(*allVarsImpDens); delete allVarsImpDens; } const RooArgSet* saveVars = (const RooArgSet*)allVars->snapshot(); double globalWeight = 1.0; if(fNuisanceParametersSampler) { // use nuisance parameters? // Construct a set of nuisance parameters that has the parameters // in the input paramPoint removed. Therefore, no parameter in // paramPoint is randomized. // Therefore when a parameter is given (should be held fixed), // but is also in the list of nuisance parameters, the parameter // will be held fixed. This is useful for debugging to hold single // parameters fixed although under "normal" circumstances it is // randomized. RooArgSet allVarsMinusParamPoint(*allVars); allVarsMinusParamPoint.remove(paramPoint, kFALSE, kTRUE); // match by name // get nuisance parameter point and weight fNuisanceParametersSampler->NextPoint(allVarsMinusParamPoint, globalWeight); } // populate input weights vector with this globalWeight for( unsigned int i=0; i < weights.size(); i++ ) weights[i] = globalWeight; RooAbsData* data = NULL; if( fGenerateFromNull ) { //cout << "gen from null" << endl; *allVars = *fNullSnapshots[fIndexGenDensity]; data = Generate(*fNullDensities[fIndexGenDensity], observables); }else{ // need to be careful here not to overwrite the current state of the // nuisance parameters, ie they must not be part of the snapshot //cout << "gen from imp" << endl; if(fImportanceSnapshots[fIndexGenDensity]) *allVars = *fImportanceSnapshots[fIndexGenDensity]; data = Generate(*fImportanceDensities[fIndexGenDensity], observables); } //cout << "data generated: " << data << endl; if (!data) { oocoutE((TObject*)0,InputArguments) << "ToyMCImportanceSampler: error generating data" << endl; return NULL; } // Importance Sampling: adjust weight // Sources: Alex Read, presentation by Michael Woodroofe ooccoutD((TObject*)0,InputArguments) << "About to create/calculate all nullNLLs." << endl; for( unsigned int i=0; i < fNullDensities.size(); i++ ) { //oocoutI((TObject*)0,InputArguments) << "Setting variables to nullSnapshot["<Print("v"); *allVars = *fNullSnapshots[i]; if( !fNullNLLs[i] ) { RooArgSet* allParams = fNullDensities[i]->getParameters(*data); fNullNLLs[i] = fNullDensities[i]->createNLL(*data, RooFit::CloneData(kFALSE), RooFit::Constrain(*allParams), RooFit::ConditionalObservables(fConditionalObs)); delete allParams; }else{ fNullNLLs[i]->setData( *data, kFALSE ); } nullNLLVals[i] = fNullNLLs[i]->getVal(); // FOR DEBuGGING!!!!!!!!!!!!!!!!! if( !fReuseNLL ) { delete fNullNLLs[i]; fNullNLLs[i] = NULL; } } // for each null: find minNLLVal of null and all imp densities ooccoutD((TObject*)0,InputArguments) << "About to find the minimum NLLs." << endl; vector minNLLVals; for( unsigned int i=0; i < nullNLLVals.size(); i++ ) minNLLVals.push_back( nullNLLVals[i] ); for( unsigned int i=0; i < fImportanceDensities.size(); i++ ) { //oocoutI((TObject*)0,InputArguments) << "Setting variables to impSnapshot["<Print("v"); if( fImportanceSnapshots[i] ) *allVars = *fImportanceSnapshots[i]; if( !fImpNLLs[i] ) { RooArgSet* allParams = fImportanceDensities[i]->getParameters(*data); fImpNLLs[i] = fImportanceDensities[i]->createNLL(*data, RooFit::CloneData(kFALSE), RooFit::Constrain(*allParams), RooFit::ConditionalObservables(fConditionalObs)); delete allParams; }else{ fImpNLLs[i]->setData( *data, kFALSE ); } impNLLVals[i] = fImpNLLs[i]->getVal(); // FOR DEBuGGING!!!!!!!!!!!!!!!!! if( !fReuseNLL ) { delete fImpNLLs[i]; fImpNLLs[i] = NULL; } for( unsigned int j=0; j < nullNLLVals.size(); j++ ) { if( impNLLVals[i] < minNLLVals[j] ) minNLLVals[j] = impNLLVals[i]; ooccoutD((TObject*)0,InputArguments) << "minNLLVals["< 0.01 && poi.getError() < 5.0 ) { n = TMath::CeilNint( poi.getVal() / (2.*nStdDevOverlap*poi.getError()) ); // round up oocoutI((TObject*)0,InputArguments) << "Using fitFavoredMu and error to set the number of imp points" << endl; oocoutI((TObject*)0,InputArguments) << "muhat: " << poi.getVal() << " optimize for distance: " << 2.*nStdDevOverlap*poi.getError() << endl; oocoutI((TObject*)0,InputArguments) << "n = " << n << endl; oocoutI((TObject*)0,InputArguments) << "This results in a distance of: " << impMaxMu / n << endl; } // exclude the null, just return the number of importance snapshots return CreateNImpDensitiesForOnePOI( pdf, allPOI, poi, n-1, poiValueForBackground); } int ToyMCImportanceSampler::CreateNImpDensitiesForOnePOI( RooAbsPdf& pdf, const RooArgSet& allPOI, RooRealVar& poi, int n, double poiValueForBackground ) { // n is the number of importance densities // these might not necessarily be the same thing. double impMaxMu = poi.getVal(); // create imp snapshots if( impMaxMu > poiValueForBackground && n > 0 ) { for( int i=1; i <= n; i++ ) { poi.setVal( poiValueForBackground + (double)i/(n)*(impMaxMu - poiValueForBackground) ); oocoutI((TObject*)0,InputArguments) << endl << "create point with poi: " << endl; poi.Print(); // impSnaps without first snapshot because that is null hypothesis AddImportanceDensity( &pdf, &allPOI ); } } return n; } } // end namespace RooStats