/*****************************************************************************
 * Project: RooFit                                                           *
 * Package: RooFitCore                                                       *
 * @(#)root/roofitcore:$Id$
 * Authors:                                                                  *
 *   WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu       *
 *   DK, David Kirkby, UC Irvine, dkirkby@uci.edu                            *
 *                                                                           *
 * Copyright (c) 2000-2005, Regents of the University of California          *
 *                          and Stanford University. All rights reserved.    *
 *                                                                           *
 * Redistribution and use in source and binary forms,                        *
 * with or without modification, are permitted according to the terms        *
 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)             *
 *****************************************************************************/

//////////////////////////////////////////////////////////////////////////////
//
// BEGIN_HTML
// Class RooNLLVar implements a -log(likelihood) calculation from a dataset
// and a PDF. The NLL is calculated as
//
//  Sum[data] -log( pdf(x_data) )
//
// In extended mode, a (Nexpect - Nobserved*log(NExpected) term is added // END_HTML // #include #include "RooFit.h" #include "Riostream.h" #include "TMath.h" #include "RooNLLVar.h" #include "RooAbsData.h" #include "RooAbsPdf.h" #include "RooCmdConfig.h" #include "RooMsgService.h" #include "RooAbsDataStore.h" #include "RooRealMPFE.h" #include "RooRealSumPdf.h" #include "RooRealVar.h" #include "RooProdPdf.h" ClassImp(RooNLLVar) ; RooArgSet RooNLLVar::_emptySet ; //_____________________________________________________________________________ RooNLLVar::RooNLLVar(const char *name, const char* title, RooAbsPdf& pdf, RooAbsData& indata, const RooCmdArg& arg1, const RooCmdArg& arg2,const RooCmdArg& arg3, const RooCmdArg& arg4, const RooCmdArg& arg5,const RooCmdArg& arg6, const RooCmdArg& arg7, const RooCmdArg& arg8,const RooCmdArg& arg9) : RooAbsOptTestStatistic(name,title,pdf,indata, *(const RooArgSet*)RooCmdConfig::decodeObjOnTheFly("RooNLLVar::RooNLLVar","ProjectedObservables",0,&_emptySet ,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9), RooCmdConfig::decodeStringOnTheFly("RooNLLVar::RooNLLVar","RangeWithName",0,"",arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9), RooCmdConfig::decodeStringOnTheFly("RooNLLVar::RooNLLVar","AddCoefRange",0,"",arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9), RooCmdConfig::decodeIntOnTheFly("RooNLLVar::RooNLLVar","NumCPU",0,1,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9), RooFit::BulkPartition, RooCmdConfig::decodeIntOnTheFly("RooNLLVar::RooNLLVar","Verbose",0,1,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9), RooCmdConfig::decodeIntOnTheFly("RooNLLVar::RooNLLVar","SplitRange",0,0,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9), RooCmdConfig::decodeIntOnTheFly("RooNLLVar::RooNLLVar","CloneData",0,1,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9)) { // Construct likelihood from given p.d.f and (binned or unbinned dataset) // // Extended() -- Include extended term in calculation // NumCPU() -- Activate parallel processing feature // Range() -- Fit only 
selected region // SumCoefRange() -- Set the range in which to interpret the coefficients of RooAddPdf components // SplitRange() -- Fit range is split by index catory of simultaneous PDF // ConditionalObservables() -- Define conditional observables // Verbose() -- Verbose output of GOF framework classes // CloneData() -- Clone input dataset for internal use (default is kTRUE) RooCmdConfig pc("RooNLLVar::RooNLLVar") ; pc.allowUndefined() ; pc.defineInt("extended","Extended",0,kFALSE) ; pc.process(arg1) ; pc.process(arg2) ; pc.process(arg3) ; pc.process(arg4) ; pc.process(arg5) ; pc.process(arg6) ; pc.process(arg7) ; pc.process(arg8) ; pc.process(arg9) ; _extended = pc.getInt("extended") ; _weightSq = kFALSE ; _first = kTRUE ; _offset = 0.; _offsetCarry = 0.; _offsetSaveW2 = 0.; _offsetCarrySaveW2 = 0.; _binnedPdf = 0 ; } //_____________________________________________________________________________ RooNLLVar::RooNLLVar(const char *name, const char *title, RooAbsPdf& pdf, RooAbsData& indata, Bool_t extended, const char* rangeName, const char* addCoefRangeName, Int_t nCPU, RooFit::MPSplit interleave, Bool_t verbose, Bool_t splitRange, Bool_t cloneData, Bool_t binnedL) : RooAbsOptTestStatistic(name,title,pdf,indata,RooArgSet(),rangeName,addCoefRangeName,nCPU,interleave,verbose,splitRange,cloneData), _extended(extended), _weightSq(kFALSE), _first(kTRUE), _offsetSaveW2(0.), _offsetCarrySaveW2(0.) { // Construct likelihood from given p.d.f and (binned or unbinned dataset) // For internal use. // If binned likelihood flag is set, pdf is a RooRealSumPdf representing a yield vector // for a binned likelihood calculation _binnedPdf = binnedL ? 
(RooRealSumPdf*)_funcClone : 0 ; // Retrieve and cache bin widths needed to convert unnormalized binnedPdf values back to yields if (_binnedPdf) { RooArgSet* obs = _funcClone->getObservables(_dataClone) ; if (obs->getSize()!=1) { _binnedPdf = 0 ; } else { RooRealVar* var = (RooRealVar*) obs->first() ; std::list* boundaries = _binnedPdf->binBoundaries(*var,var->getMin(),var->getMax()) ; std::list::iterator biter = boundaries->begin() ; _binw.resize(boundaries->size()-1) ; Double_t lastBound = (*biter) ; biter++ ; int ibin=0 ; while (biter!=boundaries->end()) { _binw[ibin] = (*biter) - lastBound ; lastBound = (*biter) ; ibin++ ; biter++ ; } } } } //_____________________________________________________________________________ RooNLLVar::RooNLLVar(const char *name, const char *title, RooAbsPdf& pdf, RooAbsData& indata, const RooArgSet& projDeps, Bool_t extended, const char* rangeName,const char* addCoefRangeName, Int_t nCPU,RooFit::MPSplit interleave,Bool_t verbose, Bool_t splitRange, Bool_t cloneData, Bool_t binnedL) : RooAbsOptTestStatistic(name,title,pdf,indata,projDeps,rangeName,addCoefRangeName,nCPU,interleave,verbose,splitRange,cloneData), _extended(extended), _weightSq(kFALSE), _first(kTRUE), _offsetSaveW2(0.), _offsetCarrySaveW2(0.) { // Construct likelihood from given p.d.f and (binned or unbinned dataset) // For internal use. // If binned likelihood flag is set, pdf is a RooRealSumPdf representing a yield vector // for a binned likelihood calculation _binnedPdf = binnedL ? 
(RooRealSumPdf*)_funcClone : 0 ; // Retrieve and cache bin widths needed to convert unnormalized binnedPdf values back to yields if (_binnedPdf) { RooArgSet* obs = _funcClone->getObservables(_dataClone) ; if (obs->getSize()!=1) { _binnedPdf = 0 ; } else { RooRealVar* var = (RooRealVar*) obs->first() ; std::list* boundaries = _binnedPdf->binBoundaries(*var,var->getMin(),var->getMax()) ; std::list::iterator biter = boundaries->begin() ; _binw.resize(boundaries->size()-1) ; Double_t lastBound = (*biter) ; biter++ ; int ibin=0 ; while (biter!=boundaries->end()) { _binw[ibin] = (*biter) - lastBound ; lastBound = (*biter) ; ibin++ ; biter++ ; } } } } //_____________________________________________________________________________ RooNLLVar::RooNLLVar(const RooNLLVar& other, const char* name) : RooAbsOptTestStatistic(other,name), _extended(other._extended), _weightSq(other._weightSq), _first(kTRUE), _offsetSaveW2(other._offsetSaveW2), _offsetCarrySaveW2(other._offsetCarrySaveW2), _binw(other._binw) { // Copy constructor _binnedPdf = other._binnedPdf ? 
(RooRealSumPdf*)_funcClone : 0 ; } //_____________________________________________________________________________ RooNLLVar::~RooNLLVar() { // Destructor } //_____________________________________________________________________________ void RooNLLVar::applyWeightSquared(Bool_t flag) { if (_gofOpMode==Slave) { if (flag != _weightSq) { _weightSq = flag; std::swap(_offset, _offsetSaveW2); std::swap(_offsetCarry, _offsetCarrySaveW2); } setValueDirty(); } else if ( _gofOpMode==MPMaster) { for (Int_t i=0 ; i<_nCPU ; i++) _mpfeArray[i]->applyNLLWeightSquared(flag); } else if ( _gofOpMode==SimMaster) { for (Int_t i=0 ; i<_nGof ; i++) ((RooNLLVar*)_gofArray[i])->applyWeightSquared(flag); } } //_____________________________________________________________________________ Double_t RooNLLVar::evaluatePartition(Int_t firstEvent, Int_t lastEvent, Int_t stepSize) const { // Calculate and return likelihood on subset of data from firstEvent to lastEvent // processed with a step size of 'stepSize'. If this an extended likelihood and // and the zero event is processed the extended term is added to the return // likelihood. // Throughout the calculation, we use Kahan's algorithm for summing to // prevent loss of precision - this is a factor four more expensive than // straight addition, but since evaluating the PDF is usually much more // expensive than that, we tolerate the additional cost... 
Int_t i ; Double_t result(0), carry(0); RooAbsPdf* pdfClone = (RooAbsPdf*) _funcClone ; // cout << "RooNLLVar::evaluatePartition(" << GetName() << ") projDeps = " << (_projDeps?*_projDeps:RooArgSet()) << endl ; _dataClone->store()->recalculateCache( _projDeps, firstEvent, lastEvent, stepSize,(_binnedPdf?kFALSE:kTRUE) ) ; Double_t sumWeight(0), sumWeightCarry(0); // If pdf is marked as binned - do a binned likelihood calculation here (sum of log-Poisson for each bin) if (_binnedPdf) { for (i=firstEvent ; iget(i) ; if (!_dataClone->valid()) continue; Double_t eventWeight = _dataClone->weight(); // Calculate log(Poisson(N|mu) for this bin Double_t N = eventWeight ; Double_t mu = _binnedPdf->getVal()*_binw[i] ; //cout << "RooNLLVar::binnedL(" << GetName() << ") N=" << N << " mu = " << mu << endl ; if (mu<=0 && N>0) { // Catch error condition: data present where zero events are predicted logEvalError(Form("Observed %f events in bin %d with zero event yield",N,i)) ; } else if (fabs(mu)<1e-10 && fabs(N)<1e-10) { // Special handling of this case since log(Poisson(0,0)=0 but can't be calculated with usual log-formula // since log(mu)=0. No update of result is required since term=0. } else { Double_t term = -1*(-mu + N*log(mu) - TMath::LnGamma(N+1)) ; // Kahan summation of sumWeight Double_t y = eventWeight - sumWeightCarry; Double_t t = sumWeight + y; sumWeightCarry = (t - sumWeight) - y; sumWeight = t; // Kahan summation of result y = term - carry; t = result + y; carry = (t - result) - y; result = t; } } } else { for (i=firstEvent ; iget(i) ; if (!_dataClone->valid()) continue; Double_t eventWeight = _dataClone->weight(); if (0. 
== eventWeight * eventWeight) continue ; if (_weightSq) eventWeight = _dataClone->weightSquared() ; Double_t term = -eventWeight * pdfClone->getLogVal(_normSet); Double_t y = eventWeight - sumWeightCarry; Double_t t = sumWeight + y; sumWeightCarry = (t - sumWeight) - y; sumWeight = t; y = term - carry; t = result + y; carry = (t - result) - y; result = t; } // include the extended maximum likelihood term, if requested if(_extended && _setNum==_extSet) { if (_weightSq) { // Calculate sum of weights-squared here for extended term Double_t sumW2(0), sumW2carry(0); for (i=0 ; i<_dataClone->numEntries() ; i++) { _dataClone->get(i); Double_t y = _dataClone->weightSquared() - sumW2carry; Double_t t = sumW2 + y; sumW2carry = (t - sumW2) - y; sumW2 = t; } Double_t expected= pdfClone->expectedEvents(_dataClone->get()); // Adjust calculation of extended term with W^2 weighting: adjust poisson such that // estimate of Nexpected stays at the same value, but has a different variance, rescale // both the observed and expected count of the Poisson with a factor sum[w] / sum[w^2] // i.e. 
change Poisson(Nobs = sum[w]| Nexp ) --> Poisson( sum[w] * sum[w] / sum[w^2] | Nexp * sum[w] / sum[w^2] ) Double_t observedW2 = ( _dataClone->sumEntries() * _dataClone->sumEntries() ) / sumW2 ; Double_t expectedW2 = expected * _dataClone->sumEntries() / sumW2 ; Double_t extra= expectedW2 - observedW2*log(expectedW2); // Double_t y = pdfClone->extendedTerm(sumW2, _dataClone->get()) - carry; Double_t y = extra - carry ; Double_t t = result + y; carry = (t - result) - y; result = t; } else { Double_t y = pdfClone->extendedTerm(_dataClone->sumEntries(), _dataClone->get()) - carry; Double_t t = result + y; carry = (t - result) - y; result = t; } } } // If part of simultaneous PDF normalize probability over // number of simultaneous PDFs: -sum(log(p/n)) = -sum(log(p)) + N*log(n) if (_simCount>1) { Double_t y = sumWeight*log(1.0*_simCount) - carry; Double_t t = result + y; carry = (t - result) - y; result = t; } //timer.Stop() ; //cout << "RooNLLVar::evalPart(" << GetName() << ") SET=" << _setNum << " first=" << firstEvent << ", last=" << lastEvent << ", step=" << stepSize << ") result = " << result << " CPU = " << timer.CpuTime() << endl ; // At the end of the first full calculation, wire the caches if (_first) { _first = kFALSE ; _funcClone->wireAllCaches() ; } // Check if value offset flag is set. if (_doOffset) { // If no offset is stored enable this feature now if (_offset==0 && result !=0 ) { coutI(Minimization) << "RooNLLVar::evaluatePartition(" << GetName() << ") first = "<< firstEvent << " last = " << lastEvent << " Likelihood offset now set to " << result << std::endl ; _offset = result ; _offsetCarry = carry; } // Substract offset Double_t y = -_offset - (carry + _offsetCarry); Double_t t = result + y; carry = (t - result) - y; result = t; } _evalCarry = carry; return result ; }