/*****************************************************************************
* Project: RooFit *
* Package: RooFitCore *
* @(#)root/roofitcore:$Id$
* Authors: *
* WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
* DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
* *
* Copyright (c) 2000-2005, Regents of the University of California *
* and Stanford University. All rights reserved. *
* *
* Redistribution and use in source and binary forms, *
* with or without modification, are permitted according to the terms *
* listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
*****************************************************************************/
//////////////////////////////////////////////////////////////////////////////
//
// BEGIN_HTML
// Class RooNLLVar implements a a -log(likelihood) calculation from a dataset
// and a PDF. The NLL is calculated as
//
// Sum[data] -log( pdf(x_data) )
//
// In extended mode, a (Nexpect - Nobserved*log(NExpected) term is added
// END_HTML
//
#include
#include "RooFit.h"
#include "Riostream.h"
#include "TMath.h"
#include "RooNLLVar.h"
#include "RooAbsData.h"
#include "RooAbsPdf.h"
#include "RooCmdConfig.h"
#include "RooMsgService.h"
#include "RooAbsDataStore.h"
#include "RooRealMPFE.h"
#include "RooRealSumPdf.h"
#include "RooRealVar.h"
#include "RooProdPdf.h"
ClassImp(RooNLLVar)
;
RooArgSet RooNLLVar::_emptySet ;
//_____________________________________________________________________________
RooNLLVar::RooNLLVar(const char *name, const char* title, RooAbsPdf& pdf, RooAbsData& indata,
const RooCmdArg& arg1, const RooCmdArg& arg2,const RooCmdArg& arg3,
const RooCmdArg& arg4, const RooCmdArg& arg5,const RooCmdArg& arg6,
const RooCmdArg& arg7, const RooCmdArg& arg8,const RooCmdArg& arg9) :
RooAbsOptTestStatistic(name,title,pdf,indata,
*(const RooArgSet*)RooCmdConfig::decodeObjOnTheFly("RooNLLVar::RooNLLVar","ProjectedObservables",0,&_emptySet
,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
RooCmdConfig::decodeStringOnTheFly("RooNLLVar::RooNLLVar","RangeWithName",0,"",arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
RooCmdConfig::decodeStringOnTheFly("RooNLLVar::RooNLLVar","AddCoefRange",0,"",arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
RooCmdConfig::decodeIntOnTheFly("RooNLLVar::RooNLLVar","NumCPU",0,1,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
RooFit::BulkPartition,
RooCmdConfig::decodeIntOnTheFly("RooNLLVar::RooNLLVar","Verbose",0,1,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
RooCmdConfig::decodeIntOnTheFly("RooNLLVar::RooNLLVar","SplitRange",0,0,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
RooCmdConfig::decodeIntOnTheFly("RooNLLVar::RooNLLVar","CloneData",0,1,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9))
{
// Construct likelihood from given p.d.f and (binned or unbinned dataset)
//
// Extended() -- Include extended term in calculation
// NumCPU() -- Activate parallel processing feature
// Range() -- Fit only selected region
// SumCoefRange() -- Set the range in which to interpret the coefficients of RooAddPdf components
// SplitRange() -- Fit range is split by index catory of simultaneous PDF
// ConditionalObservables() -- Define conditional observables
// Verbose() -- Verbose output of GOF framework classes
// CloneData() -- Clone input dataset for internal use (default is kTRUE)
RooCmdConfig pc("RooNLLVar::RooNLLVar") ;
pc.allowUndefined() ;
pc.defineInt("extended","Extended",0,kFALSE) ;
pc.process(arg1) ; pc.process(arg2) ; pc.process(arg3) ;
pc.process(arg4) ; pc.process(arg5) ; pc.process(arg6) ;
pc.process(arg7) ; pc.process(arg8) ; pc.process(arg9) ;
_extended = pc.getInt("extended") ;
_weightSq = kFALSE ;
_first = kTRUE ;
_offset = 0.;
_offsetCarry = 0.;
_offsetSaveW2 = 0.;
_offsetCarrySaveW2 = 0.;
_binnedPdf = 0 ;
}
//_____________________________________________________________________________
RooNLLVar::RooNLLVar(const char *name, const char *title, RooAbsPdf& pdf, RooAbsData& indata,
Bool_t extended, const char* rangeName, const char* addCoefRangeName,
Int_t nCPU, RooFit::MPSplit interleave, Bool_t verbose, Bool_t splitRange, Bool_t cloneData, Bool_t binnedL) :
RooAbsOptTestStatistic(name,title,pdf,indata,RooArgSet(),rangeName,addCoefRangeName,nCPU,interleave,verbose,splitRange,cloneData),
_extended(extended),
_weightSq(kFALSE),
_first(kTRUE), _offsetSaveW2(0.), _offsetCarrySaveW2(0.)
{
// Construct likelihood from given p.d.f and (binned or unbinned dataset)
// For internal use.
// If binned likelihood flag is set, pdf is a RooRealSumPdf representing a yield vector
// for a binned likelihood calculation
_binnedPdf = binnedL ? (RooRealSumPdf*)_funcClone : 0 ;
// Retrieve and cache bin widths needed to convert unnormalized binnedPdf values back to yields
if (_binnedPdf) {
RooArgSet* obs = _funcClone->getObservables(_dataClone) ;
if (obs->getSize()!=1) {
_binnedPdf = 0 ;
} else {
RooRealVar* var = (RooRealVar*) obs->first() ;
std::list* boundaries = _binnedPdf->binBoundaries(*var,var->getMin(),var->getMax()) ;
std::list::iterator biter = boundaries->begin() ;
_binw.resize(boundaries->size()-1) ;
Double_t lastBound = (*biter) ;
biter++ ;
int ibin=0 ;
while (biter!=boundaries->end()) {
_binw[ibin] = (*biter) - lastBound ;
lastBound = (*biter) ;
ibin++ ;
biter++ ;
}
}
}
}
//_____________________________________________________________________________
RooNLLVar::RooNLLVar(const char *name, const char *title, RooAbsPdf& pdf, RooAbsData& indata,
const RooArgSet& projDeps, Bool_t extended, const char* rangeName,const char* addCoefRangeName,
Int_t nCPU,RooFit::MPSplit interleave,Bool_t verbose, Bool_t splitRange, Bool_t cloneData, Bool_t binnedL) :
RooAbsOptTestStatistic(name,title,pdf,indata,projDeps,rangeName,addCoefRangeName,nCPU,interleave,verbose,splitRange,cloneData),
_extended(extended),
_weightSq(kFALSE),
_first(kTRUE), _offsetSaveW2(0.), _offsetCarrySaveW2(0.)
{
// Construct likelihood from given p.d.f and (binned or unbinned dataset)
// For internal use.
// If binned likelihood flag is set, pdf is a RooRealSumPdf representing a yield vector
// for a binned likelihood calculation
_binnedPdf = binnedL ? (RooRealSumPdf*)_funcClone : 0 ;
// Retrieve and cache bin widths needed to convert unnormalized binnedPdf values back to yields
if (_binnedPdf) {
RooArgSet* obs = _funcClone->getObservables(_dataClone) ;
if (obs->getSize()!=1) {
_binnedPdf = 0 ;
} else {
RooRealVar* var = (RooRealVar*) obs->first() ;
std::list* boundaries = _binnedPdf->binBoundaries(*var,var->getMin(),var->getMax()) ;
std::list::iterator biter = boundaries->begin() ;
_binw.resize(boundaries->size()-1) ;
Double_t lastBound = (*biter) ;
biter++ ;
int ibin=0 ;
while (biter!=boundaries->end()) {
_binw[ibin] = (*biter) - lastBound ;
lastBound = (*biter) ;
ibin++ ;
biter++ ;
}
}
}
}
//_____________________________________________________________________________
RooNLLVar::RooNLLVar(const RooNLLVar& other, const char* name) :
RooAbsOptTestStatistic(other,name),
_extended(other._extended),
_weightSq(other._weightSq),
_first(kTRUE), _offsetSaveW2(other._offsetSaveW2),
_offsetCarrySaveW2(other._offsetCarrySaveW2),
_binw(other._binw) {
// Copy constructor
_binnedPdf = other._binnedPdf ? (RooRealSumPdf*)_funcClone : 0 ;
}
//_____________________________________________________________________________
RooNLLVar::~RooNLLVar()
{
// Destructor
}
//_____________________________________________________________________________
void RooNLLVar::applyWeightSquared(Bool_t flag)
{
if (_gofOpMode==Slave) {
if (flag != _weightSq) {
_weightSq = flag;
std::swap(_offset, _offsetSaveW2);
std::swap(_offsetCarry, _offsetCarrySaveW2);
}
setValueDirty();
} else if ( _gofOpMode==MPMaster) {
for (Int_t i=0 ; i<_nCPU ; i++)
_mpfeArray[i]->applyNLLWeightSquared(flag);
} else if ( _gofOpMode==SimMaster) {
for (Int_t i=0 ; i<_nGof ; i++)
((RooNLLVar*)_gofArray[i])->applyWeightSquared(flag);
}
}
//_____________________________________________________________________________
Double_t RooNLLVar::evaluatePartition(Int_t firstEvent, Int_t lastEvent, Int_t stepSize) const
{
// Calculate and return likelihood on subset of data from firstEvent to lastEvent
// processed with a step size of 'stepSize'. If this an extended likelihood and
// and the zero event is processed the extended term is added to the return
// likelihood.
// Throughout the calculation, we use Kahan's algorithm for summing to
// prevent loss of precision - this is a factor four more expensive than
// straight addition, but since evaluating the PDF is usually much more
// expensive than that, we tolerate the additional cost...
Int_t i ;
Double_t result(0), carry(0);
RooAbsPdf* pdfClone = (RooAbsPdf*) _funcClone ;
// cout << "RooNLLVar::evaluatePartition(" << GetName() << ") projDeps = " << (_projDeps?*_projDeps:RooArgSet()) << endl ;
_dataClone->store()->recalculateCache( _projDeps, firstEvent, lastEvent, stepSize,(_binnedPdf?kFALSE:kTRUE) ) ;
Double_t sumWeight(0), sumWeightCarry(0);
// If pdf is marked as binned - do a binned likelihood calculation here (sum of log-Poisson for each bin)
if (_binnedPdf) {
for (i=firstEvent ; iget(i) ;
if (!_dataClone->valid()) continue;
Double_t eventWeight = _dataClone->weight();
// Calculate log(Poisson(N|mu) for this bin
Double_t N = eventWeight ;
Double_t mu = _binnedPdf->getVal()*_binw[i] ;
//cout << "RooNLLVar::binnedL(" << GetName() << ") N=" << N << " mu = " << mu << endl ;
if (mu<=0 && N>0) {
// Catch error condition: data present where zero events are predicted
logEvalError(Form("Observed %f events in bin %d with zero event yield",N,i)) ;
} else if (fabs(mu)<1e-10 && fabs(N)<1e-10) {
// Special handling of this case since log(Poisson(0,0)=0 but can't be calculated with usual log-formula
// since log(mu)=0. No update of result is required since term=0.
} else {
Double_t term = -1*(-mu + N*log(mu) - TMath::LnGamma(N+1)) ;
// Kahan summation of sumWeight
Double_t y = eventWeight - sumWeightCarry;
Double_t t = sumWeight + y;
sumWeightCarry = (t - sumWeight) - y;
sumWeight = t;
// Kahan summation of result
y = term - carry;
t = result + y;
carry = (t - result) - y;
result = t;
}
}
} else {
for (i=firstEvent ; iget(i) ;
if (!_dataClone->valid()) continue;
Double_t eventWeight = _dataClone->weight();
if (0. == eventWeight * eventWeight) continue ;
if (_weightSq) eventWeight = _dataClone->weightSquared() ;
Double_t term = -eventWeight * pdfClone->getLogVal(_normSet);
Double_t y = eventWeight - sumWeightCarry;
Double_t t = sumWeight + y;
sumWeightCarry = (t - sumWeight) - y;
sumWeight = t;
y = term - carry;
t = result + y;
carry = (t - result) - y;
result = t;
}
// include the extended maximum likelihood term, if requested
if(_extended && _setNum==_extSet) {
if (_weightSq) {
// Calculate sum of weights-squared here for extended term
Double_t sumW2(0), sumW2carry(0);
for (i=0 ; i<_dataClone->numEntries() ; i++) {
_dataClone->get(i);
Double_t y = _dataClone->weightSquared() - sumW2carry;
Double_t t = sumW2 + y;
sumW2carry = (t - sumW2) - y;
sumW2 = t;
}
Double_t expected= pdfClone->expectedEvents(_dataClone->get());
// Adjust calculation of extended term with W^2 weighting: adjust poisson such that
// estimate of Nexpected stays at the same value, but has a different variance, rescale
// both the observed and expected count of the Poisson with a factor sum[w] / sum[w^2]
// i.e. change Poisson(Nobs = sum[w]| Nexp ) --> Poisson( sum[w] * sum[w] / sum[w^2] | Nexp * sum[w] / sum[w^2] )
Double_t observedW2 = ( _dataClone->sumEntries() * _dataClone->sumEntries() ) / sumW2 ;
Double_t expectedW2 = expected * _dataClone->sumEntries() / sumW2 ;
Double_t extra= expectedW2 - observedW2*log(expectedW2);
// Double_t y = pdfClone->extendedTerm(sumW2, _dataClone->get()) - carry;
Double_t y = extra - carry ;
Double_t t = result + y;
carry = (t - result) - y;
result = t;
} else {
Double_t y = pdfClone->extendedTerm(_dataClone->sumEntries(), _dataClone->get()) - carry;
Double_t t = result + y;
carry = (t - result) - y;
result = t;
}
}
}
// If part of simultaneous PDF normalize probability over
// number of simultaneous PDFs: -sum(log(p/n)) = -sum(log(p)) + N*log(n)
if (_simCount>1) {
Double_t y = sumWeight*log(1.0*_simCount) - carry;
Double_t t = result + y;
carry = (t - result) - y;
result = t;
}
//timer.Stop() ;
//cout << "RooNLLVar::evalPart(" << GetName() << ") SET=" << _setNum << " first=" << firstEvent << ", last=" << lastEvent << ", step=" << stepSize << ") result = " << result << " CPU = " << timer.CpuTime() << endl ;
// At the end of the first full calculation, wire the caches
if (_first) {
_first = kFALSE ;
_funcClone->wireAllCaches() ;
}
// Check if value offset flag is set.
if (_doOffset) {
// If no offset is stored enable this feature now
if (_offset==0 && result !=0 ) {
coutI(Minimization) << "RooNLLVar::evaluatePartition(" << GetName() << ") first = "<< firstEvent << " last = " << lastEvent << " Likelihood offset now set to " << result << std::endl ;
_offset = result ;
_offsetCarry = carry;
}
// Substract offset
Double_t y = -_offset - (carry + _offsetCarry);
Double_t t = result + y;
carry = (t - result) - y;
result = t;
}
_evalCarry = carry;
return result ;
}