// @(#)root/tmva $Id$ // Author: Andreas Hoecker, Joerg Stelzer, Fredrik Tegenfeldt, Helge Voss /********************************************************************************** * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * * Package: TMVA * * Class : Rule * * Web : http://tmva.sourceforge.net * * * * Description: * * A class describung a 'rule' * * Each internal node of a tree defines a rule from all the parental nodes. * * A rule with 0 or 1 nodes in the list is a root rule -> corresponds to a0. * * Input: a decision tree (in the constructor) * * its coefficient * * * * * * Authors (alphabetical): * * Fredrik Tegenfeldt - Iowa State U., USA * * * * Copyright (c) 2005: * * CERN, Switzerland * * Iowa State U. * * MPI-K Heidelberg, Germany * * * * Redistribution and use in source and binary forms, with or without * * modification, are permitted according to the terms listed in LICENSE * * (http://tmva.sourceforge.net/LICENSE) * **********************************************************************************/ #include #include "TKey.h" #include "TRandom3.h" #include "TMVA/SeparationBase.h" #include "TMVA/GiniIndex.h" #include "TMVA/RuleFit.h" #include "TMVA/MethodRuleFit.h" #include "TMVA/Timer.h" #include "TMVA/Tools.h" #include "TMVA/Factory.h" // for root base dir ClassImp(TMVA::RuleFit) //_______________________________________________________________________ TMVA::RuleFit::RuleFit( const MethodBase *rfbase ) : fVisHistsUseImp( kTRUE ), fLogger( new MsgLogger("RuleFit") ) { // constructor Initialize( rfbase ); std::srand( randSEED ); // initialize random number generator used by std::random_shuffle } //_______________________________________________________________________ TMVA::RuleFit::RuleFit() : fNTreeSample(0) , fNEveEffTrain(0) , fMethodRuleFit(0) , fMethodBase(0) , fVisHistsUseImp( kTRUE ) , fLogger( new MsgLogger("RuleFit") ) { // default constructor std::srand( randSEED ); // initialize random number generator used by std::random_shuffle } //_______________________________________________________________________ TMVA::RuleFit::~RuleFit() { // destructor delete fLogger; } //_______________________________________________________________________ void TMVA::RuleFit::InitNEveEff() { // init effective number of events (using event weights) UInt_t neve = fTrainingEvents.size(); if (neve==0) return; // fNEveEffTrain = CalcWeightSum( &fTrainingEvents ); // } //_______________________________________________________________________ void TMVA::RuleFit::InitPtrs( const MethodBase *rfbase ) { // initialize pointers this->SetMethodBase(rfbase); fRuleEnsemble.Initialize( this ); fRuleFitParams.SetRuleFit( this ); } //_______________________________________________________________________ void TMVA::RuleFit::Initialize( const MethodBase *rfbase ) { // initialize the parameters of the RuleFit method and make rules InitPtrs(rfbase); if (fMethodRuleFit){ fMethodRuleFit->Data()->SetCurrentType(Types::kTraining); UInt_t nevents = fMethodRuleFit->Data()->GetNTrainingEvents(); std::vector tmp; for (Long64_t ievt=0; ievtGetEvent(ievt); tmp.push_back(event); } SetTrainingEvents( tmp ); } // SetTrainingEvents( fMethodRuleFit->GetTrainingEvents() ); InitNEveEff(); MakeForest(); // Make the model - Rule + Linear (if fDoLinear is true) fRuleEnsemble.MakeModel(); // init rulefit params fRuleFitParams.Init(); } //_______________________________________________________________________ void TMVA::RuleFit::SetMethodBase( const MethodBase *rfbase ) { // set MethodBase fMethodBase = rfbase; fMethodRuleFit = dynamic_cast(rfbase); } //_______________________________________________________________________ void TMVA::RuleFit::Copy( const RuleFit& other ) { // copy method if(this != &other) { fMethodRuleFit = other.GetMethodRuleFit(); fMethodBase = other.GetMethodBase(); fTrainingEvents = other.GetTrainingEvents(); // fSubsampleEvents = other.GetSubsampleEvents(); fForest = other.GetForest(); fRuleEnsemble = other.GetRuleEnsemble(); } } //_______________________________________________________________________ Double_t TMVA::RuleFit::CalcWeightSum( const std::vector *events, UInt_t neve ) { // calculate the sum of weights if (events==0) return 0.0; if (neve==0) neve=events->size(); // Double_t sumw=0; for (UInt_t ie=0; ieGetWeight(); } return sumw; } //_______________________________________________________________________ void TMVA::RuleFit::SetMsgType( EMsgType t ) { // set the current message type to that of mlog for this class and all other subtools fLogger->SetMinType(t); fRuleEnsemble.SetMsgType(t); fRuleFitParams.SetMsgType(t); } //_______________________________________________________________________ void TMVA::RuleFit::BuildTree( DecisionTree *dt ) { // build the decision tree using fNTreeSample events from fTrainingEventsRndm if (dt==0) return; if (fMethodRuleFit==0) { Log() << kFATAL << "RuleFit::BuildTree() - Attempting to build a tree NOT from a MethodRuleFit" << Endl; } std::vector evevec; for (UInt_t ie=0; ieBuildTree(evevec); if (fMethodRuleFit->GetPruneMethod() != DecisionTree::kNoPruning) { dt->SetPruneMethod(fMethodRuleFit->GetPruneMethod()); dt->SetPruneStrength(fMethodRuleFit->GetPruneStrength()); dt->PruneTree(); } } //_______________________________________________________________________ void TMVA::RuleFit::MakeForest() { // make a forest of decisiontrees if (fMethodRuleFit==0) { Log() << kFATAL << "RuleFit::BuildTree() - Attempting to build a tree NOT from a MethodRuleFit" << Endl; } Log() << kDEBUG << "Creating a forest with " << fMethodRuleFit->GetNTrees() << " decision trees" << Endl; Log() << kDEBUG << "Each tree is built using a random subsample with " << fNTreeSample << " events" << Endl; // Timer timer( fMethodRuleFit->GetNTrees(), "RuleFit" ); // Double_t fsig; Int_t nsig,nbkg; // TRandom3 rndGen; // // // First save all event weights. // Weights are modifed by the boosting. // Those weights we do not want for the later fitting. // Bool_t useBoost = fMethodRuleFit->UseBoost(); // (AdaBoost (True) or RandomForest/Tree (False) if (useBoost) SaveEventWeights(); for (Int_t i=0; iGetNTrees(); i++) { // timer.DrawProgressBar(i); if (!useBoost) ReshuffleEvents(); nsig=0; nbkg=0; for (UInt_t ie = 0; ieDataInfo().IsSignal(fTrainingEventsRndm[ie])) nsig++; // ignore weights here else nbkg++; } // fsig = Double_t(nsig)/Double_t(nsig+nbkg); // do not implement the above in this release...just set it to default DecisionTree *dt; Bool_t tryAgain=kTRUE; Int_t ntries=0; const Int_t ntriesMax=10; Double_t frnd; while (tryAgain) { frnd = 100*rndGen.Uniform( fMethodRuleFit->GetMinFracNEve(), 0.5*fMethodRuleFit->GetMaxFracNEve() ); Int_t iclass = 0; // event class being treated as signal during training Bool_t useRandomisedTree = !useBoost; dt = new DecisionTree( fMethodRuleFit->GetSeparationBase(), frnd, fMethodRuleFit->GetNCuts(), iclass, useRandomisedTree); dt->SetNVars(fMethodBase->GetNvar()); BuildTree(dt); // reads fNTreeSample events from fTrainingEventsRndm if (dt->GetNNodes()<3) { delete dt; dt=0; } ntries++; tryAgain = ((dt==0) && (ntries N(nodes) = " << fForest.back()->GetNNodes() << " ; n(tries) = " << ntries << Endl; } // Now restore event weights if (useBoost) RestoreEventWeights(); // print statistics on the forest created ForestStatistics(); } //_______________________________________________________________________ void TMVA::RuleFit::SaveEventWeights() { // save event weights - must be done before making the forest fEventWeights.clear(); for (std::vector::iterator e=fTrainingEvents.begin(); e!=fTrainingEvents.end(); e++) { Double_t w = (*e)->GetBoostWeight(); fEventWeights.push_back(w); } } //_______________________________________________________________________ void TMVA::RuleFit::RestoreEventWeights() { // save event weights - must be done before making the forest UInt_t ie=0; if (fEventWeights.size() != fTrainingEvents.size()) { Log() << kERROR << "RuleFit::RestoreEventWeights() called without having called SaveEventWeights() before!" << Endl; return; } for (std::vector::iterator e=fTrainingEvents.begin(); e!=fTrainingEvents.end(); e++) { (*e)->SetBoostWeight(fEventWeights[ie]); ie++; } } //_______________________________________________________________________ void TMVA::RuleFit::Boost( DecisionTree *dt ) { // Boost the events. The algorithm below is the called AdaBoost. // See MethodBDT for details. // Actually, this is a more or less copy of MethodBDT::AdaBoost(). Double_t sumw=0; // sum of initial weights - all events Double_t sumwfalse=0; // idem, only missclassified events // std::vector correctSelected; // <--- boolean stored // for (std::vector::iterator e=fTrainingEvents.begin(); e!=fTrainingEvents.end(); e++) { Bool_t isSignalType = (dt->CheckEvent(*e,kTRUE) > 0.5 ); Double_t w = (*e)->GetWeight(); sumw += w; // if (isSignalType == fMethodBase->DataInfo().IsSignal(*e)) { // correctly classified correctSelected.push_back(kTRUE); } else { // missclassified sumwfalse+= w; correctSelected.push_back(kFALSE); } } // missclassification error Double_t err = sumwfalse/sumw; // calculate boost weight for missclassified events // use for now the exponent = 1.0 // one could have w = ((1-err)/err)^beta Double_t boostWeight = (err>0 ? (1.0-err)/err : 1000.0); Double_t newSumw=0.0; UInt_t ie=0; // set new weight to missclassified events for (std::vector::iterator e=fTrainingEvents.begin(); e!=fTrainingEvents.end(); e++) { if (!correctSelected[ie]) (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostWeight); newSumw+=(*e)->GetWeight(); ie++; } // reweight all events Double_t scale = sumw/newSumw; for (std::vector::iterator e=fTrainingEvents.begin(); e!=fTrainingEvents.end(); e++) { (*e)->SetBoostWeight( (*e)->GetBoostWeight() * scale); } Log() << kDEBUG << "boostWeight = " << boostWeight << " scale = " << scale << Endl; } //_______________________________________________________________________ void TMVA::RuleFit::ForestStatistics() { // summary of statistics of all trees // * end-nodes: average and spread UInt_t ntrees = fForest.size(); if (ntrees==0) return; const DecisionTree *tree; Double_t sumn2 = 0; Double_t sumn = 0; Double_t nd; for (UInt_t i=0; iGetNNodes()); sumn += nd; sumn2 += nd*nd; } Double_t sig = TMath::Sqrt( gTools().ComputeVariance( sumn2, sumn, ntrees )); Log() << kVERBOSE << "Nodes in trees: average & std dev = " << sumn/ntrees << " , " << sig << Endl; } //_______________________________________________________________________ void TMVA::RuleFit::FitCoefficients() { // // Fit the coefficients for the rule ensemble // Log() << kVERBOSE << "Fitting rule/linear terms" << Endl; fRuleFitParams.MakeGDPath(); } //_______________________________________________________________________ void TMVA::RuleFit::CalcImportance() { // calculates the importance of each rule Log() << kVERBOSE << "Calculating importance" << Endl; fRuleEnsemble.CalcImportance(); fRuleEnsemble.CleanupRules(); fRuleEnsemble.CleanupLinear(); fRuleEnsemble.CalcVarImportance(); Log() << kVERBOSE << "Filling rule statistics" << Endl; fRuleEnsemble.RuleResponseStats(); } //_______________________________________________________________________ Double_t TMVA::RuleFit::EvalEvent( const Event& e ) { // evaluate single event return fRuleEnsemble.EvalEvent( e ); } //_______________________________________________________________________ void TMVA::RuleFit::SetTrainingEvents( const std::vector& el ) { // set the training events randomly if (fMethodRuleFit==0) Log() << kFATAL << "RuleFit::SetTrainingEvents - MethodRuleFit not initialized" << Endl; UInt_t neve = el.size(); if (neve==0) Log() << kWARNING << "An empty sample of training events was given" << Endl; // copy vector fTrainingEvents.clear(); fTrainingEventsRndm.clear(); for (UInt_t i=0; i(el[i])); fTrainingEventsRndm.push_back(static_cast< const Event *>(el[i])); } // Re-shuffle the vector, ie, recreate it in a random order std::random_shuffle( fTrainingEventsRndm.begin(), fTrainingEventsRndm.end() ); // fraction events per tree fNTreeSample = static_cast(neve*fMethodRuleFit->GetTreeEveFrac()); Log() << kDEBUG << "Number of events per tree : " << fNTreeSample << " ( N(events) = " << neve << " )" << " randomly drawn without replacement" << Endl; } //_______________________________________________________________________ void TMVA::RuleFit::GetRndmSampleEvents(std::vector< const Event * > & evevec, UInt_t nevents) { // draw a random subsample of the training events without replacement ReshuffleEvents(); if ((nevents0)) { evevec.resize(nevents); for (UInt_t ie=0; ie & hlist) { // normalize rule importance hists // // if all weights are positive, the scale will be 1/maxweight // if minimum weight < 0, then the scale will be 1/max(maxweight,abs(minweight)) // if (hlist.empty()) return; // Double_t wmin=0; Double_t wmax=0; Double_t w,wm; Double_t awmin; Double_t scale; for (UInt_t i=0; iGetMaximum(); wm = hs->GetMinimum(); if (i==0) { wmin=wm; wmax=w; } else { if (w>wmax) wmax=w; if (wmwmax) { scale = 1.0/awmin; usemin = -1.0; usemax = scale*wmax; } else { scale = 1.0/wmax; usemin = scale*wmin; usemax = 1.0; } // for (UInt_t i=0; iScale(scale); hs->SetMinimum(usemin); hs->SetMaximum(usemax); } } //_______________________________________________________________________ void TMVA::RuleFit::FillCut(TH2F* h2, const Rule *rule, Int_t vind) { // Fill cut if (rule==0) return; if (h2==0) return; // Double_t rmin, rmax; Bool_t dormin,dormax; Bool_t ruleHasVar = rule->GetRuleCut()->GetCutRange(vind,rmin,rmax,dormin,dormax); if (!ruleHasVar) return; // Int_t firstbin = h2->GetBin(1,1,1); if(firstbin<0) firstbin=0; Int_t lastbin = h2->GetBin(h2->GetNbinsX(),1,1); Int_t binmin=(dormin ? h2->FindBin(rmin,0.5):firstbin); Int_t binmax=(dormax ? h2->FindBin(rmax,0.5):lastbin); Int_t fbin; Double_t xbinw = h2->GetBinWidth(firstbin); Double_t fbmin = h2->GetBinLowEdge(binmin-firstbin+1); Double_t lbmax = h2->GetBinLowEdge(binmax-firstbin+1)+xbinw; Double_t fbfrac = (dormin ? ((fbmin+xbinw-rmin)/xbinw):1.0); Double_t lbfrac = (dormax ? ((rmax-lbmax+xbinw)/xbinw):1.0); Double_t f; Double_t xc; Double_t val; for (Int_t bin = binmin; binGetBinCenter(fbin); // if (fVisHistsUseImp) { val = rule->GetImportance(); } else { val = rule->GetCoefficient()*rule->GetSupport(); } h2->Fill(xc,0.5,val*f); } } //_______________________________________________________________________ void TMVA::RuleFit::FillLin(TH2F* h2,Int_t vind) { // fill lin if (h2==0) return; if (!fRuleEnsemble.DoLinear()) return; // Int_t firstbin = 1; Int_t lastbin = h2->GetNbinsX(); Double_t xc; Double_t val; if (fVisHistsUseImp) { val = fRuleEnsemble.GetLinImportance(vind); } else { val = fRuleEnsemble.GetLinCoefficients(vind); } for (Int_t bin = firstbin; binGetBinCenter(bin); h2->Fill(xc,0.5,val); } } //_______________________________________________________________________ void TMVA::RuleFit::FillCorr(TH2F* h2,const Rule *rule,Int_t vx, Int_t vy) { // fill rule correlation between vx and vy, weighted with either the importance or the coefficient if (rule==0) return; if (h2==0) return; Double_t val; if (fVisHistsUseImp) { val = rule->GetImportance(); } else { val = rule->GetCoefficient()*rule->GetSupport(); } // Double_t rxmin, rxmax, rymin, rymax; Bool_t dorxmin, dorxmax, dorymin, dorymax; // // Get range in rule for X and Y // Bool_t ruleHasVarX = rule->GetRuleCut()->GetCutRange(vx,rxmin,rxmax,dorxmin,dorxmax); Bool_t ruleHasVarY = rule->GetRuleCut()->GetCutRange(vy,rymin,rymax,dorymin,dorymax); if (!(ruleHasVarX || ruleHasVarY)) return; // min max of varX and varY in hist Double_t vxmin = (dorxmin ? rxmin:h2->GetXaxis()->GetXmin()); Double_t vxmax = (dorxmax ? rxmax:h2->GetXaxis()->GetXmax()); Double_t vymin = (dorymin ? rymin:h2->GetYaxis()->GetXmin()); Double_t vymax = (dorymax ? rymax:h2->GetYaxis()->GetXmax()); // min max bin in X and Y Int_t binxmin = h2->GetXaxis()->FindBin(vxmin); Int_t binxmax = h2->GetXaxis()->FindBin(vxmax); Int_t binymin = h2->GetYaxis()->FindBin(vymin); Int_t binymax = h2->GetYaxis()->FindBin(vymax); // bin widths Double_t xbinw = h2->GetXaxis()->GetBinWidth(binxmin); Double_t ybinw = h2->GetYaxis()->GetBinWidth(binxmin); Double_t xbinmin = h2->GetXaxis()->GetBinLowEdge(binxmin); Double_t xbinmax = h2->GetXaxis()->GetBinLowEdge(binxmax)+xbinw; Double_t ybinmin = h2->GetYaxis()->GetBinLowEdge(binymin); Double_t ybinmax = h2->GetYaxis()->GetBinLowEdge(binymax)+ybinw; // fraction of edges Double_t fxbinmin = (dorxmin ? ((xbinmin+xbinw-vxmin)/xbinw):1.0); Double_t fxbinmax = (dorxmax ? ((vxmax-xbinmax+xbinw)/xbinw):1.0); Double_t fybinmin = (dorymin ? ((ybinmin+ybinw-vymin)/ybinw):1.0); Double_t fybinmax = (dorymax ? ((vymax-ybinmax+ybinw)/ybinw):1.0); // Double_t fx,fy; Double_t xc,yc; // fill histo for (Int_t binx = binxmin; binxGetXaxis()->GetBinCenter(binx); for (Int_t biny = binymin; binyGetYaxis()->GetBinCenter(biny); h2->Fill(xc,yc,val*fx*fy); } } } //_______________________________________________________________________ void TMVA::RuleFit::FillVisHistCut(const Rule* rule, std::vector & hlist) { // help routine to MakeVisHists() - fills for all variables Int_t nhists = hlist.size(); Int_t nvar = fMethodBase->GetNvar(); if (nhists!=nvar) Log() << kFATAL << "BUG TRAP: number of hists is not equal the number of variables!" << Endl; // std::vector vindex; TString hstr; // not a nice way to do a check... for (Int_t ih=0; ihGetTitle(); for (Int_t iv=0; ivGetInputTitle(iv) == hstr) vindex.push_back(iv); } } // for (Int_t iv=0; ivContainsVariable(vindex[iv])) { FillCut(hlist[iv],rule,vindex[iv]); } } else { FillLin(hlist[iv],vindex[iv]); } } } //_______________________________________________________________________ void TMVA::RuleFit::FillVisHistCorr(const Rule * rule, std::vector & hlist) { // help routine to MakeVisHists() - fills for all correlation plots if (rule==0) return; Double_t ruleimp = rule->GetImportance(); if (!(ruleimp>0)) return; if (ruleimpGetNvar(); Int_t ncorr = (nvar*(nvar+1)/2)-nvar; if (nhists!=ncorr) Log() << kERROR << "BUG TRAP: number of corr hists is not correct! ncorr = " << ncorr << " nvar = " << nvar << " nhists = " << nhists << Endl; // std::vector< std::pair > vindex; TString hstr, var1, var2; Int_t iv1=0,iv2=0; // not a nice way to do a check... for (Int_t ih=0; ihGetName(); if (GetCorrVars( hstr, var1, var2 )) { iv1 = fMethodBase->DataInfo().FindVarIndex( var1 ); iv2 = fMethodBase->DataInfo().FindVarIndex( var2 ); vindex.push_back( std::pair(iv2,iv1) ); // pair X, Y } else { Log() << kERROR << "BUG TRAP: should not be here - failed getting var1 and var2" << Endl; } } // for (Int_t ih=0; ihContainsVariable(vindex[ih].first)) || (rule->ContainsVariable(vindex[ih].second)) ) { FillCorr(hlist[ih],rule,vindex[ih].first,vindex[ih].second); } } } //_______________________________________________________________________ Bool_t TMVA::RuleFit::GetCorrVars(TString & title, TString & var1, TString & var2) { // get first and second variables from title var1=""; var2=""; if(!title.BeginsWith("scat_")) return kFALSE; TString titleCopy = title(5,title.Length()); if(titleCopy.Index("_RF2D")>=0) titleCopy.Remove(titleCopy.Index("_RF2D")); Int_t splitPos = titleCopy.Index("_vs_"); if(splitPos>=0) { // there is a _vs_ in the string var1 = titleCopy(0,splitPos); var2 = titleCopy(splitPos+4, titleCopy.Length()); return kTRUE; } else { var1 = titleCopy; return kFALSE; } } //_______________________________________________________________________ void TMVA::RuleFit::MakeVisHists() { // this will create histograms visualizing the rule ensemble const TString directories[5] = { "InputVariables_Id", "InputVariables_Deco", "InputVariables_PCA", "InputVariables_Gauss", "InputVariables_Gauss_Deco" }; const TString corrDirName = "CorrelationPlots"; TDirectory* rootDir = Factory::RootBaseDir(); TDirectory* varDir = 0; TDirectory* corrDir = 0; TDirectory* methodDir = fMethodBase->BaseDir(); TString varDirName; // Bool_t done=(rootDir==0); Int_t type=0; if (done) { Log() << kWARNING << "No basedir - BUG??" << Endl; return; } while (!done) { varDir = (TDirectory*)rootDir->Get( directories[type] ); type++; done = ((varDir!=0) || (type>4)); } if (varDir==0) { Log() << kWARNING << "No input variable directory found - BUG?" << Endl; return; } corrDir = (TDirectory*)varDir->Get( corrDirName ); if (corrDir==0) { Log() << kWARNING << "No correlation directory found" << Endl; Log() << kWARNING << "Check for other warnings related to correlation histograms" << Endl; return; } if (methodDir==0) { Log() << kWARNING << "No rulefit method directory found - BUG?" << Endl; return; } varDirName = varDir->GetName(); varDir->cd(); // // get correlation plot directory corrDir = (TDirectory *)varDir->Get(corrDirName); if (corrDir==0) { Log() << kWARNING << "No correlation directory found : " << corrDirName << Endl; return; } // how many plots are in the var directory? Int_t noPlots = ((varDir->GetListOfKeys())->GetEntries()) / 2; Log() << kDEBUG << "Got number of plots = " << noPlots << Endl; // loop over all objects in directory std::vector h1Vector; std::vector h2CorrVector; TIter next(varDir->GetListOfKeys()); TKey *key; while ((key = (TKey*)next())) { // make sure, that we only look at histograms TClass *cl = gROOT->GetClass(key->GetClassName()); if (!cl->InheritsFrom(TH1F::Class())) continue; TH1F *sig = (TH1F*)key->ReadObj(); TString hname= sig->GetName(); Log() << kDEBUG << "Got histogram : " << hname << Endl; // check for all signal histograms if (hname.Contains("__S")){ // found a new signal plot TString htitle = sig->GetTitle(); htitle.ReplaceAll("signal",""); TString newname = hname; newname.ReplaceAll("__Signal","__RF"); newname.ReplaceAll("__S","__RF"); methodDir->cd(); TH2F *newhist = new TH2F(newname,htitle,sig->GetNbinsX(),sig->GetXaxis()->GetXmin(),sig->GetXaxis()->GetXmax(), 1,sig->GetYaxis()->GetXmin(),sig->GetYaxis()->GetXmax()); varDir->cd(); h1Vector.push_back( newhist ); } } // corrDir->cd(); TString var1,var2; TIter nextCorr(corrDir->GetListOfKeys()); while ((key = (TKey*)nextCorr())) { // make sure, that we only look at histograms TClass *cl = gROOT->GetClass(key->GetClassName()); if (!cl->InheritsFrom(TH2F::Class())) continue; TH2F *sig = (TH2F*)key->ReadObj(); TString hname= sig->GetName(); // check for all signal histograms if ((hname.Contains("scat_")) && (hname.Contains("_Signal"))) { Log() << kDEBUG << "Got histogram (2D) : " << hname << Endl; TString htitle = sig->GetTitle(); htitle.ReplaceAll("(Signal)",""); TString newname = hname; newname.ReplaceAll("_Signal","_RF2D"); methodDir->cd(); const Int_t rebin=2; TH2F *newhist = new TH2F(newname,htitle, sig->GetNbinsX()/rebin,sig->GetXaxis()->GetXmin(),sig->GetXaxis()->GetXmax(), sig->GetNbinsY()/rebin,sig->GetYaxis()->GetXmin(),sig->GetYaxis()->GetXmax()); if (GetCorrVars( newname, var1, var2 )) { Int_t iv1 = fMethodBase->DataInfo().FindVarIndex(var1); Int_t iv2 = fMethodBase->DataInfo().FindVarIndex(var2); if (iv1<0) { sig->GetYaxis()->SetTitle(var1); } else { sig->GetYaxis()->SetTitle(fMethodBase->GetInputTitle(iv1)); } if (iv2<0) { sig->GetXaxis()->SetTitle(var2); } else { sig->GetXaxis()->SetTitle(fMethodBase->GetInputTitle(iv2)); } } corrDir->cd(); h2CorrVector.push_back( newhist ); } } varDir->cd(); // fill rules UInt_t nrules = fRuleEnsemble.GetNRules(); const Rule *rule; for (UInt_t i=0; icd(); // fill rules for (UInt_t i=0; icd(); for (UInt_t i=0; iWrite(); for (UInt_t i=0; iWrite(); } //_______________________________________________________________________ void TMVA::RuleFit::MakeDebugHists() { // this will create a histograms intended rather for debugging or for the curious user TDirectory* methodDir = fMethodBase->BaseDir(); if (methodDir==0) { Log() << kWARNING << " No rulefit method directory found - bug?" << Endl; return; } // methodDir->cd(); std::vector distances; std::vector fncuts; std::vector fnvars; const Rule *ruleA; const Rule *ruleB; Double_t dABmin=1000000.0; Double_t dABmax=-1.0; UInt_t nrules = fRuleEnsemble.GetNRules(); for (UInt_t i=0; iRuleDist( *ruleB, kTRUE ); if (dAB>-0.5) { UInt_t nc = ruleA->GetNcuts(); UInt_t nv = ruleA->GetNumVarsUsed(); distances.push_back(dAB); fncuts.push_back(static_cast(nc)); fnvars.push_back(static_cast(nv)); if (dABdABmax) dABmax=dAB; } } } // TH1F *histDist = new TH1F("RuleDist","Rule distances",100,dABmin,dABmax); TTree *distNtuple = new TTree("RuleDistNtuple","RuleDist ntuple"); Double_t ntDist; Double_t ntNcuts; Double_t ntNvars; distNtuple->Branch("dist", &ntDist, "dist/D"); distNtuple->Branch("ncuts",&ntNcuts, "ncuts/D"); distNtuple->Branch("nvars",&ntNvars, "nvars/D"); // for (UInt_t i=0; iFill(distances[i]); ntDist = distances[i]; ntNcuts = fncuts[i]; ntNvars = fnvars[i]; distNtuple->Fill(); } distNtuple->Write(); }