// @(#)root/tmva $Id$ // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Eckhard von Toerne, Helge Voss /********************************************************************************** * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * * Package: TMVA * * Class : DataSetFactory * * Web : http://tmva.sourceforge.net * * * * Description: * * Contains all the data information * * * * Authors (alphabetical): * * Andreas Hoecker - CERN, Switzerland * * Joerg Stelzer - CERN, Switzerland * * Peter Speckmayer - CERN, Switzerland * * Eckhard von Toerne - U. of Bonn, Germany * * Helge Voss - MPI-K Heidelberg, Germany * * * * Copyright (c) 2006: * * CERN, Switzerland * * MPI-K Heidelberg, Germany * * * * Redistribution and use in source and binary forms, with or without * * modification, are permitted according to the terms listed in LICENSE * * (http://tmva.sourceforge.net/LICENSE) * **********************************************************************************/ #ifndef ROOT_TMVA_DataSetFactory #define ROOT_TMVA_DataSetFactory ////////////////////////////////////////////////////////////////////////// // // // DataSetFactory // // // // Class that contains all the data information // // // ////////////////////////////////////////////////////////////////////////// #include #include #ifndef ROOT_TString #include "TString.h" #endif #ifndef ROOT_TTree #include "TTree.h" #endif #ifndef ROOT_TCut #include "TCut.h" #endif #ifndef ROOT_TTreeFormula #include "TTreeFormula.h" #endif #ifndef ROOT_TMatrixDfwd #include "TMatrixDfwd.h" #endif #ifndef ROOT_TPrincipal #include "TPrincipal.h" #endif #ifndef ROOT_TRandom3 #include "TRandom3.h" #endif #ifndef ROOT_TMVA_Types #include "TMVA/Types.h" #endif #ifndef ROOT_TMVA_VariableInfo #include "TMVA/VariableInfo.h" #endif #ifndef ROOT_TMVA_Event #include "TMVA/Event.h" #endif namespace TMVA { class DataSet; class DataSetInfo; class DataInputHandler; class TreeInfo; class MsgLogger; // =============== maybe move these elswhere (e.g. into the tools ) // =============== functors ======================= class RandomGenerator { public: RandomGenerator( UInt_t seed ){ fRandom.SetSeed( seed ); } UInt_t operator() ( UInt_t n ) { return fRandom.Integer(n); } private: TRandom3 fRandom; // random generator }; // delete-functor (to be used in e.g. for_each algorithm) template struct DeleteFunctor_t { DeleteFunctor_t& operator()(const T* p) { delete p; return *this; } }; template DeleteFunctor_t DeleteFunctor() { return DeleteFunctor_t(); } template< typename T > class Increment { T value; public: Increment( T start ) : value( start ){ } T operator()() { return value++; } }; template class null_t { private: // returns argF public: typedef F argument_type; F operator()(const F& argF) const { return argF; } }; template inline null_t null() { return null_t(); } template class compose_binary_t : public std::binary_function { private: const F& f; // f(g(argG),h(argH)) const G& g; const H& h; public: compose_binary_t(const F& _f, const G& _g, const H& _h) : f(_f), g(_g), h(_h) { } typename F::result_type operator()(const typename G::argument_type& argG, const typename H::argument_type& argH) const { return f(g(argG),h(argH)); } }; template inline compose_binary_t compose_binary(const F& _f, const G& _g, const H& _h) { return compose_binary_t(_f,_g,_h); } template class compose_unary_t : public std::unary_function { private: const F& f; // f(g(argG)) const G& g; public: compose_unary_t(const F& _f, const G& _g) : f(_f), g(_g) { } typename F::result_type operator()(const typename G::argument_type& argG) const { return f(g(argG)); } }; template inline compose_unary_t compose_unary(const F& _f, const G& _g) { return compose_unary_t(_f,_g); } // =============== functors ======================= // ========================================================= class DataSetFactory { typedef std::vector EventVector; typedef std::vector< EventVector > EventVectorOfClasses; typedef std::map EventVectorOfClassesOfTreeType; typedef std::map EventVectorOfTreeType; typedef std::vector< Double_t > ValuePerClass; typedef std::map ValuePerClassOfTreeType; class EventStats { public: Int_t nTrainingEventsRequested; Int_t nTestingEventsRequested; Int_t nInitialEvents; Int_t nEvBeforeCut; Int_t nEvAfterCut; Float_t nWeEvBeforeCut; Float_t nWeEvAfterCut; Double_t nNegWeights; Float_t* varAvLength; EventStats(): nTrainingEventsRequested(0), nTestingEventsRequested(0), nInitialEvents(0), nEvBeforeCut(0), nEvAfterCut(0), nWeEvBeforeCut(0), nWeEvAfterCut(0), nNegWeights(0), varAvLength(0) {} ~EventStats() { delete[] varAvLength; } Float_t cutScaling() const { return Float_t(nEvAfterCut)/nEvBeforeCut; } }; typedef std::vector< int > NumberPerClass; typedef std::vector< EventStats > EvtStatsPerClass; public: // singleton class static DataSetFactory& Instance() { if (!fgInstance) fgInstance = new DataSetFactory(); return *fgInstance; } static void destroyInstance() { if (fgInstance) { delete fgInstance; fgInstance=0; } } DataSet* CreateDataSet( DataSetInfo &, DataInputHandler& ); protected: ~DataSetFactory(); DataSetFactory(); static DataSetFactory *fgInstance; DataSet* BuildInitialDataSet( DataSetInfo&, TMVA::DataInputHandler& ); DataSet* BuildDynamicDataSet( DataSetInfo& ); // ---------- new versions void BuildEventVector ( DataSetInfo& dsi, DataInputHandler& dataInput, EventVectorOfClassesOfTreeType& eventsmap, EvtStatsPerClass& eventCounts); DataSet* MixEvents ( DataSetInfo& dsi, EventVectorOfClassesOfTreeType& eventsmap, EvtStatsPerClass& eventCounts, const TString& splitMode, const TString& mixMode, const TString& normMode, UInt_t splitSeed); void RenormEvents ( DataSetInfo& dsi, EventVectorOfClassesOfTreeType& eventsmap, const EvtStatsPerClass& eventCounts, const TString& normMode ); void InitOptions ( DataSetInfo& dsi, EvtStatsPerClass& eventsmap, TString& normMode, UInt_t& splitSeed, TString& splitMode, TString& mixMode ); // ------------------------ // auxiliary functions to compute correlations TMatrixD* CalcCorrelationMatrix( DataSet*, const UInt_t classNumber ); TMatrixD* CalcCovarianceMatrix ( DataSet*, const UInt_t classNumber ); void CalcMinMax ( DataSet*, DataSetInfo& dsi ); // resets branch addresses to current event void ResetBranchAndEventAddresses( TTree* ); void ResetCurrentTree() { fCurrentTree = 0; } void ChangeToNewTree( TreeInfo&, const DataSetInfo & ); Bool_t CheckTTreeFormula( TTreeFormula* ttf, const TString& expression, Bool_t& hasDollar ); // verbosity Bool_t Verbose() { return fVerbose; } // data members // verbosity Bool_t fVerbose; //! Verbosity TString fVerboseLevel; //! VerboseLevel Bool_t fScaleWithPreselEff; //! how to deal with requested #events in connection with preselection cuts // the event mutable TTree* fCurrentTree; //! the tree, events are currently read from mutable UInt_t fCurrentEvtIdx; //! the current event (to avoid reading of the same event) // the formulas for reading the original tree std::vector fInputFormulas; //! input variables std::vector fTargetFormulas; //! targets std::vector fCutFormulas; //! cuts std::vector fWeightFormula; //! weights std::vector fSpectatorFormulas; //! spectators mutable MsgLogger* fLogger; //! message logger MsgLogger& Log() const { return *fLogger; } }; } #endif