// @(#)root/mathcore:$Id$ // Authors: B. Rabacal 11/2010 /********************************************************************** * * * Copyright (c) 2010 , LCG ROOT MathLib Team * * * * * **********************************************************************/ // implementation file for class TKDTreeBinning // #include #include #include #include "TKDTreeBinning.h" #include "Fit/BinData.h" #include "TRandom.h" ClassImp(TKDTreeBinning) //________________________________________________________________________________________________ // Begin_Html //

TKDTreeBinning - A class providing multidimensional binning

// The class implements multidimensional binning by constructing a TKDTree inner structure from the // data which is used as the bins. // The bins are retrieved as two double*, one for the minimum bin edges, // the other as the maximum bin edges. For one dimension one of these is enough to correctly define the bins. // For the multidimensional case both minimum and maximum ones are necessary for the bins to be well defined. // The bin edges of d-dimensional data is a d-tet of the bin's thresholds. For example if d=3 the minimum bin // edges of bin b is of the form of the following array: {xbmin, ybmin, zbmin}. // You also have the possibility to sort the bins by their density. //
// Details of usage can be found in $ROOTSYS/tutorials/math/kdTreeBinning.C and more information on // the embedded TKDTree can be found in http://root.cern.ch/lxr/source/math/mathcore/src/TKDTree.cxx or // http://root.cern.ch/lxr/source/math/mathcore/inc/TKDTree.h. // End_Html struct TKDTreeBinning::CompareAsc { // Boolean functor whose predicate depends on the bin's density. Used for ascending sort. CompareAsc(const TKDTreeBinning* treebins) : bins(treebins) {} Bool_t operator()(UInt_t bin1, UInt_t bin2) { return bins->GetBinDensity(bin1) < bins->GetBinDensity(bin2); } const TKDTreeBinning* bins; }; struct TKDTreeBinning::CompareDesc { // Boolean functor whose predicate depends on the bin's density. Used for descending sort. CompareDesc(const TKDTreeBinning* treebins) : bins(treebins) {} Bool_t operator()(UInt_t bin1, UInt_t bin2) { return bins->GetBinDensity(bin1) > bins->GetBinDensity(bin2); } const TKDTreeBinning* bins; }; TKDTreeBinning::TKDTreeBinning(UInt_t dataSize, UInt_t dataDim, Double_t* data, UInt_t nBins, bool adjustBinEdges) // Class's constructor taking the size of the data points, dimension, a data array and the number // of bins (default = 100). It is reccomended to have the number of bins as an exact divider of // the data size. // The data array must be organized with a stride=1 for the points and = N (the dataSize) for the dimension. // // Thus data[] = x1,x2,x3,......xN, y1,y2,y3......yN, z1,z2,...........zN,.... // // Note that the passed dataSize is not the size of the array but is the number of points (N) // The size of the array must be at least dataDim*dataSize // : fData(0), fBinMinEdges(std::vector()), fBinMaxEdges(std::vector()), fDataBins((TKDTreeID*)0), fDim(dataDim), fDataSize(dataSize), fDataThresholds(std::vector >(fDim, std::make_pair(0., 0.))), fIsSorted(kFALSE), fIsSortedAsc(kFALSE), fBinsContent(std::vector()) { if (adjustBinEdges) SetBit(kAdjustBinEdges); if (data) { SetData(data); SetNBins(nBins); } else { if (!fData) this->Warning("TKDTreeBinning", "Data is nil. Nothing is built."); } } TKDTreeBinning::~TKDTreeBinning() { // Class's destructor if (fData) delete[] fData; if (fDataBins) delete fDataBins; } void TKDTreeBinning::SetNBins(UInt_t bins) { // Sets binning inner structure fNBins = bins; if (fDim && fNBins && fDataSize) { if (fDataSize / fNBins) { Bool_t remainingData = fDataSize % fNBins; if (remainingData) { fNBins += 1; this->Info("SetNBins", "Number of bins is not enough to hold the data. Extra bin added."); } fDataBins = new TKDTreeID(fDataSize, fDim, fDataSize / (fNBins - remainingData)); // TKDTree input is data size, data dimension and the content size of bins ("bucket" size) SetTreeData(); fDataBins->Build(); SetBinsEdges(); SetBinsContent(); } else { fDataBins = (TKDTreeID*)0; this->Warning("SetNBins", "Number of bins is bigger than data size. Nothing is built."); } } else { fDataBins = (TKDTreeID*)0; if (!fDim) this->Warning("SetNBins", "Data dimension is nil. Nothing is built."); if (!fNBins) this->Warning("SetNBins", "Number of bins is nil. Nothing is built."); if (!fDataSize) this->Warning("SetNBins", "Data size is nil. Nothing is built."); } } void TKDTreeBinning::SortBinsByDensity(Bool_t sortAsc) { // Sorts bins by their density if (fDim == 1) { // in one dim they are already sorted (no need to do anything) return; } else { UInt_t* indices = new UInt_t[fNBins]; for (UInt_t i = 0; i < fNBins; ++i) indices[i] = i; if (sortAsc) { std::sort(indices, indices + fNBins, CompareAsc(this)); fIsSortedAsc = kTRUE; } else { std::sort(indices, indices + fNBins, CompareDesc(this)); fIsSortedAsc = kFALSE; } std::vector binMinEdges(fNBins * fDim); std::vector binMaxEdges(fNBins * fDim); std::vector binContent(fNBins ); // reajust also content (not needed bbut better in general!) for (UInt_t i = 0; i < fNBins; ++i) { for (UInt_t j = 0; j < fDim; ++j) { binMinEdges[i * fDim + j] = fBinMinEdges[indices[i] * fDim + j]; binMaxEdges[i * fDim + j] = fBinMaxEdges[indices[i] * fDim + j]; } binContent[i] = fBinsContent[indices[i]]; } fBinMinEdges.swap(binMinEdges); fBinMaxEdges.swap(binMaxEdges); fBinsContent.swap(binContent); // not needed anymore if readjusting bin content all the time // re-adjust content of extra bins if exists // since it is different than the others // if ( fDataSize % fNBins != 0) { // UInt_t k = 0; // Bool_t found = kFALSE; // while (!found) { // if (indices[k] == fNBins - 1) { // found = kTRUE; // break; // } // ++k; // } // fBinsContent[fNBins - 1] = fDataBins->GetBucketSize(); // fBinsContent[k] = fDataSize % fNBins-1; // } delete [] indices; fIsSorted = kTRUE; } } void TKDTreeBinning::SetData(Double_t* data) { // Sets the data and finds minimum and maximum by dimensional coordinate fData = new Double_t*[fDim]; for (UInt_t i = 0; i < fDim; ++i) { fData[i] = &data[i * fDataSize]; fDataThresholds[i] = std::make_pair(*std::min_element(fData[i], fData[i] + fDataSize), *std::max_element(fData[i], fData[i] + fDataSize)); } } void TKDTreeBinning::SetTreeData() { // Sets the data for constructing the kD-tree for (UInt_t i = 0; i < fDim; ++i) fDataBins->SetData(i, fData[i]); } void TKDTreeBinning::SetBinsContent() { // Sets the bins' content fBinsContent.reserve(fNBins); for (UInt_t i = 0; i < fNBins; ++i) fBinsContent[i] = fDataBins->GetBucketSize(); if ( fDataSize % fNBins != 0 ) fBinsContent[fNBins - 1] = fDataSize % (fNBins-1); } void TKDTreeBinning::SetBinsEdges() { // Sets the bins' edges //Double_t* rawBinEdges = fDataBins->GetBoundaryExact(fDataBins->GetNNodes()); Double_t* rawBinEdges = fDataBins->GetBoundary(fDataBins->GetNNodes()); fCheckedBinEdges = std::vector > >(fDim, std::vector >(fNBins, std::make_pair(kFALSE, kFALSE))); fCommonBinEdges = std::vector > >(fDim, std::map >()); SetCommonBinEdges(rawBinEdges); if (TestBit(kAdjustBinEdges) ) { ReadjustMinBinEdges(rawBinEdges); ReadjustMaxBinEdges(rawBinEdges); } SetBinMinMaxEdges(rawBinEdges); fCommonBinEdges.clear(); fCheckedBinEdges.clear(); } void TKDTreeBinning::SetBinMinMaxEdges(Double_t* binEdges) { // Sets the bins' minimum and maximum edges fBinMinEdges.reserve(fNBins * fDim); fBinMaxEdges.reserve(fNBins * fDim); for (UInt_t i = 0; i < fNBins; ++i) { for (UInt_t j = 0; j < fDim; ++j) { fBinMinEdges.push_back(binEdges[(i * fDim + j) * 2]); fBinMaxEdges.push_back(binEdges[(i * fDim + j) * 2 + 1]); } } } void TKDTreeBinning::SetCommonBinEdges(Double_t* binEdges) { // Sets indexing on the bin edges which have common boundaries for (UInt_t i = 0; i < fDim; ++i) { for (UInt_t j = 0; j < fNBins; ++j) { Double_t binEdge = binEdges[(j * fDim + i) * 2]; if(fCommonBinEdges[i].find(binEdge) == fCommonBinEdges[i].end()) { std::vector commonBinEdges; for (UInt_t k = 0; k < fNBins; ++k) { UInt_t minBinEdgePos = (k * fDim + i) * 2; if (std::fabs(binEdge - binEdges[minBinEdgePos]) < std::numeric_limits::epsilon()) commonBinEdges.push_back(minBinEdgePos); UInt_t maxBinEdgePos = ++minBinEdgePos; if (std::fabs(binEdge - binEdges[maxBinEdgePos]) < std::numeric_limits::epsilon()) commonBinEdges.push_back(maxBinEdgePos); } fCommonBinEdges[i][binEdge] = commonBinEdges; } } } } void TKDTreeBinning::ReadjustMinBinEdges(Double_t* binEdges) { // Readjusts the bins' minimum edge by shifting it slightly lower // to avoid overlapping with the data for (UInt_t i = 0; i < fDim; ++i) { for (UInt_t j = 0; j < fNBins; ++j) { if (!fCheckedBinEdges[i][j].first) { Double_t binEdge = binEdges[(j * fDim + i) * 2]; Double_t adjustedBinEdge = binEdge; double eps = -10*std::numeric_limits::epsilon(); if (adjustedBinEdge != 0) adjustedBinEdge *= (1. + eps); else adjustedBinEdge += eps; for (UInt_t k = 0; k < fCommonBinEdges[i][binEdge].size(); ++k) { UInt_t binEdgePos = fCommonBinEdges[i][binEdge][k]; Bool_t isMinBinEdge = binEdgePos % 2 == 0; UInt_t bin = isMinBinEdge ? (binEdgePos / 2 - i) / fDim : ((binEdgePos - 1) / 2 - i) / fDim; binEdges[binEdgePos] = adjustedBinEdge; if (isMinBinEdge) fCheckedBinEdges[i][bin].first = kTRUE; else fCheckedBinEdges[i][bin].second = kTRUE; } } } } } void TKDTreeBinning::ReadjustMaxBinEdges(Double_t* binEdges) { // Readjusts the bins' maximum edge // and shift it sligtly higher for (UInt_t i = 0; i < fDim; ++i) { for (UInt_t j = 0; j < fNBins; ++j) { if (!fCheckedBinEdges[i][j].second) { Double_t& binEdge = binEdges[(j * fDim + i) * 2 + 1]; double eps = 10*std::numeric_limits::epsilon(); if (binEdge != 0) binEdge *= (1. + eps); else binEdge += eps; } } } } const Double_t* TKDTreeBinning::GetBinsMinEdges() const { // Returns the bins' minimum edges if (fDataBins) return &fBinMinEdges[0]; this->Warning("GetBinsMinEdges", "Binning kd-tree is nil. No bin edges retrieved."); this->Info("GetBinsMinEdges", "Returning null pointer."); return (Double_t*)0; } const Double_t* TKDTreeBinning::GetBinsMaxEdges() const { // Returns the bins' maximum edges if (fDataBins) return &fBinMaxEdges[0]; this->Warning("GetBinsMaxEdges", "Binning kd-tree is nil. No bin edges retrieved."); this->Info("GetBinsMaxEdges", "Returning null pointer."); return (Double_t*)0; } std::pair TKDTreeBinning::GetBinsEdges() const { // Returns the bins' edges if (fDataBins) return std::make_pair(GetBinsMinEdges(), GetBinsMaxEdges()); this->Warning("GetBinsEdges", "Binning kd-tree is nil. No bin edges retrieved."); this->Info("GetBinsEdges", "Returning null pointer pair."); return std::make_pair((Double_t*)0, (Double_t*)0); } const Double_t* TKDTreeBinning::GetBinMinEdges(UInt_t bin) const { // Returns the bin's minimum edges. 'bin' is between 0 and fNBins - 1 if (fDataBins) if (bin < fNBins) return &fBinMinEdges[bin * fDim]; else this->Warning("GetBinMinEdges", "No such bin. 'bin' is between 0 and %d", fNBins - 1); else this->Warning("GetBinMinEdges", "Binning kd-tree is nil. No bin edges retrieved."); this->Info("GetBinMinEdges", "Returning null pointer."); return (Double_t*)0; } const Double_t* TKDTreeBinning::GetBinMaxEdges(UInt_t bin) const { // Returns the bin's maximum edges. 'bin' is between 0 and fNBins - 1 if (fDataBins) if (bin < fNBins) return &fBinMaxEdges[bin * fDim]; else this->Warning("GetBinMaxEdges", "No such bin. 'bin' is between 0 and %d", fNBins - 1); else this->Warning("GetBinMaxEdges", "Binning kd-tree is nil. No bin edges retrieved."); this->Info("GetBinMaxEdges", "Returning null pointer."); return (Double_t*)0; } std::pair TKDTreeBinning::GetBinEdges(UInt_t bin) const { // Returns the bin's edges. 'bin' is between 0 and fNBins - 1 if (fDataBins) if (bin < fNBins) return std::make_pair(GetBinMinEdges(bin), GetBinMaxEdges(bin)); else this->Warning("GetBinEdges", "No such bin. 'bin' is between 0 and %d", fNBins - 1); else this->Warning("GetBinEdges", "Binning kd-tree is nil. No bin edges retrieved."); this->Info("GetBinEdges", "Returning null pointer pair."); return std::make_pair((Double_t*)0, (Double_t*)0); } UInt_t TKDTreeBinning::GetNBins() const { // Returns the number of bins return fNBins; } UInt_t TKDTreeBinning::GetDim() const { // Returns the number of dimensions return fDim; } UInt_t TKDTreeBinning::GetBinContent(UInt_t bin) const { // Returns the number of points in bin. 'bin' is between 0 and fNBins - 1 if(bin <= fNBins - 1) return fBinsContent[bin]; this->Warning("GetBinContent", "No such bin. Returning 0."); this->Info("GetBinContent", "'bin' is between 0 and %d.", fNBins - 1); return 0; } TKDTreeID* TKDTreeBinning::GetTree() const { // Returns the kD-Tree structure of the binning if (fDataBins) return fDataBins; this->Warning("GetTree", "Binning kd-tree is nil. No embedded kd-tree retrieved. Returning null pointer."); return (TKDTreeID*)0; } const Double_t* TKDTreeBinning::GetDimData(UInt_t dim) const { // Returns the data in the dim coordinate. 'dim' is between 0 and fDim - 1 if(dim < fDim) return fData[dim]; this->Warning("GetDimData", "No such dimensional coordinate. No coordinate data retrieved. Returning null pointer."); this->Info("GetDimData", "'dim' is between 0 and %d.", fDim - 1); return 0; } Double_t TKDTreeBinning::GetDataMin(UInt_t dim) const { // Returns the data minimum in the dim coordinate. 'dim' is between 0 and fDim - 1 if(dim < fDim) return fDataThresholds[dim].first; this->Warning("GetDataMin", "No such dimensional coordinate. No coordinate data minimum retrieved. Returning +inf."); this->Info("GetDataMin", "'dim' is between 0 and %d.", fDim - 1); return std::numeric_limits::infinity(); } Double_t TKDTreeBinning::GetDataMax(UInt_t dim) const { // Returns the data maximum in the dim coordinate. 'dim' is between 0 and fDim - 1 if(dim < fDim) return fDataThresholds[dim].second; this->Warning("GetDataMax", "No such dimensional coordinate. No coordinate data maximum retrieved. Returning -inf."); this->Info("GetDataMax", "'dim' is between 0 and %d.", fDim - 1); return -1 * std::numeric_limits::infinity(); } Double_t TKDTreeBinning::GetBinDensity(UInt_t bin) const { // Returns the density in bin. 'bin' is between 0 and fNBins - 1 if(bin < fNBins) { Double_t volume = GetBinVolume(bin); if (!volume) this->Warning("GetBinDensity", "Volume is null. Returning -1."); return GetBinContent(bin) / volume; } this->Warning("GetBinDensity", "No such bin. Returning -1."); this->Info("GetBinDensity", "'bin' is between 0 and %d.", fNBins - 1); return -1.; } Double_t TKDTreeBinning::GetBinVolume(UInt_t bin) const { // Returns the (hyper)volume of bin. 'bin' is between 0 and fNBins - 1 if(bin < fNBins) { std::pair binEdges = GetBinEdges(bin); Double_t volume = 1.; for (UInt_t i = 0; i < fDim; ++i) { volume *= (binEdges.second[i] - binEdges.first[i]); } return volume; } this->Warning("GetBinVolume", "No such bin. Returning 0."); this->Info("GetBinVolume", "'bin' is between 0 and %d.", fNBins - 1); return 0.; } const double * TKDTreeBinning::GetOneDimBinEdges() const { // Returns the minimum edges for one dimensional binning only. // size of the vector is fNBins + 1 is the vector has been sorted in increasing bin edges // N.B : if one does not call SortOneDimBinEdges the bins are not ordered if (fDim == 1) { // no need to sort here because vector is already sorted in one dim return &fBinMinEdges.front(); } this->Warning("GetOneDimBinEdges", "Data is multidimensional. No sorted bin edges retrieved. Returning null pointer."); this->Info("GetOneDimBinEdges", "This method can only be invoked if the data is a one dimensional set"); return 0; } const Double_t * TKDTreeBinning::SortOneDimBinEdges(Bool_t sortAsc) { if (fDim != 1) { this->Warning("SortOneDimBinEdges", "Data is multidimensional. Cannot sorted bin edges. Returning null pointer."); this->Info("SortOneDimBinEdges", "This method can only be invoked if the data is a one dimensional set"); return 0; } // order bins by increasing (or decreasing ) x positions std::vector indices( fNBins ); TMath::Sort( fNBins, &fBinMinEdges[0], &indices[0], !sortAsc ); std::vector binMinEdges(fNBins ); std::vector binMaxEdges(fNBins ); std::vector binContent(fNBins ); // reajust also content (not needed but better in general!) for (UInt_t i = 0; i < fNBins; ++i) { binMinEdges[i ] = fBinMinEdges[indices[i] ]; binMaxEdges[i ] = fBinMaxEdges[indices[i] ]; binContent[i] = fBinsContent[indices[i] ]; } fBinMinEdges.swap(binMinEdges); fBinMaxEdges.swap(binMaxEdges); fBinsContent.swap(binContent); // Add also the upper(lower) edge to the min (max) list if (sortAsc) { fBinMinEdges.push_back(fBinMaxEdges.back()); return &fBinMinEdges[0]; } fBinMaxEdges.push_back(fBinMinEdges.back()); return &fBinMaxEdges[0]; } const Double_t* TKDTreeBinning::GetBinCenter(UInt_t bin) const { // Returns the geometric center of of the bin. 'bin' is between 0 and fNBins - 1 if(bin < fNBins) { Double_t* result = new Double_t[fDim]; std::pair binEdges = GetBinEdges(bin); for (UInt_t i = 0; i < fDim; ++i) { result[i] = (binEdges.second[i] + binEdges.first[i]) / 2.; } return result; } this->Warning("GetBinCenter", "No such bin. Returning null pointer."); this->Info("GetBinCenter", "'bin' is between 0 and %d.", fNBins - 1); return 0; } const Double_t* TKDTreeBinning::GetBinWidth(UInt_t bin) const { // Returns the geometric center of of the bin. 'bin' is between 0 and fNBins - 1 if(bin < fNBins) { Double_t* result = new Double_t[fDim]; std::pair binEdges = GetBinEdges(bin); for (UInt_t i = 0; i < fDim; ++i) { result[i] = (binEdges.second[i] - binEdges.first[i]); } return result; } this->Warning("GetBinWidth", "No such bin. Returning null pointer."); this->Info("GetBinWidth", "'bin' is between 0 and %d.", fNBins - 1); return 0; } UInt_t TKDTreeBinning::GetBinMaxDensity() const { // Return the bin with maximum density if (fIsSorted) { if (fIsSortedAsc) return fNBins - 1; else return 0; } UInt_t* indices = new UInt_t[fNBins]; for (UInt_t i = 0; i < fNBins; ++i) indices[i] = i; UInt_t result = *std::max_element(indices, indices + fNBins, CompareAsc(this)); delete [] indices; return result; } UInt_t TKDTreeBinning::GetBinMinDensity() const { // Return the bin with minimum density if (fIsSorted) { if (!fIsSortedAsc) return fNBins - 1; else return 0; } UInt_t* indices = new UInt_t[fNBins]; for (UInt_t i = 0; i < fNBins; ++i) indices[i] = i; UInt_t result = *std::min_element(indices, indices + fNBins, CompareAsc(this)); delete [] indices; return result; } void TKDTreeBinning::FillBinData(ROOT::Fit::BinData & data) const { // Fill the bin data set with the result of the TKDTree binning if (!fDataBins) return; data.Initialize(fNBins, fDim); for (unsigned int i = 0; i < fNBins; ++i) { data.Add( GetBinMinEdges(i), GetBinDensity(i), std::sqrt(double(GetBinContent(i) ))/ GetBinVolume(i) ); data.AddBinUpEdge(GetBinMaxEdges(i) ); } }