| File: | cvmfs/belle.cern.ch/el9/externals/v02-04-00/include/root/ROOT/RDF/RInterface.hxx |
| Warning: | line 299, column 7 Potential leak of memory pointed to by 'upcastNodeOnHeap' |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | /************************************************************************** | |||
| 2 | * basf2 (Belle II Analysis Software Framework) * | |||
| 3 | * Author: The Belle II Collaboration * | |||
| 4 | * * | |||
| 5 | * See git log for contributors and copyright holders. * | |||
| 6 | * This file is licensed under LGPL-3.0, see LICENSE.md. * | |||
| 7 | **************************************************************************/ | |||
| 8 | ||||
| 9 | #include <svd/calibration/SVDdEdxValidationAlgorithm.h> | |||
| 10 | ||||
| 11 | #include <tuple> | |||
| 12 | #include <vector> | |||
| 13 | #include <string> | |||
| 14 | ||||
| 15 | #include <TROOT.h> | |||
| 16 | #include <TStyle.h> | |||
| 17 | #include <TMath.h> | |||
| 18 | #include <TFile.h> | |||
| 19 | #include <TColor.h> | |||
| 20 | #include <TLegend.h> | |||
| 21 | #include <TCanvas.h> | |||
| 22 | #include <TH1D.h> | |||
| 23 | #include <TH1D.h> | |||
| 24 | #include <TH2D.h> | |||
| 25 | #include <TAxis.h> | |||
| 26 | #include <TGraph.h> | |||
| 27 | #include <TMultiGraph.h> | |||
| 28 | #include <TCut.h> | |||
| 29 | ||||
| 30 | #include <RooDataSet.h> | |||
| 31 | #include <RooRealVar.h> | |||
| 32 | #include <RooAddPdf.h> | |||
| 33 | #include <RooGaussian.h> | |||
| 34 | #include <RooChebychev.h> | |||
| 35 | #include <RooBifurGauss.h> | |||
| 36 | #include <RooDstD0BG.h> | |||
| 37 | #include <RooAbsDataStore.h> | |||
| 38 | #include <RooTreeDataStore.h> | |||
| 39 | #include <RooMsgService.h> | |||
| 40 | #include <RooStats/SPlot.h> | |||
| 41 | #include <ROOT/RDataFrame.hxx> | |||
| 42 | ||||
| 43 | using namespace ROOT; | |||
| 44 | using namespace RooFit; | |||
| 45 | using namespace Belle2; | |||
| 46 | ||||
| 47 | SVDdEdxValidationAlgorithm::SVDdEdxValidationAlgorithm() : CalibrationAlgorithm("SVDdEdxValidationCollector"), | |||
| 48 | m_fullValidation(true), m_isMakePlots(true) | |||
| 49 | { | |||
| 50 | setDescription("SVD dE/dx validation algorithm"); | |||
| 51 | } | |||
| 52 | ||||
| 53 | /* Main calibration method */ | |||
| 54 | CalibrationAlgorithm::EResult SVDdEdxValidationAlgorithm::calibrate() | |||
| 55 | { | |||
| 56 | gROOT(ROOT::GetROOT())->SetBatch(true); | |||
| 57 | ||||
| 58 | // const auto exprun = getRunList()[0]; | |||
| 59 | // B2INFO("ExpRun used for calibration: " << exprun.first << " " << exprun.second); | |||
| 60 | ||||
| 61 | // Get data objects | |||
| 62 | auto TTreeLambda = getObjectPtr<TTree>("Lambda"); | |||
| 63 | auto TTreeDstar = getObjectPtr<TTree>("Dstar"); | |||
| 64 | auto TTreeGamma = getObjectPtr<TTree>("Gamma"); | |||
| 65 | ||||
| 66 | if (TTreeLambda->GetEntries() < m_MinEvtsPerTree) { | |||
| 67 | B2WARNING("Not enough data for calibration.")do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2:: LogConfig::c_Warning, 0, "svd")) { { LogVariableStream varStream ; varStream << "Not enough data for calibration."; Belle2 ::LogSystem::Instance().sendMessage(Belle2::LogMessage(Belle2 ::LogConfig::c_Warning, std::move(varStream), "svd", __PRETTY_FUNCTION__ , "svd/calibration/src/SVDdEdxValidationAlgorithm.cc", 67, 0) ); }; } } while(false); | |||
| 68 | return c_NotEnoughData; | |||
| 69 | } | |||
| 70 | ||||
| 71 | // call the calibration functions | |||
| 72 | TTree* TTreeLambdaSW = LambdaMassFit(TTreeLambda); | |||
| 73 | TTree* TTreeDstarSW = DstarMassFit(TTreeDstar); | |||
| 74 | TTree* TTreeGammaWrap = TTreeGamma.get(); | |||
| 75 | ||||
| 76 | std::vector<TString> PIDDetectors; | |||
| 77 | PIDDetectors.push_back("SVDonly"); | |||
| 78 | if (m_fullValidation) { | |||
| 79 | PIDDetectors.push_back("ALL"); | |||
| 80 | PIDDetectors.push_back("noSVD"); | |||
| 81 | } | |||
| 82 | ||||
| 83 | std::map<TTree*, TString> SWeightNameMap = { | |||
| 84 | {TTreeGammaWrap, "1"}, | |||
| 85 | {TTreeDstarSW, "nSignalDstar_sw"}, | |||
| 86 | {TTreeLambdaSW, "nSignalLambda_sw"} | |||
| 87 | }; | |||
| 88 | ||||
| 89 | for (const TString& PIDDetectorsName : PIDDetectors) { | |||
| 90 | PlotEfficiencyPlots(PIDDetectorsName, TTreeGammaWrap, SWeightNameMap[TTreeGammaWrap], "FirstElectron", "electron", TTreeDstarSW, | |||
| 91 | SWeightNameMap[TTreeDstarSW], "PionD", "pion", | |||
| 92 | "BinaryElectronPionID", | |||
| 93 | "0.5", m_NumEffBins, 0., m_MomHighEff); | |||
| 94 | PlotEfficiencyPlots(PIDDetectorsName, TTreeGammaWrap, SWeightNameMap[TTreeGammaWrap], "FirstElectron", "electron", TTreeDstarSW, | |||
| 95 | SWeightNameMap[TTreeDstarSW], "Kaon", "kaon", | |||
| 96 | "BinaryElectronKaonID", "0.5", | |||
| 97 | m_NumEffBins, 0., m_MomHighEff); | |||
| 98 | PlotEfficiencyPlots(PIDDetectorsName, TTreeLambdaSW, SWeightNameMap[TTreeLambdaSW], "Proton", "proton", TTreeDstarSW, | |||
| 99 | SWeightNameMap[TTreeDstarSW], "PionD", "pion", | |||
| 100 | "BinaryProtonPionID", "0.5", | |||
| 101 | m_NumEffBins, 0.25, m_MomHighEff); | |||
| 102 | PlotEfficiencyPlots(PIDDetectorsName, TTreeLambdaSW, SWeightNameMap[TTreeLambdaSW], "Proton", "proton", TTreeDstarSW, | |||
| 103 | SWeightNameMap[TTreeDstarSW], "Kaon", "kaon", | |||
| 104 | "BinaryProtonKaonID", "0.5", | |||
| 105 | m_NumEffBins, 0.25, m_MomHighEff); | |||
| 106 | PlotEfficiencyPlots(PIDDetectorsName, TTreeDstarSW, SWeightNameMap[TTreeDstarSW], "PionD", "pion", TTreeDstarSW, | |||
| 107 | SWeightNameMap[TTreeDstarSW], | |||
| 108 | "Kaon", "kaon", | |||
| 109 | "BinaryPionKaonID", "0.5", m_NumEffBins, | |||
| 110 | 0., m_MomHighEff); | |||
| 111 | PlotEfficiencyPlots(PIDDetectorsName, TTreeDstarSW, SWeightNameMap[TTreeDstarSW], "Kaon", "kaon", TTreeDstarSW, | |||
| 112 | SWeightNameMap[TTreeDstarSW], | |||
| 113 | "PionD", "pion", | |||
| 114 | "BinaryKaonPionID", "0.5", m_NumEffBins, | |||
| 115 | 0., m_MomHighEff); | |||
| 116 | } | |||
| 117 | ||||
| 118 | if (m_fullValidation) { | |||
| 119 | PlotROCCurve(TTreeGammaWrap, SWeightNameMap[TTreeGammaWrap], "FirstElectron", "electron", TTreeDstarSW, | |||
| 120 | SWeightNameMap[TTreeDstarSW], "PionD", | |||
| 121 | "pion", "BinaryElectronPionID"); | |||
| 122 | PlotROCCurve(TTreeGammaWrap, SWeightNameMap[TTreeGammaWrap], "FirstElectron", "electron", TTreeDstarSW, | |||
| 123 | SWeightNameMap[TTreeDstarSW], "Kaon", | |||
| 124 | "kaon", "BinaryElectronKaonID"); | |||
| 125 | PlotROCCurve(TTreeLambdaSW, SWeightNameMap[TTreeLambdaSW], "Proton", "proton", TTreeDstarSW, SWeightNameMap[TTreeDstarSW], "PionD", | |||
| 126 | "pion", | |||
| 127 | "BinaryProtonPionID"); | |||
| 128 | PlotROCCurve(TTreeLambdaSW, SWeightNameMap[TTreeLambdaSW], "Proton", "proton", TTreeDstarSW, SWeightNameMap[TTreeDstarSW], "Kaon", | |||
| 129 | "kaon", | |||
| 130 | "BinaryProtonKaonID"); | |||
| 131 | PlotROCCurve(TTreeDstarSW, SWeightNameMap[TTreeDstarSW], "PionD", "pion", TTreeDstarSW, SWeightNameMap[TTreeDstarSW], "Kaon", | |||
| 132 | "kaon", | |||
| 133 | "BinaryPionKaonID"); | |||
| 134 | PlotROCCurve(TTreeDstarSW, SWeightNameMap[TTreeDstarSW], "Kaon", "kaon", TTreeDstarSW, SWeightNameMap[TTreeDstarSW], "PionD", | |||
| 135 | "pion", | |||
| 136 | "BinaryKaonPionID"); | |||
| 137 | } | |||
| 138 | B2INFO("SVD dE/dx validation done!")do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2:: LogConfig::c_Info, 0, "svd")) { { LogVariableStream varStream ; varStream << "SVD dE/dx validation done!"; Belle2::LogSystem ::Instance().sendMessage(Belle2::LogMessage(Belle2::LogConfig ::c_Info, std::move(varStream), "svd", __PRETTY_FUNCTION__, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc" , 138, 0)); }; } } while(false); | |||
| 139 | ||||
| 140 | return c_OK; | |||
| 141 | } | |||
| 142 | ||||
| 143 | // generic efficiency and fake rate | |||
| 144 | void SVDdEdxValidationAlgorithm::PlotEfficiencyPlots(const TString& PIDDetectorsName, TTree* SignalTree, TString SignalWeightName, | |||
| 145 | TString SignalVarName, TString SignalVarNameFull, TTree* FakeTree, TString FakeWeightName, TString FakeVarName, | |||
| 146 | TString FakeVarNameFull, TString PIDVarName, TString PIDCut, unsigned int nbins, double MomLow, double MomHigh) | |||
| 147 | { | |||
| 148 | ||||
| 149 | if ((SignalTree == nullptr) || (FakeTree == nullptr)) { | |||
| 150 | B2FATAL("Invalid dataset, stopping here")do { { LogVariableStream varStream; varStream << "Invalid dataset, stopping here" ; Belle2::LogSystem::Instance().sendMessage(Belle2::LogMessage (Belle2::LogConfig::c_Fatal, std::move(varStream), "svd", __PRETTY_FUNCTION__ , "svd/calibration/src/SVDdEdxValidationAlgorithm.cc", 150, 0 )); }; exit(1); } while(false); | |||
| 151 | } | |||
| 152 | ||||
| 153 | if ((SignalTree->GetEntries() == 0) || (FakeTree->GetEntries() == 0)) { | |||
| 154 | B2FATAL("The dataset is empty, stopping here")do { { LogVariableStream varStream; varStream << "The dataset is empty, stopping here" ; Belle2::LogSystem::Instance().sendMessage(Belle2::LogMessage (Belle2::LogConfig::c_Fatal, std::move(varStream), "svd", __PRETTY_FUNCTION__ , "svd/calibration/src/SVDdEdxValidationAlgorithm.cc", 154, 0 )); }; exit(1); } while(false); | |||
| 155 | } | |||
| 156 | ||||
| 157 | if ((SignalTree->GetBranch(Form("%sMomentum", SignalVarName.Data())) == nullptr) | |||
| 158 | || (FakeTree->GetBranch(Form("%sMomentum", FakeVarName.Data())) == nullptr)) { | |||
| 159 | B2FATAL("Check the provided branch name, stopping here")do { { LogVariableStream varStream; varStream << "Check the provided branch name, stopping here" ; Belle2::LogSystem::Instance().sendMessage(Belle2::LogMessage (Belle2::LogConfig::c_Fatal, std::move(varStream), "svd", __PRETTY_FUNCTION__ , "svd/calibration/src/SVDdEdxValidationAlgorithm.cc", 159, 0 )); }; exit(1); } while(false); | |||
| 160 | } | |||
| 161 | ||||
| 162 | TString SignalFiducialCut = "(1>0)"; // placeholder for a possible sanity cut | |||
| 163 | TString FakesFiducialCut = "(1>0)"; | |||
| 164 | ||||
| 165 | // Produce the plots of the SVD PID distribution | |||
| 166 | if (PIDDetectorsName == "SVDonly") { | |||
| 167 | SignalTree->Draw(Form("%s%s%s>>hSignalPIDDistribution(100,0.,1.)", SignalVarName.Data(), PIDVarName.Data(), | |||
| 168 | PIDDetectorsName.Data()), | |||
| 169 | SignalWeightName + Form("* (%sMomentum>%f && %sMomentum<%f)", SignalVarName.Data(), MomLow, SignalVarName.Data(), MomHigh), "goff"); | |||
| 170 | TH1D* hSignalPIDDistribution = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hSignalPIDDistribution")); | |||
| 171 | hSignalPIDDistribution->Scale(1. / hSignalPIDDistribution->Integral()); | |||
| 172 | hSignalPIDDistribution->GetXaxis()->SetTitle(PIDVarName + PIDDetectorsName + " for " + SignalVarNameFull); | |||
| 173 | hSignalPIDDistribution->GetYaxis()->SetTitle("Candidates, normalised"); | |||
| 174 | hSignalPIDDistribution->SetMaximum(1.35 * hSignalPIDDistribution->GetMaximum()); | |||
| 175 | ||||
| 176 | SignalTree->Draw(Form("%sElectronLLSVDonly>>hSignalElectronLLDistribution(100,-17.,3.)", SignalVarName.Data()), | |||
| 177 | SignalWeightName + Form("* (%sMomentum>%f && %sMomentum<%f)", SignalVarName.Data(), MomLow, SignalVarName.Data(), MomHigh), "goff"); | |||
| 178 | TH1D* hSignalElectronLLDistribution = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hSignalElectronLLDistribution")); | |||
| 179 | SignalTree->Draw(Form("%sPionLLSVDonly>>hSignalPionLLDistribution(100,-17.,3.)", SignalVarName.Data()), | |||
| 180 | SignalWeightName + Form("* (%sMomentum>%f && %sMomentum<%f)", SignalVarName.Data(), MomLow, SignalVarName.Data(), MomHigh), "goff"); | |||
| 181 | TH1D* hSignalPionLLDistribution = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hSignalPionLLDistribution")); | |||
| 182 | SignalTree->Draw(Form("%sKaonLLSVDonly>>hSignalKaonLLDistribution(100,-17.,3.)", SignalVarName.Data()), | |||
| 183 | SignalWeightName + Form("* (%sMomentum>%f && %sMomentum<%f)", SignalVarName.Data(), MomLow, SignalVarName.Data(), MomHigh), "goff"); | |||
| 184 | TH1D* hSignalKaonLLDistribution = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hSignalKaonLLDistribution")); | |||
| 185 | SignalTree->Draw(Form("%sProtonLLSVDonly>>hSignalProtonLLDistribution(100,-17.,3.)", SignalVarName.Data()), | |||
| 186 | SignalWeightName + Form("* (%sMomentum>%f && %sMomentum<%f)", SignalVarName.Data(), MomLow, SignalVarName.Data(), MomHigh), "goff"); | |||
| 187 | TH1D* hSignalProtonLLDistribution = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hSignalProtonLLDistribution")); | |||
| 188 | ||||
| 189 | // same but only for tracks that are expected to actually have SVD dEdx info | |||
| 190 | SignalTree->Draw(Form("%sElectronLLSVDonly>>hSignalElectronLLDistributionGood(100,-17.,3.)", SignalVarName.Data()), | |||
| 191 | SignalWeightName + Form("* (%sSVDdEdx>0) * (%sMomentum>%f && %sMomentum<%f)", SignalVarName.Data(), SignalVarName.Data(), MomLow, | |||
| 192 | SignalVarName.Data(), MomHigh), "goff"); | |||
| 193 | TH1D* hSignalElectronLLDistributionGood = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hSignalElectronLLDistributionGood")); | |||
| 194 | SignalTree->Draw(Form("%sPionLLSVDonly>>hSignalPionLLDistributionGood(100,-17.,3.)", SignalVarName.Data()), | |||
| 195 | SignalWeightName + Form("* (%sSVDdEdx>0) * (%sMomentum>%f && %sMomentum<%f)", SignalVarName.Data(), SignalVarName.Data(), MomLow, | |||
| 196 | SignalVarName.Data(), MomHigh), "goff"); | |||
| 197 | TH1D* hSignalPionLLDistributionGood = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hSignalPionLLDistributionGood")); | |||
| 198 | SignalTree->Draw(Form("%sKaonLLSVDonly>>hSignalKaonLLDistributionGood(100,-17.,3.)", SignalVarName.Data()), | |||
| 199 | SignalWeightName + Form("* (%sSVDdEdx>0) * (%sMomentum>%f && %sMomentum<%f)", SignalVarName.Data(), SignalVarName.Data(), MomLow, | |||
| 200 | SignalVarName.Data(), MomHigh), "goff"); | |||
| 201 | TH1D* hSignalKaonLLDistributionGood = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hSignalKaonLLDistributionGood")); | |||
| 202 | SignalTree->Draw(Form("%sProtonLLSVDonly>>hSignalProtonLLDistributionGood(100,-17.,3.)", SignalVarName.Data()), | |||
| 203 | SignalWeightName + Form("* (%sSVDdEdx>0) * (%sMomentum>%f && %sMomentum<%f)", SignalVarName.Data(), SignalVarName.Data(), MomLow, | |||
| 204 | SignalVarName.Data(), MomHigh), "goff"); | |||
| 205 | TH1D* hSignalProtonLLDistributionGood = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hSignalProtonLLDistributionGood")); | |||
| 206 | ||||
| 207 | ||||
| 208 | hSignalElectronLLDistribution->Scale(1. / hSignalElectronLLDistribution->Integral()); | |||
| 209 | hSignalPionLLDistribution->Scale(1. / hSignalPionLLDistribution->Integral()); | |||
| 210 | hSignalKaonLLDistribution->Scale(1. / hSignalKaonLLDistribution->Integral()); | |||
| 211 | hSignalProtonLLDistribution->Scale(1. / hSignalProtonLLDistribution->Integral()); | |||
| 212 | ||||
| 213 | hSignalElectronLLDistributionGood->Scale(1. / hSignalElectronLLDistributionGood->Integral()); | |||
| 214 | hSignalPionLLDistributionGood->Scale(1. / hSignalPionLLDistributionGood->Integral()); | |||
| 215 | hSignalKaonLLDistributionGood->Scale(1. / hSignalKaonLLDistributionGood->Integral()); | |||
| 216 | hSignalProtonLLDistributionGood->Scale(1. / hSignalProtonLLDistributionGood->Integral()); | |||
| 217 | ||||
| 218 | hSignalElectronLLDistribution->GetXaxis()->SetTitle("ElectronLL (SVD only) for " + SignalVarNameFull); | |||
| 219 | hSignalElectronLLDistribution->GetYaxis()->SetTitle("Candidates, normalised"); | |||
| 220 | hSignalElectronLLDistribution->SetMaximum(1.35 * hSignalElectronLLDistribution->GetMaximum()); | |||
| 221 | ||||
| 222 | hSignalPionLLDistribution->GetXaxis()->SetTitle("PionLL (SVD only) for " + SignalVarNameFull); | |||
| 223 | hSignalPionLLDistribution->GetYaxis()->SetTitle("Candidates, normalised"); | |||
| 224 | hSignalPionLLDistribution->SetMaximum(1.35 * hSignalPionLLDistribution->GetMaximum()); | |||
| 225 | ||||
| 226 | hSignalKaonLLDistribution->GetXaxis()->SetTitle("KaonLL (SVD only) for " + SignalVarNameFull); | |||
| 227 | hSignalKaonLLDistribution->GetYaxis()->SetTitle("Candidates, normalised"); | |||
| 228 | hSignalKaonLLDistribution->SetMaximum(1.35 * hSignalKaonLLDistribution->GetMaximum()); | |||
| 229 | ||||
| 230 | hSignalProtonLLDistribution->GetXaxis()->SetTitle("ProtonLL (SVD only) for " + SignalVarNameFull); | |||
| 231 | hSignalProtonLLDistribution->GetYaxis()->SetTitle("Candidates, normalised"); | |||
| 232 | hSignalProtonLLDistribution->SetMaximum(1.35 * hSignalProtonLLDistribution->GetMaximum()); | |||
| 233 | ||||
| 234 | hSignalElectronLLDistributionGood->GetXaxis()->SetTitle("ElectronLL (SVD only) for " + SignalVarNameFull); | |||
| 235 | hSignalElectronLLDistributionGood->GetYaxis()->SetTitle("Candidates, normalised"); | |||
| 236 | hSignalElectronLLDistributionGood->SetMaximum(1.35 * hSignalElectronLLDistributionGood->GetMaximum()); | |||
| 237 | ||||
| 238 | hSignalPionLLDistributionGood->GetXaxis()->SetTitle("PionLL (SVD only) for " + SignalVarNameFull); | |||
| 239 | hSignalPionLLDistributionGood->GetYaxis()->SetTitle("Candidates, normalised"); | |||
| 240 | hSignalPionLLDistributionGood->SetMaximum(1.35 * hSignalPionLLDistributionGood->GetMaximum()); | |||
| 241 | ||||
| 242 | hSignalKaonLLDistributionGood->GetXaxis()->SetTitle("KaonLL (SVD only) for " + SignalVarNameFull); | |||
| 243 | hSignalKaonLLDistributionGood->GetYaxis()->SetTitle("Candidates, normalised"); | |||
| 244 | hSignalKaonLLDistributionGood->SetMaximum(1.35 * hSignalKaonLLDistributionGood->GetMaximum()); | |||
| 245 | ||||
| 246 | hSignalProtonLLDistributionGood->GetXaxis()->SetTitle("ProtonLL (SVD only) for " + SignalVarNameFull); | |||
| 247 | hSignalProtonLLDistributionGood->GetYaxis()->SetTitle("Candidates, normalised"); | |||
| 248 | hSignalProtonLLDistributionGood->SetMaximum(1.35 * hSignalProtonLLDistributionGood->GetMaximum()); | |||
| 249 | ||||
| 250 | TCanvas* DistribCanvas = new TCanvas("DistribCanvas", "", 600, 600); | |||
| 251 | gPad(TVirtualPad::Pad())->SetTopMargin(0.05); | |||
| 252 | gPad(TVirtualPad::Pad())->SetRightMargin(0.05); | |||
| 253 | gPad(TVirtualPad::Pad())->SetLeftMargin(0.13); | |||
| 254 | gPad(TVirtualPad::Pad())->SetBottomMargin(0.12); | |||
| 255 | ||||
| 256 | hSignalPIDDistribution->SetLineWidth(2); | |||
| 257 | hSignalPIDDistribution->SetLineColor(TColor::GetColor("#2166ac")); | |||
| 258 | hSignalPIDDistribution->Draw("hist"); | |||
| 259 | ||||
| 260 | DistribCanvas->Print("SVDdEdxValidation_Distribution_" + SignalVarNameFull + PIDVarName + PIDDetectorsName + | |||
| 261 | "_MomRange_" + | |||
| 262 | std::to_string( | |||
| 263 | MomLow) | |||
| 264 | .substr(0, 3) + | |||
| 265 | "_" + std::to_string(MomHigh).substr(0, 3) + ".pdf"); | |||
| 266 | ||||
| 267 | hSignalElectronLLDistribution->SetLineWidth(2); | |||
| 268 | hSignalPionLLDistribution->SetLineWidth(2); | |||
| 269 | hSignalKaonLLDistribution->SetLineWidth(2); | |||
| 270 | hSignalProtonLLDistribution->SetLineWidth(2); | |||
| 271 | ||||
| 272 | hSignalElectronLLDistributionGood->SetLineWidth(2); | |||
| 273 | hSignalPionLLDistributionGood->SetLineWidth(2); | |||
| 274 | hSignalKaonLLDistributionGood->SetLineWidth(2); | |||
| 275 | hSignalProtonLLDistributionGood->SetLineWidth(2); | |||
| 276 | ||||
| 277 | hSignalElectronLLDistributionGood->SetLineColor(kBlack); | |||
| 278 | hSignalPionLLDistributionGood->SetLineColor(kBlack); | |||
| 279 | hSignalKaonLLDistributionGood->SetLineColor(kBlack); | |||
| 280 | hSignalProtonLLDistributionGood->SetLineColor(kBlack); | |||
| 281 | ||||
| 282 | hSignalElectronLLDistribution->SetTitle("ElectronLL (SVD), all tracks"); | |||
| 283 | hSignalPionLLDistribution->SetTitle("PionLL (SVD), all tracks"); | |||
| 284 | hSignalKaonLLDistribution->SetTitle("KaonLL (SVD), all tracks"); | |||
| 285 | hSignalProtonLLDistribution->SetTitle("ProtonLL (SVD), all tracks"); | |||
| 286 | ||||
| 287 | hSignalElectronLLDistributionGood->SetTitle("ElectronLL (SVD), tracks with dEdx info"); | |||
| 288 | hSignalPionLLDistributionGood->SetTitle("PionLL (SVD), tracks with dEdx info"); | |||
| 289 | hSignalKaonLLDistributionGood->SetTitle("KaonLL (SVD), tracks with dEdx info"); | |||
| 290 | hSignalProtonLLDistributionGood->SetTitle("ProtonLL (SVD), tracks with dEdx info"); | |||
| 291 | ||||
| 292 | hSignalElectronLLDistribution->GetXaxis()->SetTitleSize(0.04); | |||
| 293 | hSignalElectronLLDistribution->GetYaxis()->SetTitleSize(0.04); | |||
| 294 | hSignalElectronLLDistribution->GetXaxis()->SetTitleOffset(1.0); | |||
| 295 | hSignalElectronLLDistribution->GetYaxis()->SetTitleOffset(1.3); | |||
| 296 | hSignalElectronLLDistribution->GetYaxis()->SetLabelSize(0.04); | |||
| 297 | hSignalElectronLLDistribution->GetXaxis()->SetLabelSize(0.04); | |||
| 298 | ||||
| 299 | hSignalPionLLDistribution->GetXaxis()->SetTitleSize(0.04); | |||
| 300 | hSignalPionLLDistribution->GetYaxis()->SetTitleSize(0.04); | |||
| 301 | hSignalPionLLDistribution->GetXaxis()->SetTitleOffset(1.0); | |||
| 302 | hSignalPionLLDistribution->GetYaxis()->SetTitleOffset(1.3); | |||
| 303 | hSignalPionLLDistribution->GetYaxis()->SetLabelSize(0.04); | |||
| 304 | hSignalPionLLDistribution->GetXaxis()->SetLabelSize(0.04); | |||
| 305 | ||||
| 306 | hSignalKaonLLDistribution->GetXaxis()->SetTitleSize(0.04); | |||
| 307 | hSignalKaonLLDistribution->GetYaxis()->SetTitleSize(0.04); | |||
| 308 | hSignalKaonLLDistribution->GetXaxis()->SetTitleOffset(1.0); | |||
| 309 | hSignalKaonLLDistribution->GetYaxis()->SetTitleOffset(1.3); | |||
| 310 | hSignalKaonLLDistribution->GetYaxis()->SetLabelSize(0.04); | |||
| 311 | hSignalKaonLLDistribution->GetXaxis()->SetLabelSize(0.04); | |||
| 312 | ||||
| 313 | hSignalProtonLLDistribution->GetXaxis()->SetTitleSize(0.04); | |||
| 314 | hSignalProtonLLDistribution->GetYaxis()->SetTitleSize(0.04); | |||
| 315 | hSignalProtonLLDistribution->GetXaxis()->SetTitleOffset(1.0); | |||
| 316 | hSignalProtonLLDistribution->GetYaxis()->SetTitleOffset(1.3); | |||
| 317 | hSignalProtonLLDistribution->GetYaxis()->SetLabelSize(0.04); | |||
| 318 | hSignalProtonLLDistribution->GetXaxis()->SetLabelSize(0.04); | |||
| 319 | ||||
| 320 | TCanvas* LLCanvas = new TCanvas("LLCanvas", "", 900, 700); | |||
| 321 | ||||
| 322 | gPad(TVirtualPad::Pad())->SetTopMargin(0.05); | |||
| 323 | gPad(TVirtualPad::Pad())->SetRightMargin(0.05); | |||
| 324 | gPad(TVirtualPad::Pad())->SetLeftMargin(0.13); | |||
| 325 | gPad(TVirtualPad::Pad())->SetBottomMargin(0.12); | |||
| 326 | ||||
| 327 | LLCanvas->Divide(2, 2, 0.01, 0.01); | |||
| 328 | LLCanvas->cd(1); | |||
| 329 | hSignalElectronLLDistribution->Draw("hist"); | |||
| 330 | LLCanvas->cd(2); | |||
| 331 | hSignalPionLLDistribution->Draw("hist"); | |||
| 332 | LLCanvas->cd(3); | |||
| 333 | hSignalKaonLLDistribution->Draw("hist"); | |||
| 334 | LLCanvas->cd(4); | |||
| 335 | hSignalProtonLLDistribution->Draw("hist"); | |||
| 336 | ||||
| 337 | TCanvas* LLCanvasGood = new TCanvas("LLCanvasGood", "", 900, 700); | |||
| 338 | ||||
| 339 | gPad(TVirtualPad::Pad())->SetTopMargin(0.05); | |||
| 340 | gPad(TVirtualPad::Pad())->SetRightMargin(0.05); | |||
| 341 | gPad(TVirtualPad::Pad())->SetLeftMargin(0.13); | |||
| 342 | gPad(TVirtualPad::Pad())->SetBottomMargin(0.12); | |||
| 343 | ||||
| 344 | LLCanvasGood->Divide(2, 2, 0.01, 0.01); | |||
| 345 | LLCanvasGood->cd(1); | |||
| 346 | hSignalElectronLLDistributionGood->Draw("hist"); | |||
| 347 | LLCanvasGood->cd(2); | |||
| 348 | hSignalPionLLDistributionGood->Draw("hist"); | |||
| 349 | LLCanvasGood->cd(3); | |||
| 350 | hSignalKaonLLDistributionGood->Draw("hist"); | |||
| 351 | LLCanvasGood->cd(4); | |||
| 352 | hSignalProtonLLDistributionGood->Draw("hist"); | |||
| 353 | ||||
| 354 | LLCanvas->Print("SVDdEdxValidation_LLDistributions_" + SignalVarNameFull + | |||
| 355 | "_SVDonly_MomRange_" + | |||
| 356 | std::to_string( | |||
| 357 | MomLow) | |||
| 358 | .substr(0, 3) + | |||
| 359 | "_" + std::to_string(MomHigh).substr(0, 3) + ".pdf"); | |||
| 360 | ||||
| 361 | LLCanvasGood->Print("SVDdEdxValidation_LLDistributions_GoodSVDTracks_" + SignalVarNameFull + | |||
| 362 | "_SVDonly_MomRange_" + | |||
| 363 | std::to_string( | |||
| 364 | MomLow) | |||
| 365 | .substr(0, 3) + | |||
| 366 | "_" + std::to_string(MomHigh).substr(0, 3) + ".pdf"); | |||
| 367 | ||||
| 368 | TFile DistribFile("SVDdEdxValidation_Distribution_" + SignalVarNameFull + PIDVarName + PIDDetectorsName + | |||
| 369 | "_MomRange_" + | |||
| 370 | std::to_string( | |||
| 371 | MomLow) | |||
| 372 | .substr(0, 3) + | |||
| 373 | "_" + std::to_string(MomHigh).substr(0, 3) + ".root", | |||
| 374 | "RECREATE"); | |||
| 375 | hSignalPIDDistribution->SetLineColor(kBlack); | |||
| 376 | hSignalPIDDistribution->Write(); | |||
| 377 | DistribFile.Close(); | |||
| 378 | delete DistribCanvas; | |||
| 379 | ||||
| 380 | TFile LLDistribFile(TString("SVDdEdxValidation_LLDistributions_" + SignalVarNameFull + "_SVDonly_MomRange_" + | |||
| 381 | std::to_string( | |||
| 382 | MomLow) | |||
| 383 | .substr(0, 3) + | |||
| 384 | "_" + std::to_string(MomHigh).substr(0, 3) + ".root"), | |||
| 385 | "RECREATE"); | |||
| 386 | hSignalElectronLLDistribution->Write(); | |||
| 387 | hSignalPionLLDistribution->Write(); | |||
| 388 | hSignalKaonLLDistribution->Write(); | |||
| 389 | hSignalProtonLLDistribution->Write(); | |||
| 390 | LLDistribFile.Close(); | |||
| 391 | delete LLCanvas; | |||
| 392 | delete LLCanvasGood; | |||
| 393 | } | |||
| 394 | ||||
| 395 | // ---------- Momentum distributions (for efficiency determination) ---------- | |||
| 396 | ||||
| 397 | SignalTree->Draw(Form("%sMomentum>>hAllSignal(%i,%f,%f)", SignalVarName.Data(), nbins, MomLow, MomHigh), | |||
| 398 | SignalWeightName + " * (" + SignalFiducialCut + ")", "goff"); | |||
| 399 | SignalTree->Draw(Form("%sMomentum>>hSelectedSignal(%i,%f,%f)", SignalVarName.Data(), nbins, MomLow, MomHigh), | |||
| 400 | SignalWeightName + " * (" + SignalVarName + PIDVarName + PIDDetectorsName + ">" + PIDCut + "&&" + SignalFiducialCut + | |||
| 401 | ")", | |||
| 402 | "goff"); | |||
| 403 | ||||
| 404 | FakeTree->Draw(Form("%sMomentum>>hAllFakes(%i,%f,%f)", FakeVarName.Data(), nbins, MomLow, MomHigh), | |||
| 405 | FakeWeightName + " * (" + FakesFiducialCut + ")", "goff"); | |||
| 406 | FakeTree->Draw(Form("%sMomentum>>hSelectedFakes(%i,%f,%f)", FakeVarName.Data(), nbins, MomLow, MomHigh), | |||
| 407 | FakeWeightName + " * (" + FakeVarName + PIDVarName + PIDDetectorsName + ">" + PIDCut + "&&" + FakesFiducialCut + ")", | |||
| 408 | "goff"); | |||
| 409 | ||||
| 410 | TH1D* hAllSignal = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hAllSignal")); | |||
| 411 | TH1D* hSelectedSignal = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hSelectedSignal")); | |||
| 412 | TH1D* hAllFakes = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hAllFakes")); | |||
| 413 | TH1D* hSelectedFakes = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hSelectedFakes")); | |||
| 414 | ||||
| 415 | // ---------- Add slow pions to the pion dataset ---------- | |||
| 416 | if (strncmp(SignalVarName.Data(), "PionD", 5) == 0) { | |||
| 417 | SignalTree->Draw(Form("SlowPionMomentum>>hAllSignalSlow(%i,%f,%f)", nbins, MomLow, MomHigh), | |||
| 418 | SignalWeightName + " * (" + SignalFiducialCut + ")", "goff"); | |||
| 419 | SignalTree->Draw(Form("SlowPionMomentum>>hSelectedSignalSlow(%i,%f,%f)", nbins, MomLow, MomHigh), | |||
| 420 | SignalWeightName + " * (SlowPion" + PIDVarName + PIDDetectorsName + ">" + PIDCut + "&&" + SignalFiducialCut + ")", "goff"); | |||
| 421 | TH1D* hAllSignalSlow = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hAllSignalSlow")); | |||
| 422 | TH1D* hSelectedSignalSlow = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hSelectedSignalSlow")); | |||
| 423 | hAllSignal->Add(hAllSignalSlow); | |||
| 424 | hSelectedSignal->Add(hSelectedSignalSlow); | |||
| 425 | } | |||
| 426 | ||||
| 427 | if (strncmp(FakeVarName.Data(), "PionD", 5) == 0) { | |||
| 428 | FakeTree->Draw(Form("SlowPionMomentum>>hAllFakesSlow(%i,%f,%f)", nbins, MomLow, MomHigh), | |||
| 429 | FakeWeightName + " * (" + FakesFiducialCut + ")", | |||
| 430 | "goff"); | |||
| 431 | FakeTree->Draw(Form("SlowPionMomentum>>hSelectedFakesSlow(%i,%f,%f)", nbins, MomLow, MomHigh), | |||
| 432 | FakeWeightName + " * (SlowPion" + PIDVarName + PIDDetectorsName + ">" + PIDCut + "&&" + FakesFiducialCut + ")", "goff"); | |||
| 433 | TH1D* hAllFakesSlow = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hAllFakesSlow")); | |||
| 434 | TH1D* hSelectedFakesSlow = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hSelectedFakesSlow")); | |||
| 435 | hAllFakes->Add(hAllFakesSlow); | |||
| 436 | hSelectedFakes->Add(hSelectedFakesSlow); | |||
| 437 | } | |||
| 438 | ||||
| 439 | TH1D* EffHistoSig = static_cast<TH1D*>(hAllSignal->Clone("EffHistoSig")); // signal efficiency | |||
| 440 | TH1D* EffHistoFake = static_cast<TH1D*>(hAllFakes->Clone("EffHistoFake")); // fakes efficiency | |||
| 441 | ||||
| 442 | EffHistoSig->Divide(hSelectedSignal, hAllSignal);//, 1, 1, "B"); | |||
| 443 | EffHistoFake->Divide(hSelectedFakes, hAllFakes);//, 1, 1, "B"); | |||
| 444 | ||||
| 445 | // PID plots | |||
| 446 | TH1D* hBase = new TH1D("hBase", "", 100, 0.0, MomHigh); | |||
| 447 | hBase->SetTitle(";Momentum [GeV];Efficiency"); | |||
| 448 | hBase->SetMaximum(1.20); | |||
| 449 | hBase->SetMinimum(0.0); | |||
| 450 | ||||
| 451 | TLegend* tleg1 = new TLegend(0.63, 0.82, 0.93, 0.94); | |||
| 452 | tleg1->AddEntry(EffHistoSig, SignalVarNameFull + " efficiency", "pl"); | |||
| 453 | tleg1->AddEntry(EffHistoFake, FakeVarNameFull + " fake rate", "pl"); | |||
| 454 | ||||
| 455 | TCanvas* ResultCanvas = new TCanvas("ResultCanvas", "", 600, 600); | |||
| 456 | gPad(TVirtualPad::Pad())->SetTopMargin(0.05); | |||
| 457 | gPad(TVirtualPad::Pad())->SetRightMargin(0.05); | |||
| 458 | gPad(TVirtualPad::Pad())->SetLeftMargin(0.13); | |||
| 459 | gPad(TVirtualPad::Pad())->SetBottomMargin(0.12); | |||
| 460 | ||||
| 461 | ResultCanvas->SetGrid(); | |||
| 462 | hBase->Draw(); | |||
| 463 | EffHistoSig->SetMarkerSize(1.5); | |||
| 464 | EffHistoSig->SetMarkerStyle(22); | |||
| 465 | EffHistoSig->SetMarkerColor(TColor::GetColor("#2166ac")); | |||
| 466 | EffHistoSig->SetLineColor(TColor::GetColor("#2166ac")); | |||
| 467 | EffHistoSig->Draw("P,same"); | |||
| 468 | ||||
| 469 | EffHistoFake->SetMarkerSize(1.5); | |||
| 470 | EffHistoFake->SetMarkerStyle(23); | |||
| 471 | EffHistoFake->SetMarkerColor(TColor::GetColor("#ef8a62")); | |||
| 472 | EffHistoFake->SetLineColor(TColor::GetColor("#ef8a62")); | |||
| 473 | EffHistoFake->Draw("P,same"); | |||
| 474 | ||||
| 475 | tleg1->Draw("same"); | |||
| 476 | ||||
| 477 | hBase->SetStats(0); | |||
| 478 | hBase->GetXaxis()->SetTitleSize(0.04); | |||
| 479 | hBase->GetYaxis()->SetTitleSize(0.04); | |||
| 480 | hBase->GetXaxis()->SetTitleOffset(1.0); | |||
| 481 | hBase->GetYaxis()->SetTitleOffset(1.3); | |||
| 482 | hBase->GetYaxis()->SetLabelSize(0.04); | |||
| 483 | hBase->GetXaxis()->SetLabelSize(0.04); | |||
| 484 | ||||
| 485 | // std::setprecision(2); | |||
| 486 | ResultCanvas->Print("SVDdEdxValidation_Efficiency_" + SignalVarNameFull + "_vs_" + FakeVarNameFull + PIDVarName + "_" + | |||
| 487 | PIDDetectorsName + | |||
| 488 | "_Cut" + | |||
| 489 | PIDCut + "_MomRange_" + std::to_string(MomLow).substr(0, 3) + "_" + std::to_string(MomHigh).substr(0, 3) + ".pdf"); | |||
| 490 | TFile ResultFile("SVDdEdxValidation_Efficiency_" + SignalVarNameFull + "_vs_" + FakeVarNameFull + PIDVarName + "_" + | |||
| 491 | PIDDetectorsName + | |||
| 492 | "_Cut" + | |||
| 493 | PIDCut + "_MomRange_" + std::to_string(MomLow).substr(0, 3) + "_" + std::to_string(MomHigh).substr(0, 3) + ".root", | |||
| 494 | "RECREATE"); | |||
| 495 | EffHistoSig->SetLineColor(kBlack); | |||
| 496 | EffHistoSig->SetMarkerColor(kBlack); | |||
| 497 | EffHistoFake->SetLineColor(kBlack); | |||
| 498 | EffHistoFake->SetMarkerColor(kBlack); | |||
| 499 | EffHistoSig->Write(); | |||
| 500 | EffHistoFake->Write(); | |||
| 501 | ResultFile.Close(); | |||
| 502 | delete ResultCanvas; | |||
| 503 | delete hBase; | |||
| 504 | } | |||
| 505 | ||||
| 506 | void SVDdEdxValidationAlgorithm::PlotROCCurve(TTree* SignalTree, TString SignalWeightName, TString SignalVarName, | |||
| 507 | TString SignalVarNameFull, TTree* FakeTree, TString FakeWeightName, TString FakeVarName, TString FakeVarNameFull, | |||
| 508 | TString PIDVarName) | |||
| 509 | { | |||
| 510 | ||||
| 511 | if ((SignalTree == nullptr) || (FakeTree == nullptr)) { | |||
| ||||
| 512 | B2FATAL("Invalid dataset, stopping here")do { { LogVariableStream varStream; varStream << "Invalid dataset, stopping here" ; Belle2::LogSystem::Instance().sendMessage(Belle2::LogMessage (Belle2::LogConfig::c_Fatal, std::move(varStream), "svd", __PRETTY_FUNCTION__ , "svd/calibration/src/SVDdEdxValidationAlgorithm.cc", 512, 0 )); }; exit(1); } while(false); | |||
| 513 | } | |||
| 514 | ||||
| 515 | if ((SignalTree->GetEntries() == 0) || (FakeTree->GetEntries() == 0)) { | |||
| 516 | B2FATAL("The dataset is empty, stopping here")do { { LogVariableStream varStream; varStream << "The dataset is empty, stopping here" ; Belle2::LogSystem::Instance().sendMessage(Belle2::LogMessage (Belle2::LogConfig::c_Fatal, std::move(varStream), "svd", __PRETTY_FUNCTION__ , "svd/calibration/src/SVDdEdxValidationAlgorithm.cc", 516, 0 )); }; exit(1); } while(false); | |||
| 517 | } | |||
| 518 | ||||
| 519 | if ((SignalTree->GetBranch(Form("%sMomentum", SignalVarName.Data())) == nullptr) | |||
| 520 | || (FakeTree->GetBranch(Form("%sMomentum", FakeVarName.Data())) == nullptr)) { | |||
| 521 | B2FATAL("Check the provided branch name, stopping here")do { { LogVariableStream varStream; varStream << "Check the provided branch name, stopping here" ; Belle2::LogSystem::Instance().sendMessage(Belle2::LogMessage (Belle2::LogConfig::c_Fatal, std::move(varStream), "svd", __PRETTY_FUNCTION__ , "svd/calibration/src/SVDdEdxValidationAlgorithm.cc", 521, 0 )); }; exit(1); } while(false); | |||
| 522 | } | |||
| 523 | ||||
| 524 | std::vector<TString> PIDDetectors; | |||
| 525 | PIDDetectors.clear(); | |||
| 526 | PIDDetectors.push_back("ALL"); | |||
| 527 | PIDDetectors.push_back("noSVD"); | |||
| 528 | ||||
| 529 | std::vector<double> SignalEfficiencyALL, FakeEfficiencyALL; | |||
| 530 | SignalEfficiencyALL.reserve(m_NumROCpoints); | |||
| 531 | FakeEfficiencyALL.reserve(m_NumROCpoints); | |||
| 532 | std::vector<double> SignalEfficiencynoSVD, FakeEfficiencynoSVD; | |||
| 533 | SignalEfficiencynoSVD.reserve(m_NumROCpoints); | |||
| 534 | FakeEfficiencynoSVD.reserve(m_NumROCpoints); | |||
| 535 | ||||
| 536 | TString SignalFiducialCut = SignalVarName + PIDVarName + "noSVD>=0"; // sanity cuts to reject events with NaN | |||
| 537 | TString FakesFiducialCut = FakeVarName + PIDVarName + "noSVD>=0"; | |||
| 538 | TString SignalFiducialCutSlow = "SlowPion" + PIDVarName + "noSVD>=0"; | |||
| 539 | TString FakesFiducialCutSlow = "SlowPion" + PIDVarName + "noSVD>=0"; | |||
| 540 | ||||
| 541 | // calculate efficiencies | |||
| 542 | ||||
| 543 | TCut AllSignalCut = SignalFiducialCut * Form("%sMomentum>%f && %sMomentum<%f", SignalVarName.Data(), m_MomLowROC, | |||
| 544 | SignalVarName.Data(), m_MomHighROC); | |||
| 545 | ||||
| 546 | double AllSignalIntegral, SelectedSignalIntegral; | |||
| 547 | ||||
| 548 | auto DataFrameSignalAll = RDataFrame(*SignalTree).Filter(AllSignalCut.GetTitle()); | |||
| 549 | ||||
| 550 | if (SignalWeightName == "1") { | |||
| 551 | AllSignalIntegral = DataFrameSignalAll.Count().GetValue(); | |||
| 552 | } else { | |||
| 553 | AllSignalIntegral = DataFrameSignalAll.Sum(SignalWeightName).GetValue(); | |||
| 554 | } | |||
| 555 | ||||
| 556 | std::unique_ptr<ROOT::RDF::RNode> DataFrameSlowSignalAll; | |||
| 557 | ||||
| 558 | if (strncmp(SignalVarName.Data(), "PionD", 5) == 0) { | |||
| 559 | TString SignalVarNameSlow = "SlowPion"; | |||
| 560 | TCut AllSignalCutSlow = TCut(SignalFiducialCut) * TCut(SignalFiducialCutSlow) * Form("(%sMomentum>%f && %sMomentum<%f)", | |||
| 561 | SignalVarNameSlow.Data(), m_MomLowROC, SignalVarNameSlow.Data(), m_MomHighROC); | |||
| 562 | DataFrameSlowSignalAll = std::make_unique<ROOT::RDF::RNode>(RDataFrame(*SignalTree).Filter(AllSignalCutSlow.GetTitle())); | |||
| 563 | ||||
| 564 | if (SignalWeightName == "1") { | |||
| 565 | AllSignalIntegral += DataFrameSlowSignalAll->Count().GetValue(); | |||
| 566 | } else { | |||
| 567 | AllSignalIntegral += DataFrameSlowSignalAll->Sum(SignalWeightName).GetValue(); | |||
| 568 | } | |||
| 569 | } | |||
| 570 | ||||
| 571 | for (unsigned int i = 0; i < PIDDetectors.size(); i++) { | |||
| 572 | for (unsigned int j = 0; j < m_NumROCpoints; ++j) { | |||
| 573 | delete gROOT(ROOT::GetROOT())->FindObject("PIDCut"); | |||
| 574 | ||||
| 575 | // scan cut values from 0 to 1, with a denser scan closer to 0 or 1, to get a nicer ROC curve | |||
| 576 | double x = 1. / m_NumROCpoints * j; | |||
| 577 | TString PIDCut = TString::Format("%f", 1. / (1 + TMath::Power(x / (1 - x), -3))); | |||
| 578 | ||||
| 579 | TCut SelectedSignalCut = Form("(%s%s%s > %s)", SignalVarName.Data(), PIDVarName.Data(), PIDDetectors[i].Data(), PIDCut.Data()); | |||
| 580 | ||||
| 581 | if (SignalWeightName == "1") { | |||
| 582 | SelectedSignalIntegral = DataFrameSignalAll.Filter(SelectedSignalCut.GetTitle()).Count().GetValue(); | |||
| 583 | } else { | |||
| 584 | SelectedSignalIntegral = DataFrameSignalAll.Filter(SelectedSignalCut.GetTitle()).Sum(SignalWeightName).GetValue(); | |||
| 585 | } | |||
| 586 | ||||
| 587 | // special treatment for pions: add also the slow pions from Dstar to gain low-momentum coverage | |||
| 588 | if (strncmp(SignalVarName.Data(), "PionD", 5) == 0) { | |||
| 589 | TString SignalVarNameSlow = "SlowPion"; | |||
| 590 | TCut SelectedSignalCutSlow = Form("(%s%s%s > %s)", SignalVarNameSlow.Data(), PIDVarName.Data(), PIDDetectors[i].Data(), | |||
| 591 | PIDCut.Data()); | |||
| 592 | ||||
| 593 | if (SignalWeightName == "1") { | |||
| 594 | SelectedSignalIntegral += DataFrameSlowSignalAll->Filter(SelectedSignalCutSlow.GetTitle()).Count().GetValue(); | |||
| 595 | } else { | |||
| 596 | SelectedSignalIntegral += DataFrameSlowSignalAll->Filter(SelectedSignalCutSlow.GetTitle()).Sum(SignalWeightName).GetValue(); | |||
| 597 | } | |||
| 598 | } | |||
| 599 | ||||
| 600 | if (PIDDetectors[i] == "ALL") { | |||
| 601 | SignalEfficiencyALL.push_back(SelectedSignalIntegral / AllSignalIntegral); | |||
| 602 | } | |||
| 603 | ||||
| 604 | if (PIDDetectors[i] == "noSVD") { | |||
| 605 | SignalEfficiencynoSVD.push_back(SelectedSignalIntegral / AllSignalIntegral); | |||
| 606 | } | |||
| 607 | } | |||
| 608 | } | |||
| 609 | ||||
| 610 | // calculate fake rates | |||
| 611 | ||||
| 612 | TCut AllFakeCut = FakesFiducialCut * Form("%sMomentum>%f && %sMomentum<%f", FakeVarName.Data(), m_MomLowROC, FakeVarName.Data(), | |||
| 613 | m_MomHighROC); | |||
| 614 | ||||
| 615 | double AllFakeIntegral, SelectedFakeIntegral; | |||
| 616 | auto DataFrameFakeAll = RDataFrame(*FakeTree).Filter(AllFakeCut.GetTitle()); | |||
| 617 | ||||
| 618 | if (FakeWeightName == "1") { | |||
| 619 | AllFakeIntegral = DataFrameFakeAll.Count().GetValue(); | |||
| 620 | } else { | |||
| 621 | AllFakeIntegral = DataFrameFakeAll.Sum(FakeWeightName).GetValue(); | |||
| 622 | } | |||
| 623 | ||||
| 624 | std::unique_ptr<ROOT::RDF::RNode> DataFrameSlowFakeAll; | |||
| 625 | ||||
| 626 | // special treatment for pions: add also the slow pions from Dstar to gain low-momentum coverage | |||
| 627 | if (strncmp(FakeVarName.Data(), "PionD", 5) == 0) { | |||
| 628 | ||||
| 629 | TString FakeVarNameSlow = "SlowPion"; | |||
| 630 | TCut AllFakeCutSlow = TCut(FakesFiducialCut) * TCut(FakesFiducialCutSlow) * Form("(%sMomentum>%f && %sMomentum<%f)", | |||
| 631 | FakeVarNameSlow.Data(), m_MomLowROC, FakeVarNameSlow.Data(), m_MomHighROC); | |||
| 632 | DataFrameSlowFakeAll = std::make_unique<ROOT::RDF::RNode>(RDataFrame(*FakeTree).Filter(AllFakeCutSlow.GetTitle())); | |||
| 633 | ||||
| 634 | if (FakeWeightName == "1") { | |||
| 635 | AllFakeIntegral += DataFrameSlowFakeAll->Count().GetValue(); | |||
| 636 | } else { | |||
| 637 | AllFakeIntegral += DataFrameSlowFakeAll->Sum(FakeWeightName).GetValue(); | |||
| 638 | } | |||
| 639 | } | |||
| 640 | ||||
| 641 | for (unsigned int i = 0; i < PIDDetectors.size(); i++) { | |||
| 642 | for (unsigned int j = 0; j < m_NumROCpoints; ++j) { | |||
| 643 | delete gROOT(ROOT::GetROOT())->FindObject("PIDCut"); | |||
| 644 | delete gROOT(ROOT::GetROOT())->FindObject("hAllFakes"); | |||
| 645 | delete gROOT(ROOT::GetROOT())->FindObject("hSelectedFakes"); | |||
| 646 | ||||
| 647 | // scan cut values from 0 to 1, with a denser scan closer to 0 or 1, to get a nicer ROC curve | |||
| 648 | double x = 1. / m_NumROCpoints * j; | |||
| 649 | TString PIDCut = TString::Format("%f", 1. / (1 + TMath::Power(x / (1 - x), -3))); | |||
| 650 | ||||
| 651 | TCut SelectedFakeCut = Form("(%s%s%s > %s)", FakeVarName.Data(), PIDVarName.Data(), PIDDetectors[i].Data(), PIDCut.Data()); | |||
| 652 | ||||
| 653 | if (FakeWeightName == "1") { | |||
| 654 | SelectedFakeIntegral = DataFrameFakeAll.Filter(SelectedFakeCut.GetTitle()).Count().GetValue(); | |||
| 655 | } else { | |||
| 656 | SelectedFakeIntegral = DataFrameFakeAll.Filter(SelectedFakeCut.GetTitle()).Sum(FakeWeightName).GetValue(); | |||
| 657 | } | |||
| 658 | ||||
| 659 | if (strncmp(FakeVarName.Data(), "PionD", 5) == 0) { | |||
| 660 | TString FakeVarNameSlow = "SlowPion"; | |||
| 661 | ||||
| 662 | TCut SelectedFakeCutSlow = Form("(%s%s%s > %s)", FakeVarNameSlow.Data(), PIDVarName.Data(), PIDDetectors[i].Data(), PIDCut.Data()); | |||
| 663 | ||||
| 664 | if (FakeWeightName == "1") { | |||
| 665 | SelectedFakeIntegral += DataFrameSlowFakeAll->Filter(SelectedFakeCutSlow.GetTitle()).Count().GetValue(); | |||
| 666 | } else { | |||
| 667 | SelectedFakeIntegral += DataFrameSlowFakeAll->Filter(SelectedFakeCutSlow.GetTitle()).Sum(FakeWeightName).GetValue(); | |||
| 668 | } | |||
| 669 | } | |||
| 670 | ||||
| 671 | if (PIDDetectors[i] == "ALL") { | |||
| 672 | FakeEfficiencyALL.push_back(SelectedFakeIntegral / AllFakeIntegral); | |||
| 673 | } | |||
| 674 | ||||
| 675 | if (PIDDetectors[i] == "noSVD") { | |||
| 676 | FakeEfficiencynoSVD.push_back(SelectedFakeIntegral / AllFakeIntegral); | |||
| 677 | } | |||
| 678 | } | |||
| 679 | } | |||
| 680 | ||||
| 681 | auto ResultCanvas = new TCanvas("ResultCanvas", "", 600, 400); | |||
| 682 | TMultiGraph* hmgraph = new TMultiGraph(); | |||
| 683 | ||||
| 684 | // efficiency vs fake rate graph | |||
| 685 | TGraph* hgraphALL = new TGraph(m_NumROCpoints, FakeEfficiencyALL.data(), SignalEfficiencyALL.data()); | |||
| 686 | hgraphALL->SetMarkerColor(TColor::GetColor("#2166ac")); | |||
| 687 | hgraphALL->SetMarkerStyle(20); | |||
| 688 | hgraphALL->SetLineColor(TColor::GetColor("#2166ac")); | |||
| 689 | hgraphALL->SetLineWidth(3); | |||
| 690 | hgraphALL->SetDrawOption("AP*"); | |||
| 691 | hgraphALL->SetTitle("with SVD"); | |||
| 692 | ||||
| 693 | TGraph* hgraphnoSVD = new TGraph(m_NumROCpoints, FakeEfficiencynoSVD.data(), SignalEfficiencynoSVD.data()); | |||
| 694 | hgraphnoSVD->SetMarkerColor(TColor::GetColor("#ef8a62")); | |||
| 695 | hgraphnoSVD->SetLineColor(TColor::GetColor("#ef8a62")); | |||
| 696 | hgraphnoSVD->SetLineWidth(3); | |||
| 697 | hgraphnoSVD->SetMarkerStyle(22); | |||
| 698 | hgraphnoSVD->SetDrawOption("P*"); | |||
| 699 | hgraphnoSVD->SetTitle("without SVD"); | |||
| 700 | ||||
| 701 | hmgraph->Add(hgraphALL); | |||
| 702 | hmgraph->Add(hgraphnoSVD); | |||
| 703 | hmgraph->Draw("A"); | |||
| 704 | hmgraph->GetHistogram()->GetXaxis()->SetTitle(FakeVarNameFull + " fake rate"); | |||
| 705 | hmgraph->GetHistogram()->GetYaxis()->SetTitle(SignalVarNameFull + " signal efficiency"); | |||
| 706 | ||||
| 707 | ResultCanvas->BuildLegend(0.6, 0.25, 0.9, 0.5); | |||
| 708 | ResultCanvas->SetGrid(); | |||
| 709 | ||||
| 710 | ResultCanvas->Print("SVDdEdxValidation_ROC_curve_" + SignalVarNameFull + "_vs_" + FakeVarNameFull + PIDVarName + "_MomRange" + | |||
| 711 | std::to_string(m_MomLowROC).substr(0, 3) + "_" + std::to_string(m_MomHighROC).substr(0, 3) + ".pdf"); | |||
| 712 | ||||
| 713 | TFile ResultFile("SVDdEdxValidation_ROC_curve_" + SignalVarNameFull + "_vs_" + FakeVarNameFull + PIDVarName + "_MomRange" + | |||
| 714 | std::to_string(m_MomLowROC).substr(0, 3) + "_" + std::to_string(m_MomHighROC).substr(0, 3) + ".root", | |||
| 715 | "RECREATE"); | |||
| 716 | hmgraph->Write(); | |||
| 717 | ResultFile.Close(); | |||
| 718 | ||||
| 719 | delete ResultCanvas; | |||
| 720 | } | |||
| 721 | ||||
| 722 | TTree* SVDdEdxValidationAlgorithm::LambdaMassFit(std::shared_ptr<TTree> preselTree) | |||
| 723 | { | |||
| 724 | B2INFO("Configuring the Lambda fit...")do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2:: LogConfig::c_Info, 0, "svd")) { { LogVariableStream varStream ; varStream << "Configuring the Lambda fit..."; Belle2:: LogSystem::Instance().sendMessage(Belle2::LogMessage(Belle2:: LogConfig::c_Info, std::move(varStream), "svd", __PRETTY_FUNCTION__ , "svd/calibration/src/SVDdEdxValidationAlgorithm.cc", 724, 0 )); }; } } while(false); | |||
| 725 | gROOT(ROOT::GetROOT())->SetBatch(true); | |||
| 726 | RooMsgService::instance().setGlobalKillBelow(RooFit::WARNING); | |||
| 727 | ||||
| 728 | RooRealVar InvM("InvM", "m(p^{+}#pi^{-})", 1.1, 1.13, "GeV/c^{2}"); | |||
| 729 | ||||
| 730 | RooRealVar ProtonMomentum("ProtonMomentum", "momentum for p", -1.e8, 1.e8); | |||
| 731 | RooRealVar ProtonSVDdEdx("ProtonSVDdEdx", "", -1.e8, 1.e8); | |||
| 732 | ||||
| 733 | RooRealVar exp("exp", "experiment number", 0, 1.e5); | |||
| 734 | RooRealVar run("run", "run number", 0, 1.e7); | |||
| 735 | ||||
| 736 | RooRealVar ProtonProtonIDALL("ProtonProtonIDALL", "", -1.e8, 1.e8); | |||
| 737 | RooRealVar ProtonProtonIDSVDonly("ProtonProtonIDSVDonly", "", -1.e8, 1.e8); | |||
| 738 | RooRealVar ProtonProtonIDnoSVD("ProtonProtonIDnoSVD", "", -1.e8, 1.e8); | |||
| 739 | ||||
| 740 | RooRealVar ProtonElectronLLSVDonly("ProtonElectronLLSVDonly", "", -1.e8, 1.e8); | |||
| 741 | RooRealVar ProtonPionLLSVDonly("ProtonPionLLSVDonly", "", -1.e8, 1.e8); | |||
| 742 | RooRealVar ProtonKaonLLSVDonly("ProtonKaonLLSVDonly", "", -1.e8, 1.e8); | |||
| 743 | RooRealVar ProtonProtonLLSVDonly("ProtonProtonLLSVDonly", "", -1.e8, 1.e8); | |||
| 744 | ||||
| 745 | RooRealVar ProtonBinaryProtonPionIDALL("ProtonBinaryProtonPionIDALL", "", -1.e8, 1.e8); | |||
| 746 | RooRealVar ProtonBinaryProtonPionIDSVDonly("ProtonBinaryProtonPionIDSVDonly", "", -1.e8, 1.e8); | |||
| 747 | RooRealVar ProtonBinaryProtonPionIDnoSVD("ProtonBinaryProtonPionIDnoSVD", "", -1.e8, 1.e8); | |||
| 748 | ||||
| 749 | RooRealVar ProtonBinaryProtonKaonIDALL("ProtonBinaryProtonKaonIDALL", "", -1.e8, 1.e8); | |||
| 750 | RooRealVar ProtonBinaryProtonKaonIDSVDonly("ProtonBinaryProtonKaonIDSVDonly", "", -1.e8, 1.e8); | |||
| 751 | RooRealVar ProtonBinaryProtonKaonIDnoSVD("ProtonBinaryProtonKaonIDnoSVD", "", -1.e8, 1.e8); | |||
| 752 | ||||
| 753 | RooRealVar ProtonBinaryProtonElectronIDALL("ProtonBinaryProtonElectronIDALL", "", -1.e8, 1.e8); | |||
| 754 | RooRealVar ProtonBinaryProtonElectronIDSVDonly("ProtonBinaryProtonElectronIDSVDonly", "", -1.e8, 1.e8); | |||
| 755 | RooRealVar ProtonBinaryProtonElectronIDnoSVD("ProtonBinaryProtonElectronIDnoSVD", "", -1.e8, 1.e8); | |||
| 756 | ||||
| 757 | RooRealVar ProtonBinaryPionProtonIDALL("ProtonBinaryPionProtonIDALL", "", -1.e8, 1.e8); | |||
| 758 | RooRealVar ProtonBinaryPionProtonIDSVDonly("ProtonBinaryPionProtonIDSVDonly", "", -1.e8, 1.e8); | |||
| 759 | RooRealVar ProtonBinaryPionProtonIDnoSVD("ProtonBinaryPionProtonIDnoSVD", "", -1.e8, 1.e8); | |||
| 760 | ||||
| 761 | RooRealVar ProtonBinaryKaonProtonIDALL("ProtonBinaryKaonProtonIDALL", "", -1.e8, 1.e8); | |||
| 762 | RooRealVar ProtonBinaryKaonProtonIDSVDonly("ProtonBinaryKaonProtonIDSVDonly", "", -1.e8, 1.e8); | |||
| 763 | RooRealVar ProtonBinaryKaonProtonIDnoSVD("ProtonBinaryKaonProtonIDnoSVD", "", -1.e8, 1.e8); | |||
| 764 | ||||
| 765 | RooRealVar ProtonBinaryElectronProtonIDALL("ProtonBinaryElectronProtonIDALL", "", -1.e8, 1.e8); | |||
| 766 | RooRealVar ProtonBinaryElectronProtonIDSVDonly("ProtonBinaryElectronProtonIDSVDonly", "", -1.e8, 1.e8); | |||
| 767 | RooRealVar ProtonBinaryElectronProtonIDnoSVD("ProtonBinaryElectronProtonIDnoSVD", "", -1.e8, 1.e8); | |||
| 768 | ||||
| 769 | auto variables = new RooArgSet(); | |||
| 770 | ||||
| 771 | variables->add(InvM); | |||
| 772 | ||||
| 773 | variables->add(ProtonMomentum); | |||
| 774 | variables->add(ProtonSVDdEdx); | |||
| 775 | variables->add(exp); | |||
| 776 | variables->add(run); | |||
| 777 | ||||
| 778 | variables->add(ProtonProtonIDALL); | |||
| 779 | variables->add(ProtonProtonIDSVDonly); | |||
| 780 | variables->add(ProtonProtonIDnoSVD); | |||
| 781 | variables->add(ProtonElectronLLSVDonly); | |||
| 782 | variables->add(ProtonPionLLSVDonly); | |||
| 783 | variables->add(ProtonKaonLLSVDonly); | |||
| 784 | variables->add(ProtonProtonLLSVDonly); | |||
| 785 | variables->add(ProtonBinaryProtonPionIDALL); | |||
| 786 | variables->add(ProtonBinaryProtonPionIDSVDonly); | |||
| 787 | variables->add(ProtonBinaryProtonPionIDnoSVD); | |||
| 788 | variables->add(ProtonBinaryProtonKaonIDALL); | |||
| 789 | variables->add(ProtonBinaryProtonKaonIDSVDonly); | |||
| 790 | variables->add(ProtonBinaryProtonKaonIDnoSVD); | |||
| 791 | variables->add(ProtonBinaryProtonElectronIDALL); | |||
| 792 | variables->add(ProtonBinaryProtonElectronIDSVDonly); | |||
| 793 | variables->add(ProtonBinaryProtonElectronIDnoSVD); | |||
| 794 | variables->add(ProtonBinaryPionProtonIDALL); | |||
| 795 | variables->add(ProtonBinaryPionProtonIDSVDonly); | |||
| 796 | variables->add(ProtonBinaryPionProtonIDnoSVD); | |||
| 797 | variables->add(ProtonBinaryKaonProtonIDALL); | |||
| 798 | variables->add(ProtonBinaryKaonProtonIDSVDonly); | |||
| 799 | variables->add(ProtonBinaryKaonProtonIDnoSVD); | |||
| 800 | variables->add(ProtonBinaryElectronProtonIDALL); | |||
| 801 | variables->add(ProtonBinaryElectronProtonIDSVDonly); | |||
| 802 | variables->add(ProtonBinaryElectronProtonIDnoSVD); | |||
| 803 | ||||
| 804 | RooDataSet* LambdaDataset = new RooDataSet("LambdaDataset", "LambdaDataset", *variables, Import(*preselTree)); | |||
| 805 | ||||
| 806 | if (LambdaDataset->sumEntries() == 0) { | |||
| 807 | B2FATAL("The Lambda dataset is empty, stopping here")do { { LogVariableStream varStream; varStream << "The Lambda dataset is empty, stopping here" ; Belle2::LogSystem::Instance().sendMessage(Belle2::LogMessage (Belle2::LogConfig::c_Fatal, std::move(varStream), "svd", __PRETTY_FUNCTION__ , "svd/calibration/src/SVDdEdxValidationAlgorithm.cc", 807, 0 )); }; exit(1); } while(false); | |||
| 808 | } | |||
| 809 | ||||
| 810 | // the signal PDF; might be revisited at a later point | |||
| 811 | ||||
| 812 | RooRealVar GaussMean("GaussMean", " GaussMean", 1.116, 1.111, 1.12); | |||
| 813 | RooRealVar GaussSigma("GaussSigma", "#sigma_{1}", 3.e-3, 3.e-5, 10.e-3); | |||
| 814 | RooGaussian LambdaGauss("LambdaGauss", "LambdaGauss", InvM, GaussMean, GaussSigma); | |||
| 815 | ||||
| 816 | /* temporary RooRealVar sigmaBifurGaussL1 and sigmaBifurGaussR1 to replace | |||
| 817 | * RooRealVar resolutionParamL("resolutionParamL", "resolutionParamL", 0.4, 5.e-4, 1.0); | |||
| 818 | * RooRealVar resolutionParamR("resolutionParamR", "resolutionParamR", 0.4, 5.e-4, 1.0); | |||
| 819 | * RooFormulaVar sigmaBifurGaussL1("sigmaBifurGaussL1", "resolutionParamL*GaussSigma", RooArgSet(resolutionParamL, GaussSigma)); | |||
| 820 | * RooFormulaVar sigmaBifurGaussR1("sigmaBifurGaussR1", "resolutionParamR*GaussSigma", RooArgSet(resolutionParamR, GaussSigma)); | |||
| 821 | */ | |||
| 822 | RooRealVar sigmaBifurGaussL1("sigmaBifurGaussL1", "sigma left", 0.4 * 3.e-3, 3.e-5, 10.e-3); | |||
| 823 | RooRealVar sigmaBifurGaussR1("sigmaBifurGaussR1", "sigma right", 0.4 * 3.e-3, 3.e-5, 10.e-3); | |||
| 824 | RooBifurGauss LambdaBifurGauss("LambdaBifurGauss", "LambdaBifurGauss", InvM, GaussMean, sigmaBifurGaussL1, sigmaBifurGaussR1); | |||
| 825 | ||||
| 826 | /* temporary RooRealVar sigmaBifurGaussL2 to replace | |||
| 827 | * RooRealVar resolutionParam2("resolutionParam2", "resolutionParam2", 0.2, 5.e-4, 1.0); | |||
| 828 | * sigmaBifurGaussL2("sigmaBifurGaussL2", "resolutionParam2*GaussSigma", RooArgSet(resolutionParam2, GaussSigma)); | |||
| 829 | */ | |||
| 830 | RooRealVar sigmaBifurGaussL2("sigmaBifurGaussL2", "sigmaBifurGaussL2", 0.2 * 3.e-3, 3.e-5, 10.e-3); | |||
| 831 | RooGaussian LambdaBifurGauss2("LambdaBifurGauss2", "LambdaBifurGauss2", InvM, GaussMean, sigmaBifurGaussL2); | |||
| 832 | ||||
| 833 | RooRealVar fracBifurGaussYield("fracBifurGaussYield", "fracBifurGaussYield", 0.3, 5.e-4, 1.0); | |||
| 834 | RooRealVar fracGaussYield("fracGaussYield", "fracGaussYield", 0.8, 5.e-4, 1.0); | |||
| 835 | ||||
| 836 | RooAddPdf LambdaCombinedBifurGauss("LambdaCombinedBifurGauss", "LambdaBifurGauss + LambdaBifurGauss2 ", RooArgList(LambdaBifurGauss, | |||
| 837 | LambdaBifurGauss2), RooArgList(fracBifurGaussYield)); | |||
| 838 | ||||
| 839 | RooAddPdf LambdaSignalPDF("LambdaSignalPDF", "LambdaCombinedBifurGauss + LambdaGauss", RooArgList(LambdaCombinedBifurGauss, | |||
| 840 | LambdaGauss), RooArgList(fracGaussYield)); | |||
| 841 | ||||
| 842 | // Background PDF | |||
| 843 | RooRealVar BkgPolyCoef0("BkgPolyCoef0", "BkgPolyCoef0", 0.1, 0., 1.5); | |||
| 844 | RooRealVar BkgPolyCoef1("BkgPolyCoef1", "BkgPolyCoef1", -0.5, -1.5, -1.e-3); | |||
| 845 | RooChebychev BkgPolyPDF("BkgPolyPDF", "BkgPolyPDF", InvM, RooArgList(BkgPolyCoef0, BkgPolyCoef1)); | |||
| 846 | ||||
| 847 | RooRealVar nSignalLambda("nSignalLambda", "nSignalLambda", 0.6 * preselTree->GetEntries(), 0., 0.99 * preselTree->GetEntries()); | |||
| 848 | RooRealVar nBkgLambda("nBkgLambda", "nBkgLambda", 0.4 * preselTree->GetEntries(), 0., 0.99 * preselTree->GetEntries()); | |||
| 849 | RooAddPdf totalPDFLambda("totalPDFLambda", "totalPDFLambda pdf", RooArgList(LambdaSignalPDF, BkgPolyPDF), | |||
| 850 | RooArgList(nSignalLambda, nBkgLambda)); | |||
| 851 | ||||
| 852 | B2INFO("Lambda: Start fitting...")do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2:: LogConfig::c_Info, 0, "svd")) { { LogVariableStream varStream ; varStream << "Lambda: Start fitting..."; Belle2::LogSystem ::Instance().sendMessage(Belle2::LogMessage(Belle2::LogConfig ::c_Info, std::move(varStream), "svd", __PRETTY_FUNCTION__, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc" , 852, 0)); }; } } while(false); | |||
| 853 | RooFitResult* LambdaFitResult = totalPDFLambda.fitTo(*LambdaDataset, Save(kTRUE), PrintLevel(-1)); | |||
| 854 | ||||
| 855 | int status = LambdaFitResult->status(); | |||
| 856 | int covqual = LambdaFitResult->covQual(); | |||
| 857 | double diff = nSignalLambda.getValV() + nBkgLambda.getValV() - LambdaDataset->sumEntries(); | |||
| 858 | ||||
| 859 | B2INFO("Lambda: Fit status: " << status << "; covariance quality: " << covqual)do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2:: LogConfig::c_Info, 0, "svd")) { { LogVariableStream varStream ; varStream << "Lambda: Fit status: " << status << "; covariance quality: " << covqual; Belle2::LogSystem ::Instance().sendMessage(Belle2::LogMessage(Belle2::LogConfig ::c_Info, std::move(varStream), "svd", __PRETTY_FUNCTION__, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc" , 859, 0)); }; } } while(false); | |||
| 860 | // if the fit is not healthy, try again once before giving up, with a slightly different setup: | |||
| 861 | if ((status > 0) || (TMath::Abs(diff) > 1.) || (nSignalLambda.getError() < sqrt(nSignalLambda.getValV())) | |||
| 862 | || (nSignalLambda.getError() > (nSignalLambda.getValV()))) { | |||
| 863 | ||||
| 864 | LambdaFitResult = totalPDFLambda.fitTo(*LambdaDataset, Save(), Strategy(2), Offset(1)); | |||
| 865 | status = LambdaFitResult->status(); | |||
| 866 | covqual = LambdaFitResult->covQual(); | |||
| 867 | diff = nSignalLambda.getValV() + nBkgLambda.getValV() - LambdaDataset->sumEntries(); | |||
| 868 | } | |||
| 869 | ||||
| 870 | if ((status > 0) || (TMath::Abs(diff) > 1.) || (nSignalLambda.getError() < sqrt(nSignalLambda.getValV())) | |||
| 871 | || (nSignalLambda.getError() > (nSignalLambda.getValV()))) { | |||
| 872 | B2WARNING("Lambda: Fit problem: fit status " << status << "; sum of component yields minus the dataset yield is " << diff <<do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2:: LogConfig::c_Warning, 0, "svd")) { { LogVariableStream varStream ; varStream << "Lambda: Fit problem: fit status " << status << "; sum of component yields minus the dataset yield is " << diff << "; signal yield is " << nSignalLambda .getValV() << ", while its uncertainty is " << nSignalLambda .getError(); Belle2::LogSystem::Instance().sendMessage(Belle2 ::LogMessage(Belle2::LogConfig::c_Warning, std::move(varStream ), "svd", __PRETTY_FUNCTION__, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc" , 873, 0)); }; } } while(false) | |||
| 873 | "; signal yield is " << nSignalLambda.getValV() << ", while its uncertainty is " << nSignalLambda.getError())do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2:: LogConfig::c_Warning, 0, "svd")) { { LogVariableStream varStream ; varStream << "Lambda: Fit problem: fit status " << status << "; sum of component yields minus the dataset yield is " << diff << "; signal yield is " << nSignalLambda .getValV() << ", while its uncertainty is " << nSignalLambda .getError(); Belle2::LogSystem::Instance().sendMessage(Belle2 ::LogMessage(Belle2::LogConfig::c_Warning, std::move(varStream ), "svd", __PRETTY_FUNCTION__, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc" , 873, 0)); }; } } while(false); | |||
| 874 | } | |||
| 875 | if (covqual < 2) { | |||
| 876 | B2INFO("Lambda: Fit warning: covariance quality " << covqual)do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2:: LogConfig::c_Info, 0, "svd")) { { LogVariableStream varStream ; varStream << "Lambda: Fit warning: covariance quality " << covqual; Belle2::LogSystem::Instance().sendMessage( Belle2::LogMessage(Belle2::LogConfig::c_Info, std::move(varStream ), "svd", __PRETTY_FUNCTION__, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc" , 876, 0)); }; } } while(false); | |||
| 877 | } | |||
| 878 | ||||
| 879 | TCanvas* canvLambda = new TCanvas("canvLambda", "canvLambda"); | |||
| 880 | RooPlot* LambdaFitFrame = LambdaDataset->plotOn(InvM.frame(130)); | |||
| 881 | totalPDFLambda.plotOn(LambdaFitFrame, LineColor(TColor::GetColor("#4575b4"))); | |||
| 882 | ||||
| 883 | double chisquare = LambdaFitFrame->chiSquare(); | |||
| 884 | B2INFO("Lambda: Fit chi2 = " << chisquare)do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2:: LogConfig::c_Info, 0, "svd")) { { LogVariableStream varStream ; varStream << "Lambda: Fit chi2 = " << chisquare ; Belle2::LogSystem::Instance().sendMessage(Belle2::LogMessage (Belle2::LogConfig::c_Info, std::move(varStream), "svd", __PRETTY_FUNCTION__ , "svd/calibration/src/SVDdEdxValidationAlgorithm.cc", 884, 0 )); }; } } while(false); | |||
| 885 | totalPDFLambda.paramOn(LambdaFitFrame, Layout(0.6, 0.96, 0.93), Format("NEU", AutoPrecision(2))); | |||
| 886 | LambdaFitFrame->getAttText()->SetTextSize(0.03); | |||
| 887 | ||||
| 888 | totalPDFLambda.plotOn(LambdaFitFrame, Components("LambdaSignalPDF"), LineColor(TColor::GetColor("#d73027"))); | |||
| 889 | totalPDFLambda.plotOn(LambdaFitFrame, Components("BkgPolyPDF"), LineColor(TColor::GetColor("#fc8d59"))); | |||
| 890 | totalPDFLambda.plotOn(LambdaFitFrame, LineColor(TColor::GetColor("#4575b4"))); | |||
| 891 | ||||
| 892 | LambdaFitFrame->GetXaxis()->SetTitle("m(p#pi^{-}) (GeV/c^{2})"); | |||
| 893 | ||||
| 894 | LambdaFitFrame->Draw(); | |||
| 895 | ||||
| 896 | if (m_isMakePlots) { | |||
| 897 | canvLambda->Print("SVDdEdxValidationFitLambda.pdf"); | |||
| 898 | TFile LambdaFitPlotFile("SVDdEdxValidationLambdaFitPlotFile.root", "RECREATE"); | |||
| 899 | canvLambda->Write(); | |||
| 900 | LambdaFitPlotFile.Close(); | |||
| 901 | } | |||
| 902 | RooStats::SPlot* sPlotDatasetLambda = new RooStats::SPlot("sData", "An SPlot", *LambdaDataset, &totalPDFLambda, | |||
| 903 | RooArgList(nSignalLambda, nBkgLambda)); | |||
| 904 | ||||
| 905 | for (int iEvt = 0; iEvt < 5; iEvt++) { | |||
| 906 | if (TMath::Abs(sPlotDatasetLambda->GetSWeight(iEvt, "nSignalLambda") + sPlotDatasetLambda->GetSWeight(iEvt, | |||
| 907 | "nBkgLambda") - 1) > 5.e-3) | |||
| 908 | B2FATAL("Lambda: sPlot error: sum of weights not equal to 1")do { { LogVariableStream varStream; varStream << "Lambda: sPlot error: sum of weights not equal to 1" ; Belle2::LogSystem::Instance().sendMessage(Belle2::LogMessage (Belle2::LogConfig::c_Fatal, std::move(varStream), "svd", __PRETTY_FUNCTION__ , "svd/calibration/src/SVDdEdxValidationAlgorithm.cc", 908, 0 )); }; exit(1); } while(false); | |||
| 909 | } | |||
| 910 | ||||
| 911 | TTree* treeLambda_sw = LambdaDataset->GetClonedTree(); | |||
| 912 | treeLambda_sw->SetName("treeLambda_sw"); | |||
| 913 | ||||
| 914 | B2INFO("Lambda: sPlot done. ")do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2:: LogConfig::c_Info, 0, "svd")) { { LogVariableStream varStream ; varStream << "Lambda: sPlot done. "; Belle2::LogSystem ::Instance().sendMessage(Belle2::LogMessage(Belle2::LogConfig ::c_Info, std::move(varStream), "svd", __PRETTY_FUNCTION__, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc" , 914, 0)); }; } } while(false); | |||
| 915 | ||||
| 916 | return treeLambda_sw; | |||
| 917 | } | |||
| 918 | ||||
| 919 | TTree* SVDdEdxValidationAlgorithm::DstarMassFit(std::shared_ptr<TTree> preselTree) | |||
| 920 | { | |||
| 921 | B2INFO("Configuring the Dstar fit...")do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2:: LogConfig::c_Info, 0, "svd")) { { LogVariableStream varStream ; varStream << "Configuring the Dstar fit..."; Belle2:: LogSystem::Instance().sendMessage(Belle2::LogMessage(Belle2:: LogConfig::c_Info, std::move(varStream), "svd", __PRETTY_FUNCTION__ , "svd/calibration/src/SVDdEdxValidationAlgorithm.cc", 921, 0 )); }; } } while(false); | |||
| 922 | gROOT(ROOT::GetROOT())->SetBatch(true); | |||
| 923 | RooMsgService::instance().setGlobalKillBelow(RooFit::WARNING); | |||
| 924 | ||||
| 925 | RooRealVar deltaM("deltaM", "m(D*)-m(D^{0})", 0.139545, 0.151, "GeV/c^{2}"); | |||
| 926 | ||||
| 927 | RooRealVar KaonMomentum("KaonMomentum", "momentum for Kaon(GeV)", -1.e8, 1.e8); | |||
| 928 | RooRealVar KaonSVDdEdx("KaonSVDdEdx", "", -1.e8, 1.e8); | |||
| 929 | RooRealVar PionDMomentum("PionDMomentum", "momentum for pion(GeV)", -1.e8, 1.e8); | |||
| 930 | RooRealVar PionDSVDdEdx("PionDSVDdEdx", "", -1.e8, 1.e8); | |||
| 931 | RooRealVar SlowPionMomentum("SlowPionMomentum", "momentum for slow pion(GeV)", -1.e8, 1.e8); | |||
| 932 | RooRealVar SlowPionSVDdEdx("SlowPionSVDdEdx", "", -1.e8, 1.e8); | |||
| 933 | ||||
| 934 | RooRealVar exp("exp", "experiment number", 0, 1.e5); | |||
| 935 | RooRealVar run("run", "run number", 0, 1.e8); | |||
| 936 | ||||
| 937 | RooRealVar KaonKaonIDALL("KaonKaonIDALL", "", -1.e8, 1.e8); | |||
| 938 | RooRealVar KaonKaonIDSVDonly("KaonKaonIDSVDonly", "", -1.e8, 1.e8); | |||
| 939 | RooRealVar KaonKaonIDnoSVD("KaonKaonIDnoSVD", "", -1.e8, 1.e8); | |||
| 940 | ||||
| 941 | RooRealVar KaonPionIDALL("KaonPionIDALL", "", -1.e8, 1.e8); | |||
| 942 | RooRealVar KaonPionIDSVDonly("KaonPionIDSVDonly", "", -1.e8, 1.e8); | |||
| 943 | RooRealVar KaonPionIDnoSVD("KaonPionIDnoSVD", "", -1.e8, 1.e8); | |||
| 944 | ||||
| 945 | RooRealVar KaonProtonIDALL("KaonProtonIDALL", "", -1.e8, 1.e8); | |||
| 946 | RooRealVar KaonProtonIDSVDonly("KaonProtonIDSVDonly", "", -1.e8, 1.e8); | |||
| 947 | RooRealVar KaonProtonIDnoSVD("KaonProtonIDnoSVD", "", -1.e8, 1.e8); | |||
| 948 | ||||
| 949 | RooRealVar KaonElectronIDALL("KaonElectronIDALL", "", -1.e8, 1.e8); | |||
| 950 | RooRealVar KaonElectronIDSVDonly("KaonElectronIDSVDonly", "", -1.e8, 1.e8); | |||
| 951 | RooRealVar KaonElectronIDnoSVD("KaonElectronIDnoSVD", "", -1.e8, 1.e8); | |||
| 952 | ||||
| 953 | RooRealVar KaonElectronLLSVDonly("KaonElectronLLSVDonly", "", -1.e8, 1.e8); | |||
| 954 | RooRealVar KaonPionLLSVDonly("KaonPionLLSVDonly", "", -1.e8, 1.e8); | |||
| 955 | RooRealVar KaonKaonLLSVDonly("KaonKaonLLSVDonly", "", -1.e8, 1.e8); | |||
| 956 | RooRealVar KaonProtonLLSVDonly("KaonProtonLLSVDonly", "", -1.e8, 1.e8); | |||
| 957 | ||||
| 958 | RooRealVar KaonBinaryKaonPionIDALL("KaonBinaryKaonPionIDALL", "", -1.e8, 1.e8); | |||
| 959 | RooRealVar KaonBinaryKaonPionIDSVDonly("KaonBinaryKaonPionIDSVDonly", "", -1.e8, 1.e8); | |||
| 960 | RooRealVar KaonBinaryKaonPionIDnoSVD("KaonBinaryKaonPionIDnoSVD", "", -1.e8, 1.e8); | |||
| 961 | ||||
| 962 | RooRealVar KaonBinaryPionKaonIDALL("KaonBinaryPionKaonIDALL", "", -1.e8, 1.e8); | |||
| 963 | RooRealVar KaonBinaryPionKaonIDSVDonly("KaonBinaryPionKaonIDSVDonly", "", -1.e8, 1.e8); | |||
| 964 | RooRealVar KaonBinaryPionKaonIDnoSVD("KaonBinaryPionKaonIDnoSVD", "", -1.e8, 1.e8); | |||
| 965 | ||||
| 966 | RooRealVar KaonBinaryProtonKaonIDALL("KaonBinaryProtonKaonIDALL", "", -1.e8, 1.e8); | |||
| 967 | RooRealVar KaonBinaryProtonKaonIDSVDonly("KaonBinaryProtonKaonIDSVDonly", "", -1.e8, 1.e8); | |||
| 968 | RooRealVar KaonBinaryProtonKaonIDnoSVD("KaonBinaryProtonKaonIDnoSVD", "", -1.e8, 1.e8); | |||
| 969 | ||||
| 970 | RooRealVar KaonBinaryElectronKaonIDALL("KaonBinaryElectronKaonIDALL", "", -1.e8, 1.e8); | |||
| 971 | RooRealVar KaonBinaryElectronKaonIDSVDonly("KaonBinaryElectronKaonIDSVDonly", "", -1.e8, 1.e8); | |||
| 972 | RooRealVar KaonBinaryElectronKaonIDnoSVD("KaonBinaryElectronKaonIDnoSVD", "", -1.e8, 1.e8); | |||
| 973 | ||||
| 974 | RooRealVar PionDKaonIDALL("PionDKaonIDALL", "", -1.e8, 1.e8); | |||
| 975 | RooRealVar PionDKaonIDSVDonly("PionDKaonIDSVDonly", "", -1.e8, 1.e8); | |||
| 976 | RooRealVar PionDKaonIDnoSVD("PionDKaonIDnoSVD", "", -1.e8, 1.e8); | |||
| 977 | ||||
| 978 | RooRealVar PionDPionIDALL("PionDPionIDALL", "", -1.e8, 1.e8); | |||
| 979 | RooRealVar PionDPionIDSVDonly("PionDPionIDSVDonly", "", -1.e8, 1.e8); | |||
| 980 | RooRealVar PionDPionIDnoSVD("PionDPionIDnoSVD", "", -1.e8, 1.e8); | |||
| 981 | ||||
| 982 | RooRealVar PionDElectronIDALL("PionDElectronIDALL", "", -1.e8, 1.e8); | |||
| 983 | RooRealVar PionDElectronIDSVDonly("PionDElectronIDSVDonly", "", -1.e8, 1.e8); | |||
| 984 | RooRealVar PionDElectronIDnoSVD("PionDElectronIDnoSVD", "", -1.e8, 1.e8); | |||
| 985 | ||||
| 986 | RooRealVar PionDProtonIDALL("PionDProtonIDALL", "", -1.e8, 1.e8); | |||
| 987 | RooRealVar PionDProtonIDSVDonly("PionDProtonIDSVDonly", "", -1.e8, 1.e8); | |||
| 988 | RooRealVar PionDProtonIDnoSVD("PionDProtonIDnoSVD", "", -1.e8, 1.e8); | |||
| 989 | ||||
| 990 | RooRealVar PionDElectronLLSVDonly("PionDElectronLLSVDonly", "", -1.e8, 1.e8); | |||
| 991 | RooRealVar PionDPionLLSVDonly("PionDPionLLSVDonly", "", -1.e8, 1.e8); | |||
| 992 | RooRealVar PionDKaonLLSVDonly("PionDKaonLLSVDonly", "", -1.e8, 1.e8); | |||
| 993 | RooRealVar PionDProtonLLSVDonly("PionDProtonLLSVDonly", "", -1.e8, 1.e8); | |||
| 994 | ||||
| 995 | RooRealVar PionDBinaryPionKaonIDALL("PionDBinaryPionKaonIDALL", "", -1.e8, 1.e8); | |||
| 996 | RooRealVar PionDBinaryPionKaonIDSVDonly("PionDBinaryPionKaonIDSVDonly", "", -1.e8, 1.e8); | |||
| 997 | RooRealVar PionDBinaryPionKaonIDnoSVD("PionDBinaryPionKaonIDnoSVD", "", -1.e8, 1.e8); | |||
| 998 | ||||
| 999 | RooRealVar PionDBinaryKaonPionIDALL("PionDBinaryKaonPionIDALL", "", -1.e8, 1.e8); | |||
| 1000 | RooRealVar PionDBinaryKaonPionIDSVDonly("PionDBinaryKaonPionIDSVDonly", "", -1.e8, 1.e8); | |||
| 1001 | RooRealVar PionDBinaryKaonPionIDnoSVD("PionDBinaryKaonPionIDnoSVD", "", -1.e8, 1.e8); | |||
| 1002 | ||||
| 1003 | RooRealVar PionDBinaryProtonPionIDALL("PionDBinaryProtonPionIDALL", "", -1.e8, 1.e8); | |||
| 1004 | RooRealVar PionDBinaryProtonPionIDSVDonly("PionDBinaryProtonPionIDSVDonly", "", -1.e8, 1.e8); | |||
| 1005 | RooRealVar PionDBinaryProtonPionIDnoSVD("PionDBinaryProtonPionIDnoSVD", "", -1.e8, 1.e8); | |||
| 1006 | ||||
| 1007 | RooRealVar PionDBinaryElectronPionIDALL("PionDBinaryElectronPionIDALL", "", -1.e8, 1.e8); | |||
| 1008 | RooRealVar PionDBinaryElectronPionIDSVDonly("PionDBinaryElectronPionIDSVDonly", "", -1.e8, 1.e8); | |||
| 1009 | RooRealVar PionDBinaryElectronPionIDnoSVD("PionDBinaryElectronPionIDnoSVD", "", -1.e8, 1.e8); | |||
| 1010 | ||||
| 1011 | RooRealVar SlowPionKaonIDALL("SlowPionKaonIDALL", "", -1.e8, 1.e8); | |||
| 1012 | RooRealVar SlowPionKaonIDSVDonly("SlowPionKaonIDSVDonly", "", -1.e8, 1.e8); | |||
| 1013 | RooRealVar SlowPionKaonIDnoSVD("SlowPionKaonIDnoSVD", "", -1.e8, 1.e8); | |||
| 1014 | ||||
| 1015 | RooRealVar SlowPionPionIDALL("SlowPionPionIDALL", "", -1.e8, 1.e8); | |||
| 1016 | RooRealVar SlowPionPionIDSVDonly("SlowPionPionIDSVDonly", "", -1.e8, 1.e8); | |||
| 1017 | RooRealVar SlowPionPionIDnoSVD("SlowPionPionIDnoSVD", "", -1.e8, 1.e8); | |||
| 1018 | ||||
| 1019 | RooRealVar SlowPionElectronIDALL("SlowPionElectronIDALL", "", -1.e8, 1.e8); | |||
| 1020 | RooRealVar SlowPionElectronIDSVDonly("SlowPionElectronIDSVDonly", "", -1.e8, 1.e8); | |||
| 1021 | RooRealVar SlowPionElectronIDnoSVD("SlowPionElectronIDnoSVD", "", -1.e8, 1.e8); | |||
| 1022 | ||||
| 1023 | RooRealVar SlowPionProtonIDALL("SlowPionProtonIDALL", "", -1.e8, 1.e8); | |||
| 1024 | RooRealVar SlowPionProtonIDSVDonly("SlowPionProtonIDSVDonly", "", -1.e8, 1.e8); | |||
| 1025 | RooRealVar SlowPionProtonIDnoSVD("SlowPionProtonIDnoSVD", "", -1.e8, 1.e8); | |||
| 1026 | ||||
| 1027 | RooRealVar SlowPionElectronLLSVDonly("SlowPionElectronLLSVDonly", "", -1.e8, 1.e8); | |||
| 1028 | RooRealVar SlowPionPionLLSVDonly("SlowPionPionLLSVDonly", "", -1.e8, 1.e8); | |||
| 1029 | RooRealVar SlowPionKaonLLSVDonly("SlowPionKaonLLSVDonly", "", -1.e8, 1.e8); | |||
| 1030 | RooRealVar SlowPionProtonLLSVDonly("SlowPionProtonLLSVDonly", "", -1.e8, 1.e8); | |||
| 1031 | ||||
| 1032 | RooRealVar SlowPionBinaryPionKaonIDALL("SlowPionBinaryPionKaonIDALL", "", -1.e8, 1.e8); | |||
| 1033 | RooRealVar SlowPionBinaryPionKaonIDSVDonly("SlowPionBinaryPionKaonIDSVDonly", "", -1.e8, 1.e8); | |||
| 1034 | RooRealVar SlowPionBinaryPionKaonIDnoSVD("SlowPionBinaryPionKaonIDnoSVD", "", -1.e8, 1.e8); | |||
| 1035 | ||||
| 1036 | RooRealVar SlowPionBinaryKaonPionIDALL("SlowPionBinaryKaonPionIDALL", "", -1.e8, 1.e8); | |||
| 1037 | RooRealVar SlowPionBinaryKaonPionIDSVDonly("SlowPionBinaryKaonPionIDSVDonly", "", -1.e8, 1.e8); | |||
| 1038 | RooRealVar SlowPionBinaryKaonPionIDnoSVD("SlowPionBinaryKaonPionIDnoSVD", "", -1.e8, 1.e8); | |||
| 1039 | ||||
| 1040 | RooRealVar SlowPionBinaryProtonPionIDALL("SlowPionBinaryProtonPionIDALL", "", -1.e8, 1.e8); | |||
| 1041 | RooRealVar SlowPionBinaryProtonPionIDSVDonly("SlowPionBinaryProtonPionIDSVDonly", "", -1.e8, 1.e8); | |||
| 1042 | RooRealVar SlowPionBinaryProtonPionIDnoSVD("SlowPionBinaryProtonPionIDnoSVD", "", -1.e8, 1.e8); | |||
| 1043 | ||||
| 1044 | RooRealVar SlowPionBinaryElectronPionIDALL("SlowPionBinaryElectronPionIDALL", "", -1.e8, 1.e8); | |||
| 1045 | RooRealVar SlowPionBinaryElectronPionIDSVDonly("SlowPionBinaryElectronPionIDSVDonly", "", -1.e8, 1.e8); | |||
| 1046 | RooRealVar SlowPionBinaryElectronPionIDnoSVD("SlowPionBinaryElectronPionIDnoSVD", "", -1.e8, 1.e8); | |||
| 1047 | ||||
| 1048 | auto variables = new RooArgSet(); | |||
| 1049 | variables->add(deltaM); | |||
| 1050 | variables->add(KaonMomentum); | |||
| 1051 | variables->add(KaonSVDdEdx); | |||
| 1052 | variables->add(PionDMomentum); | |||
| 1053 | variables->add(PionDSVDdEdx); | |||
| 1054 | variables->add(SlowPionMomentum); | |||
| 1055 | variables->add(SlowPionSVDdEdx); | |||
| 1056 | variables->add(exp); | |||
| 1057 | variables->add(run); | |||
| 1058 | ||||
| 1059 | variables->add(KaonKaonIDALL); | |||
| 1060 | variables->add(KaonKaonIDSVDonly); | |||
| 1061 | variables->add(KaonKaonIDnoSVD); | |||
| 1062 | variables->add(KaonPionIDALL); | |||
| 1063 | variables->add(KaonPionIDSVDonly); | |||
| 1064 | variables->add(KaonPionIDnoSVD); | |||
| 1065 | variables->add(KaonProtonIDALL); | |||
| 1066 | variables->add(KaonProtonIDSVDonly); | |||
| 1067 | variables->add(KaonProtonIDnoSVD); | |||
| 1068 | variables->add(KaonElectronIDALL); | |||
| 1069 | variables->add(KaonElectronIDSVDonly); | |||
| 1070 | variables->add(KaonElectronIDnoSVD); | |||
| 1071 | ||||
| 1072 | variables->add(KaonElectronLLSVDonly); | |||
| 1073 | variables->add(KaonPionLLSVDonly); | |||
| 1074 | variables->add(KaonKaonLLSVDonly); | |||
| 1075 | variables->add(KaonProtonLLSVDonly); | |||
| 1076 | ||||
| 1077 | variables->add(KaonBinaryKaonPionIDALL); | |||
| 1078 | variables->add(KaonBinaryKaonPionIDSVDonly); | |||
| 1079 | variables->add(KaonBinaryKaonPionIDnoSVD); | |||
| 1080 | variables->add(KaonBinaryPionKaonIDALL); | |||
| 1081 | variables->add(KaonBinaryPionKaonIDSVDonly); | |||
| 1082 | variables->add(KaonBinaryPionKaonIDnoSVD); | |||
| 1083 | variables->add(KaonBinaryProtonKaonIDALL); | |||
| 1084 | variables->add(KaonBinaryProtonKaonIDSVDonly); | |||
| 1085 | variables->add(KaonBinaryProtonKaonIDnoSVD); | |||
| 1086 | variables->add(KaonBinaryElectronKaonIDALL); | |||
| 1087 | variables->add(KaonBinaryElectronKaonIDSVDonly); | |||
| 1088 | variables->add(KaonBinaryElectronKaonIDnoSVD); | |||
| 1089 | ||||
| 1090 | variables->add(PionDPionIDALL); | |||
| 1091 | variables->add(PionDPionIDSVDonly); | |||
| 1092 | variables->add(PionDPionIDnoSVD); | |||
| 1093 | variables->add(PionDKaonIDALL); | |||
| 1094 | variables->add(PionDKaonIDSVDonly); | |||
| 1095 | variables->add(PionDKaonIDnoSVD); | |||
| 1096 | variables->add(PionDElectronIDALL); | |||
| 1097 | variables->add(PionDElectronIDSVDonly); | |||
| 1098 | variables->add(PionDElectronIDnoSVD); | |||
| 1099 | variables->add(PionDProtonIDALL); | |||
| 1100 | variables->add(PionDProtonIDSVDonly); | |||
| 1101 | variables->add(PionDProtonIDnoSVD); | |||
| 1102 | ||||
| 1103 | variables->add(PionDElectronLLSVDonly); | |||
| 1104 | variables->add(PionDPionLLSVDonly); | |||
| 1105 | variables->add(PionDKaonLLSVDonly); | |||
| 1106 | variables->add(PionDProtonLLSVDonly); | |||
| 1107 | ||||
| 1108 | variables->add(PionDBinaryPionKaonIDALL); | |||
| 1109 | variables->add(PionDBinaryPionKaonIDSVDonly); | |||
| 1110 | variables->add(PionDBinaryPionKaonIDnoSVD); | |||
| 1111 | variables->add(PionDBinaryKaonPionIDALL); | |||
| 1112 | variables->add(PionDBinaryKaonPionIDSVDonly); | |||
| 1113 | variables->add(PionDBinaryKaonPionIDnoSVD); | |||
| 1114 | variables->add(PionDBinaryProtonPionIDALL); | |||
| 1115 | variables->add(PionDBinaryProtonPionIDSVDonly); | |||
| 1116 | variables->add(PionDBinaryProtonPionIDnoSVD); | |||
| 1117 | variables->add(PionDBinaryElectronPionIDALL); | |||
| 1118 | variables->add(PionDBinaryElectronPionIDSVDonly); | |||
| 1119 | variables->add(PionDBinaryElectronPionIDnoSVD); | |||
| 1120 | ||||
| 1121 | variables->add(SlowPionPionIDALL); | |||
| 1122 | variables->add(SlowPionPionIDSVDonly); | |||
| 1123 | variables->add(SlowPionPionIDnoSVD); | |||
| 1124 | variables->add(SlowPionKaonIDALL); | |||
| 1125 | variables->add(SlowPionKaonIDSVDonly); | |||
| 1126 | variables->add(SlowPionKaonIDnoSVD); | |||
| 1127 | variables->add(SlowPionElectronIDALL); | |||
| 1128 | variables->add(SlowPionElectronIDSVDonly); | |||
| 1129 | variables->add(SlowPionElectronIDnoSVD); | |||
| 1130 | variables->add(SlowPionProtonIDALL); | |||
| 1131 | variables->add(SlowPionProtonIDSVDonly); | |||
| 1132 | variables->add(SlowPionProtonIDnoSVD); | |||
| 1133 | ||||
| 1134 | variables->add(SlowPionElectronLLSVDonly); | |||
| 1135 | variables->add(SlowPionPionLLSVDonly); | |||
| 1136 | variables->add(SlowPionKaonLLSVDonly); | |||
| 1137 | variables->add(SlowPionProtonLLSVDonly); | |||
| 1138 | ||||
| 1139 | variables->add(SlowPionBinaryPionKaonIDALL); | |||
| 1140 | variables->add(SlowPionBinaryPionKaonIDSVDonly); | |||
| 1141 | variables->add(SlowPionBinaryPionKaonIDnoSVD); | |||
| 1142 | variables->add(SlowPionBinaryKaonPionIDALL); | |||
| 1143 | variables->add(SlowPionBinaryKaonPionIDSVDonly); | |||
| 1144 | variables->add(SlowPionBinaryKaonPionIDnoSVD); | |||
| 1145 | variables->add(SlowPionBinaryProtonPionIDALL); | |||
| 1146 | variables->add(SlowPionBinaryProtonPionIDSVDonly); | |||
| 1147 | variables->add(SlowPionBinaryProtonPionIDnoSVD); | |||
| 1148 | variables->add(SlowPionBinaryElectronPionIDALL); | |||
| 1149 | variables->add(SlowPionBinaryElectronPionIDSVDonly); | |||
| 1150 | variables->add(SlowPionBinaryElectronPionIDnoSVD); | |||
| 1151 | ||||
| 1152 | RooDataSet* DstarDataset = new RooDataSet("DstarDataset", "DstarDataset", *variables, Import(*preselTree)); | |||
| 1153 | ||||
| 1154 | if (DstarDataset->sumEntries() == 0) { | |||
| 1155 | B2FATAL("The Dstar dataset is empty, stopping here")do { { LogVariableStream varStream; varStream << "The Dstar dataset is empty, stopping here" ; Belle2::LogSystem::Instance().sendMessage(Belle2::LogMessage (Belle2::LogConfig::c_Fatal, std::move(varStream), "svd", __PRETTY_FUNCTION__ , "svd/calibration/src/SVDdEdxValidationAlgorithm.cc", 1155, 0 )); }; exit(1); } while(false); | |||
| 1156 | } | |||
| 1157 | ||||
| 1158 | RooPlot* DstarFitFrame = DstarDataset->plotOn(deltaM.frame()); | |||
| 1159 | ||||
| 1160 | RooRealVar GaussMean("GaussMean", "GaussMean", 0.145, 0.140, 0.150); | |||
| 1161 | RooRealVar GaussSigma1("GaussSigma1", "GaussSigma1", 0.01, 1.e-4, 1.0); | |||
| 1162 | RooGaussian DstarGauss1("DstarGauss1", "DstarGauss1", deltaM, GaussMean, GaussSigma1); | |||
| 1163 | RooRealVar GaussSigma2("GaussSigma2", "GaussSigma2", 0.001, 1.e-4, 1.0); | |||
| 1164 | RooGaussian DstarGauss2("DstarGauss2", "DstarGauss2", deltaM, GaussMean, GaussSigma2); | |||
| 1165 | RooRealVar fracGaussYield("fracGaussYield", "Fraction of two Gaussians", 0.75, 0.0, 1.0); | |||
| 1166 | RooAddPdf DstarSignalPDF("DstarSignalPDF", "DstarGauss1+DstarGauss2", RooArgList(DstarGauss1, DstarGauss2), fracGaussYield); | |||
| 1167 | ||||
| 1168 | RooRealVar dm0Bkg("dm0Bkg", "dm0", 0.13957018, 0.130, 0.140); | |||
| 1169 | RooRealVar aBkg("aBkg", "a", -0.0784, -0.08, 3.0); | |||
| 1170 | RooRealVar bBkg("bBkg", "b", -0.444713, -0.5, 0.4); | |||
| 1171 | RooRealVar cBkg("cBkg", "c", 0.3); | |||
| 1172 | RooDstD0BG DstarBkgPDF("DstarBkgPDF", "DstarBkgPDF", deltaM, dm0Bkg, cBkg, aBkg, bBkg); | |||
| 1173 | RooRealVar nSignalDstar("nSignalDstar", "signal yield", 0.5 * preselTree->GetEntries(), 0, preselTree->GetEntries()); | |||
| 1174 | RooRealVar nBkgDstar("nBkgDstar", "background yield", 0.5 * preselTree->GetEntries(), 0, preselTree->GetEntries()); | |||
| 1175 | RooAddPdf totalPDFDstar("totalPDFDstar", "totalPDFDstar pdf", RooArgList(DstarSignalPDF, DstarBkgPDF), | |||
| 1176 | RooArgList(nSignalDstar, nBkgDstar)); | |||
| 1177 | ||||
| 1178 | B2INFO("Dstar: Start fitting...")do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2:: LogConfig::c_Info, 0, "svd")) { { LogVariableStream varStream ; varStream << "Dstar: Start fitting..."; Belle2::LogSystem ::Instance().sendMessage(Belle2::LogMessage(Belle2::LogConfig ::c_Info, std::move(varStream), "svd", __PRETTY_FUNCTION__, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc" , 1178, 0)); }; } } while(false); | |||
| 1179 | RooFitResult* DstarFitResult = totalPDFDstar.fitTo(*DstarDataset, Save(kTRUE), PrintLevel(-1)); | |||
| 1180 | ||||
| 1181 | int status = DstarFitResult->status(); | |||
| 1182 | int covqual = DstarFitResult->covQual(); | |||
| 1183 | double diff = nSignalDstar.getValV() + nBkgDstar.getValV() - DstarDataset->sumEntries(); | |||
| 1184 | ||||
| 1185 | B2INFO("Dstar: Fit status: " << status << "; covariance quality: " << covqual)do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2:: LogConfig::c_Info, 0, "svd")) { { LogVariableStream varStream ; varStream << "Dstar: Fit status: " << status << "; covariance quality: " << covqual; Belle2::LogSystem ::Instance().sendMessage(Belle2::LogMessage(Belle2::LogConfig ::c_Info, std::move(varStream), "svd", __PRETTY_FUNCTION__, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc" , 1185, 0)); }; } } while(false); | |||
| 1186 | // if the fit is not healthy, try again once before giving up, with a slightly different setup: | |||
| 1187 | if ((status > 0) || (TMath::Abs(diff) > 1.) || (nSignalDstar.getError() < sqrt(nSignalDstar.getValV())) | |||
| 1188 | || (nSignalDstar.getError() > (nSignalDstar.getValV()))) { | |||
| 1189 | ||||
| 1190 | DstarFitResult = totalPDFDstar.fitTo(*DstarDataset, Save(), Strategy(2), Offset(1)); | |||
| 1191 | status = DstarFitResult->status(); | |||
| 1192 | covqual = DstarFitResult->covQual(); | |||
| 1193 | diff = nSignalDstar.getValV() + nBkgDstar.getValV() - DstarDataset->sumEntries(); | |||
| 1194 | } | |||
| 1195 | ||||
| 1196 | if ((status > 0) || (TMath::Abs(diff) > 1.) || (nSignalDstar.getError() < sqrt(nSignalDstar.getValV())) | |||
| 1197 | || (nSignalDstar.getError() > (nSignalDstar.getValV()))) { | |||
| 1198 | B2WARNING("Dstar: Fit problem: fit status " << status << "; sum of component yields minus the dataset yield is " << diff <<do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2:: LogConfig::c_Warning, 0, "svd")) { { LogVariableStream varStream ; varStream << "Dstar: Fit problem: fit status " << status << "; sum of component yields minus the dataset yield is " << diff << "; signal yield is " << nSignalDstar .getValV() << ", while its uncertainty is " << nSignalDstar .getError(); Belle2::LogSystem::Instance().sendMessage(Belle2 ::LogMessage(Belle2::LogConfig::c_Warning, std::move(varStream ), "svd", __PRETTY_FUNCTION__, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc" , 1199, 0)); }; } } while(false) | |||
| 1199 | "; signal yield is " << nSignalDstar.getValV() << ", while its uncertainty is " << nSignalDstar.getError())do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2:: LogConfig::c_Warning, 0, "svd")) { { LogVariableStream varStream ; varStream << "Dstar: Fit problem: fit status " << status << "; sum of component yields minus the dataset yield is " << diff << "; signal yield is " << nSignalDstar .getValV() << ", while its uncertainty is " << nSignalDstar .getError(); Belle2::LogSystem::Instance().sendMessage(Belle2 ::LogMessage(Belle2::LogConfig::c_Warning, std::move(varStream ), "svd", __PRETTY_FUNCTION__, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc" , 1199, 0)); }; } } while(false); | |||
| 1200 | } | |||
| 1201 | if (covqual < 2) { | |||
| 1202 | B2INFO("Dstar: Fit warning: covariance quality " << covqual)do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2:: LogConfig::c_Info, 0, "svd")) { { LogVariableStream varStream ; varStream << "Dstar: Fit warning: covariance quality " << covqual; Belle2::LogSystem::Instance().sendMessage( Belle2::LogMessage(Belle2::LogConfig::c_Info, std::move(varStream ), "svd", __PRETTY_FUNCTION__, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc" , 1202, 0)); }; } } while(false); | |||
| 1203 | } | |||
| 1204 | ||||
| 1205 | totalPDFDstar.plotOn(DstarFitFrame, LineColor(TColor::GetColor("#4575b4"))); | |||
| 1206 | ||||
| 1207 | double chisquare = DstarFitFrame->chiSquare(); | |||
| 1208 | B2INFO("Dstar: Fit chi2 = " << chisquare)do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2:: LogConfig::c_Info, 0, "svd")) { { LogVariableStream varStream ; varStream << "Dstar: Fit chi2 = " << chisquare; Belle2::LogSystem::Instance().sendMessage(Belle2::LogMessage (Belle2::LogConfig::c_Info, std::move(varStream), "svd", __PRETTY_FUNCTION__ , "svd/calibration/src/SVDdEdxValidationAlgorithm.cc", 1208, 0 )); }; } } while(false); | |||
| 1209 | totalPDFDstar.paramOn(DstarFitFrame, Layout(0.63, 0.96, 0.93), Format("NEU", AutoPrecision(2))); | |||
| 1210 | DstarFitFrame->getAttText()->SetTextSize(0.03); | |||
| 1211 | ||||
| 1212 | totalPDFDstar.plotOn(DstarFitFrame, Components("DstarSignalPDF"), LineColor(TColor::GetColor("#d73027"))); | |||
| 1213 | totalPDFDstar.plotOn(DstarFitFrame, Components("DstarBkgPDF"), LineColor(TColor::GetColor("#fc8d59"))); | |||
| 1214 | totalPDFDstar.plotOn(DstarFitFrame, LineColor(TColor::GetColor("#4575b4"))); | |||
| 1215 | ||||
| 1216 | DstarFitFrame->GetXaxis()->SetTitle("#Deltam [GeV/c^{2}]"); | |||
| 1217 | TCanvas* canvDstar = new TCanvas("canvDstar", "canvDstar"); | |||
| 1218 | canvDstar->cd(); | |||
| 1219 | ||||
| 1220 | DstarFitFrame->Draw(); | |||
| 1221 | ||||
| 1222 | if (m_isMakePlots) { | |||
| 1223 | canvDstar->Print("SVDdEdxValidationFitDstar.pdf"); | |||
| 1224 | TFile DstarFitPlotFile("SVDdEdxValidationDstarFitPlotFile.root", "RECREATE"); | |||
| 1225 | canvDstar->Write(); | |||
| 1226 | DstarFitPlotFile.Close(); | |||
| 1227 | } | |||
| 1228 | ||||
| 1229 | /////////////////// SPlot /////////////////////////////////////////////////////////// | |||
| 1230 | ||||
| 1231 | RooStats::SPlot* sPlotDatasetDstar = new RooStats::SPlot("sData", "An SPlot", *DstarDataset, &totalPDFDstar, | |||
| 1232 | RooArgList(nSignalDstar, nBkgDstar)); | |||
| 1233 | ||||
| 1234 | for (int iEvt = 0; iEvt < 5; iEvt++) { | |||
| 1235 | if (TMath::Abs(sPlotDatasetDstar->GetSWeight(iEvt, "nSignalDstar") + sPlotDatasetDstar->GetSWeight(iEvt, "nBkgDstar") - 1) > 5.e-3) | |||
| 1236 | B2FATAL("Dstar: sPlot error: sum of weights not equal to 1")do { { LogVariableStream varStream; varStream << "Dstar: sPlot error: sum of weights not equal to 1" ; Belle2::LogSystem::Instance().sendMessage(Belle2::LogMessage (Belle2::LogConfig::c_Fatal, std::move(varStream), "svd", __PRETTY_FUNCTION__ , "svd/calibration/src/SVDdEdxValidationAlgorithm.cc", 1236, 0 )); }; exit(1); } while(false); | |||
| 1237 | } | |||
| 1238 | ||||
| 1239 | TTree* treeDstar_sw = DstarDataset->GetClonedTree(); | |||
| 1240 | treeDstar_sw->SetName("treeDstar_sw"); | |||
| 1241 | ||||
| 1242 | B2INFO("Dstar: sPlot done. ")do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2:: LogConfig::c_Info, 0, "svd")) { { LogVariableStream varStream ; varStream << "Dstar: sPlot done. "; Belle2::LogSystem ::Instance().sendMessage(Belle2::LogMessage(Belle2::LogConfig ::c_Info, std::move(varStream), "svd", __PRETTY_FUNCTION__, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc" , 1242, 0)); }; } } while(false); | |||
| 1243 | ||||
| 1244 | return treeDstar_sw; | |||
| 1245 | } |
| 1 | // Author: Enrico Guiraud, Danilo Piparo CERN 03/2017 | |||
| 2 | ||||
| 3 | /************************************************************************* | |||
| 4 | * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. * | |||
| 5 | * All rights reserved. * | |||
| 6 | * * | |||
| 7 | * For the licensing terms see $ROOTSYS/LICENSE. * | |||
| 8 | * For the list of contributors see $ROOTSYS/README/CREDITS. * | |||
| 9 | *************************************************************************/ | |||
| 10 | ||||
| 11 | #ifndef ROOT_RDF_TINTERFACE | |||
| 12 | #define ROOT_RDF_TINTERFACE | |||
| 13 | ||||
| 14 | #include "ROOT/RDataSource.hxx" | |||
| 15 | #include "ROOT/RDF/ActionHelpers.hxx" | |||
| 16 | #include "ROOT/RDF/HistoModels.hxx" | |||
| 17 | #include "ROOT/RDF/InterfaceUtils.hxx" | |||
| 18 | #include "ROOT/RDF/RColumnRegister.hxx" | |||
| 19 | #include "ROOT/RDF/RDefaultValueFor.hxx" | |||
| 20 | #include "ROOT/RDF/RDefine.hxx" | |||
| 21 | #include "ROOT/RDF/RDefinePerSample.hxx" | |||
| 22 | #include "ROOT/RDF/RFilter.hxx" | |||
| 23 | #include "ROOT/RDF/RInterfaceBase.hxx" | |||
| 24 | #include "ROOT/RDF/RVariation.hxx" | |||
| 25 | #include "ROOT/RDF/RLazyDSImpl.hxx" | |||
| 26 | #include "ROOT/RDF/RLoopManager.hxx" | |||
| 27 | #include "ROOT/RDF/RRange.hxx" | |||
| 28 | #include "ROOT/RDF/RFilterWithMissingValues.hxx" | |||
| 29 | #include "ROOT/RDF/Utils.hxx" | |||
| 30 | #include "ROOT/RDF/RDFDescription.hxx" | |||
| 31 | #include "ROOT/RDF/RVariationsDescription.hxx" | |||
| 32 | #include "ROOT/RResultPtr.hxx" | |||
| 33 | #include "ROOT/RSnapshotOptions.hxx" | |||
| 34 | #include <string_view> | |||
| 35 | #include "ROOT/RVec.hxx" | |||
| 36 | #include "ROOT/TypeTraits.hxx" | |||
| 37 | #include "RtypesCore.h" // for ULong64_t | |||
| 38 | #include "TDirectory.h" | |||
| 39 | #include "TH1.h" // For Histo actions | |||
| 40 | #include "TH2.h" // For Histo actions | |||
| 41 | #include "TH3.h" // For Histo actions | |||
| 42 | #include "THn.h" | |||
| 43 | #include "TProfile.h" | |||
| 44 | #include "TProfile2D.h" | |||
| 45 | #include "TStatistic.h" | |||
| 46 | ||||
| 47 | #include <algorithm> | |||
| 48 | #include <cstddef> | |||
| 49 | #include <initializer_list> | |||
| 50 | #include <iterator> // std::back_inserter | |||
| 51 | #include <limits> | |||
| 52 | #include <memory> | |||
| 53 | #include <set> | |||
| 54 | #include <sstream> | |||
| 55 | #include <stdexcept> | |||
| 56 | #include <string> | |||
| 57 | #include <type_traits> // is_same, enable_if | |||
| 58 | #include <typeinfo> | |||
| 59 | #include <unordered_set> | |||
| 60 | #include <utility> // std::index_sequence | |||
| 61 | #include <vector> | |||
| 62 | #include <any> | |||
| 63 | ||||
| 64 | class TGraph; | |||
| 65 | ||||
| 66 | // Windows requires a forward decl of printValue to accept it as a valid friend function in RInterface | |||
| 67 | namespace ROOT { | |||
| 68 | void DisableImplicitMT(); | |||
| 69 | bool IsImplicitMTEnabled(); | |||
| 70 | void EnableImplicitMT(UInt_t numthreads); | |||
| 71 | class RDataFrame; | |||
| 72 | } // namespace ROOT | |||
| 73 | namespace cling { | |||
| 74 | std::string printValue(ROOT::RDataFrame *tdf); | |||
| 75 | } | |||
| 76 | ||||
| 77 | namespace ROOT { | |||
| 78 | namespace RDF { | |||
| 79 | namespace RDFDetail = ROOT::Detail::RDF; | |||
| 80 | namespace RDFInternal = ROOT::Internal::RDF; | |||
| 81 | namespace TTraits = ROOT::TypeTraits; | |||
| 82 | ||||
| 83 | template <typename Proxied, typename DataSource> | |||
| 84 | class RInterface; | |||
| 85 | ||||
| 86 | using RNode = RInterface<::ROOT::Detail::RDF::RNodeBase, void>; | |||
| 87 | } // namespace RDF | |||
| 88 | ||||
| 89 | namespace Internal { | |||
| 90 | namespace RDF { | |||
| 91 | class GraphCreatorHelper; | |||
| 92 | void ChangeEmptyEntryRange(const ROOT::RDF::RNode &node, std::pair<ULong64_t, ULong64_t> &&newRange); | |||
| 93 | void ChangeBeginAndEndEntries(const RNode &node, Long64_t begin, Long64_t end); | |||
| 94 | void ChangeSpec(const ROOT::RDF::RNode &node, ROOT::RDF::Experimental::RDatasetSpec &&spec); | |||
| 95 | void TriggerRun(ROOT::RDF::RNode node); | |||
| 96 | std::string GetDataSourceLabel(const ROOT::RDF::RNode &node); | |||
| 97 | void SetTTreeLifeline(ROOT::RDF::RNode &node, std::any lifeline); | |||
| 98 | } // namespace RDF | |||
| 99 | } // namespace Internal | |||
| 100 | ||||
| 101 | namespace RDF { | |||
| 102 | ||||
| 103 | // clang-format off | |||
| 104 | /** | |||
| 105 | * \class ROOT::RDF::RInterface | |||
| 106 | * \ingroup dataframe | |||
| 107 | * \brief The public interface to the RDataFrame federation of classes. | |||
| 108 | * \tparam Proxied One of the "node" base types (e.g. RLoopManager, RFilterBase). The user never specifies this type manually. | |||
| 109 | * \tparam DataSource The type of the RDataSource which is providing the data to the data frame. There is no source by default. | |||
| 110 | * | |||
| 111 | * The documentation of each method features a one liner illustrating how to use the method, for example showing how | |||
| 112 | * the majority of the template parameters are automatically deduced requiring no or very little effort by the user. | |||
| 113 | */ | |||
| 114 | // clang-format on | |||
| 115 | template <typename Proxied, typename DataSource = void> | |||
| 116 | class RInterface : public RInterfaceBase { | |||
| 117 | using DS_t = DataSource; | |||
| 118 | using RFilterBase = RDFDetail::RFilterBase; | |||
| 119 | using RRangeBase = RDFDetail::RRangeBase; | |||
| 120 | using RLoopManager = RDFDetail::RLoopManager; | |||
| 121 | friend std::string cling::printValue(::ROOT::RDataFrame *tdf); // For a nice printing at the prompt | |||
| 122 | friend class RDFInternal::GraphDrawing::GraphCreatorHelper; | |||
| 123 | ||||
| 124 | template <typename T, typename W> | |||
| 125 | friend class RInterface; | |||
| 126 | ||||
| 127 | friend void RDFInternal::TriggerRun(RNode node); | |||
| 128 | friend void RDFInternal::ChangeEmptyEntryRange(const RNode &node, std::pair<ULong64_t, ULong64_t> &&newRange); | |||
| 129 | friend void RDFInternal::ChangeBeginAndEndEntries(const RNode &node, Long64_t start, Long64_t end); | |||
| 130 | friend void RDFInternal::ChangeSpec(const RNode &node, ROOT::RDF::Experimental::RDatasetSpec &&spec); | |||
| 131 | friend std::string ROOT::Internal::RDF::GetDataSourceLabel(const RNode &node); | |||
| 132 | friend void ROOT::Internal::RDF::SetTTreeLifeline(ROOT::RDF::RNode &node, std::any lifeline); | |||
| 133 | std::shared_ptr<Proxied> fProxiedPtr; ///< Smart pointer to the graph node encapsulated by this RInterface. | |||
| 134 | ||||
| 135 | public: | |||
| 136 | //////////////////////////////////////////////////////////////////////////// | |||
| 137 | /// \brief Copy-assignment operator for RInterface. | |||
| 138 | RInterface &operator=(const RInterface &) = default; | |||
| 139 | ||||
| 140 | //////////////////////////////////////////////////////////////////////////// | |||
| 141 | /// \brief Copy-ctor for RInterface. | |||
| 142 | RInterface(const RInterface &) = default; | |||
| 143 | ||||
| 144 | //////////////////////////////////////////////////////////////////////////// | |||
| 145 | /// \brief Move-ctor for RInterface. | |||
| 146 | RInterface(RInterface &&) = default; | |||
| 147 | ||||
| 148 | //////////////////////////////////////////////////////////////////////////// | |||
| 149 | /// \brief Move-assignment operator for RInterface. | |||
| 150 | RInterface &operator=(RInterface &&) = default; | |||
| 151 | ||||
| 152 | //////////////////////////////////////////////////////////////////////////// | |||
| 153 | /// \brief Build a RInterface from a RLoopManager. | |||
| 154 | /// This constructor is only available for RInterface<RLoopManager>. | |||
| 155 | template <typename T = Proxied, typename = std::enable_if_t<std::is_same<T, RLoopManager>::value, int>> | |||
| 156 | RInterface(const std::shared_ptr<RLoopManager> &proxied) : RInterfaceBase(proxied), fProxiedPtr(proxied) | |||
| 157 | { | |||
| 158 | } | |||
| 159 | ||||
| 160 | //////////////////////////////////////////////////////////////////////////// | |||
| 161 | /// \brief Cast any RDataFrame node to a common type ROOT::RDF::RNode. | |||
| 162 | /// Different RDataFrame methods return different C++ types. All nodes, however, | |||
| 163 | /// can be cast to this common type at the cost of a small performance penalty. | |||
| 164 | /// This allows, for example, storing RDataFrame nodes in a vector, or passing them | |||
| 165 | /// around via (non-template, C++11) helper functions. | |||
| 166 | /// Example usage: | |||
| 167 | /// ~~~{.cpp} | |||
| 168 | /// // a function that conditionally adds a Range to a RDataFrame node. | |||
| 169 | /// RNode MaybeAddRange(RNode df, bool mustAddRange) | |||
| 170 | /// { | |||
| 171 | /// return mustAddRange ? df.Range(1) : df; | |||
| 172 | /// } | |||
| 173 | /// // use as : | |||
| 174 | /// ROOT::RDataFrame df(10); | |||
| 175 | /// auto maybeRanged = MaybeAddRange(df, true); | |||
| 176 | /// ~~~ | |||
| 177 | /// Note that it is not a problem to pass RNode's by value. | |||
| 178 | operator RNode() const | |||
| 179 | { | |||
| 180 | return RNode(std::static_pointer_cast<::ROOT::Detail::RDF::RNodeBase>(fProxiedPtr), *fLoopManager, fColRegister); | |||
| 181 | } | |||
| 182 | ||||
| 183 | //////////////////////////////////////////////////////////////////////////// | |||
| 184 | /// \brief Append a filter to the call graph. | |||
| 185 | /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool` | |||
| 186 | /// signalling whether the event has passed the selection (true) or not (false). | |||
| 187 | /// \param[in] columns Names of the columns/branches in input to the filter function. | |||
| 188 | /// \param[in] name Optional name of this filter. See `Report`. | |||
| 189 | /// \return the filter node of the computation graph. | |||
| 190 | /// | |||
| 191 | /// Append a filter node at the point of the call graph corresponding to the | |||
| 192 | /// object this method is called on. | |||
| 193 | /// The callable `f` should not have side-effects (e.g. modification of an | |||
| 194 | /// external or static variable) to ensure correct results when implicit | |||
| 195 | /// multi-threading is active. | |||
| 196 | /// | |||
| 197 | /// RDataFrame only evaluates filters when necessary: if multiple filters | |||
| 198 | /// are chained one after another, they are executed in order and the first | |||
| 199 | /// one returning false causes the event to be discarded. | |||
| 200 | /// Even if multiple actions or transformations depend on the same filter, | |||
| 201 | /// it is executed once per entry. If its result is requested more than | |||
| 202 | /// once, the cached result is served. | |||
| 203 | /// | |||
| 204 | /// ### Example usage: | |||
| 205 | /// ~~~{.cpp} | |||
| 206 | /// // C++ callable (function, functor class, lambda...) that takes two parameters of the types of "x" and "y" | |||
| 207 | /// auto filtered = df.Filter(myCut, {"x", "y"}); | |||
| 208 | /// | |||
| 209 | /// // String: it must contain valid C++ except that column names can be used instead of variable names | |||
| 210 | /// auto filtered = df.Filter("x*y > 0"); | |||
| 211 | /// ~~~ | |||
| 212 | /// | |||
| 213 | /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested | |||
| 214 | /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work: | |||
| 215 | /// ~~~{.cpp} | |||
| 216 | /// df.Filter("Sum(Map(vec, [](float e) { return e*e > 0.5; }))") | |||
| 217 | /// ~~~ | |||
| 218 | /// but instead this will: | |||
| 219 | /// ~~~{.cpp} | |||
| 220 | /// df.Filter("return Sum(Map(vec, [](float e) { return e*e > 0.5; }))") | |||
| 221 | /// ~~~ | |||
| 222 | template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0> | |||
| 223 | RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> | |||
| 224 | Filter(F f, const ColumnNames_t &columns = {}, std::string_view name = "") | |||
| 225 | { | |||
| 226 | RDFInternal::CheckFilter(f); | |||
| 227 | using ColTypes_t = typename TTraits::CallableTraits<F>::arg_types; | |||
| 228 | constexpr auto nColumns = ColTypes_t::list_size; | |||
| 229 | const auto validColumnNames = GetValidatedColumnNames(nColumns, columns); | |||
| 230 | CheckAndFillDSColumns(validColumnNames, ColTypes_t()); | |||
| 231 | ||||
| 232 | using F_t = RDFDetail::RFilter<F, Proxied>; | |||
| 233 | ||||
| 234 | auto filterPtr = std::make_shared<F_t>(std::move(f), validColumnNames, fProxiedPtr, fColRegister, name); | |||
| 235 | return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, fColRegister); | |||
| 236 | } | |||
| 237 | ||||
| 238 | //////////////////////////////////////////////////////////////////////////// | |||
| 239 | /// \brief Append a filter to the call graph. | |||
| 240 | /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool` | |||
| 241 | /// signalling whether the event has passed the selection (true) or not (false). | |||
| 242 | /// \param[in] name Optional name of this filter. See `Report`. | |||
| 243 | /// \return the filter node of the computation graph. | |||
| 244 | /// | |||
| 245 | /// Refer to the first overload of this method for the full documentation. | |||
| 246 | template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0> | |||
| 247 | RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> Filter(F f, std::string_view name) | |||
| 248 | { | |||
| 249 | // The sfinae is there in order to pick up the overloaded method which accepts two strings | |||
| 250 | // rather than this template method. | |||
| 251 | return Filter(f, {}, name); | |||
| 252 | } | |||
| 253 | ||||
| 254 | //////////////////////////////////////////////////////////////////////////// | |||
| 255 | /// \brief Append a filter to the call graph. | |||
| 256 | /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool` | |||
| 257 | /// signalling whether the event has passed the selection (true) or not (false). | |||
| 258 | /// \param[in] columns Names of the columns/branches in input to the filter function. | |||
| 259 | /// \return the filter node of the computation graph. | |||
| 260 | /// | |||
| 261 | /// Refer to the first overload of this method for the full documentation. | |||
| 262 | template <typename F> | |||
| 263 | RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> Filter(F f, const std::initializer_list<std::string> &columns) | |||
| 264 | { | |||
| 265 | return Filter(f, ColumnNames_t{columns}); | |||
| 266 | } | |||
| 267 | ||||
| 268 | //////////////////////////////////////////////////////////////////////////// | |||
| 269 | /// \brief Append a filter to the call graph. | |||
| 270 | /// \param[in] expression The filter expression in C++ | |||
| 271 | /// \param[in] name Optional name of this filter. See `Report`. | |||
| 272 | /// \return the filter node of the computation graph. | |||
| 273 | /// | |||
| 274 | /// The expression is just-in-time compiled and used to filter entries. It must | |||
| 275 | /// be valid C++ syntax in which variable names are substituted with the names | |||
| 276 | /// of branches/columns. | |||
| 277 | /// | |||
| 278 | /// ### Example usage: | |||
| 279 | /// ~~~{.cpp} | |||
| 280 | /// auto filtered_df = df.Filter("myCollection.size() > 3"); | |||
| 281 | /// auto filtered_name_df = df.Filter("myCollection.size() > 3", "Minimum collection size"); | |||
| 282 | /// ~~~ | |||
| 283 | /// | |||
| 284 | /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested | |||
| 285 | /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work: | |||
| 286 | /// ~~~{.cpp} | |||
| 287 | /// df.Filter("Sum(Map(vec, [](float e) { return e*e > 0.5; }))") | |||
| 288 | /// ~~~ | |||
| 289 | /// but instead this will: | |||
| 290 | /// ~~~{.cpp} | |||
| 291 | /// df.Filter("return Sum(Map(vec, [](float e) { return e*e > 0.5; }))") | |||
| 292 | /// ~~~ | |||
| 293 | RInterface<RDFDetail::RJittedFilter, DS_t> Filter(std::string_view expression, std::string_view name = "") | |||
| 294 | { | |||
| 295 | // deleted by the jitted call to JitFilterHelper | |||
| 296 | auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr)); | |||
| 297 | using BaseNodeType_t = typename std::remove_pointer_t<decltype(upcastNodeOnHeap)>::element_type; | |||
| 298 | RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fColRegister); | |||
| 299 | const auto jittedFilter = | |||
| ||||
| 300 | RDFInternal::BookFilterJit(upcastNodeOnHeap, name, expression, fLoopManager->GetBranchNames(), fColRegister, | |||
| 301 | fLoopManager->GetTree(), GetDataSource()); | |||
| 302 | ||||
| 303 | return RInterface<RDFDetail::RJittedFilter, DS_t>(std::move(jittedFilter), *fLoopManager, fColRegister); | |||
| 304 | } | |||
| 305 | ||||
| 306 | //////////////////////////////////////////////////////////////////////////// | |||
| 307 | /// \brief Discard entries with missing values | |||
| 308 | /// \param[in] column Column name whose entries with missing values should be discarded | |||
| 309 | /// \return The filter node of the computation graph | |||
| 310 | /// | |||
| 311 | /// This operation is useful in case an entry of the dataset is incomplete, | |||
| 312 | /// i.e. if one or more of the columns do not have valid values. If the value | |||
| 313 | /// of the input column is missing for an entry, the entire entry will be | |||
| 314 | /// discarded from the rest of this branch of the computation graph. | |||
| 315 | /// | |||
| 316 | /// Use cases include: | |||
| 317 | /// * When processing multiple files, one or more of them is missing a column | |||
| 318 | /// * In horizontal joining with entry matching, a certain dataset has no | |||
| 319 | /// match for the current entry. | |||
| 320 | /// | |||
| 321 | /// ### Example usage: | |||
| 322 | /// | |||
| 323 | /// \code{.py} | |||
| 324 | /// # Assume a dataset with columns [idx, x] matching another dataset with | |||
| 325 | /// # columns [idx, y]. For idx == 42, the right-hand dataset has no match | |||
| 326 | /// df = ROOT.RDataFrame(dataset) | |||
| 327 | /// df_nomissing = df.FilterAvailable("idx").Define("z", "x + y") | |||
| 328 | /// colz = df_nomissing.Take[int]("z") | |||
| 329 | /// \endcode | |||
| 330 | /// | |||
| 331 | /// \code{.cpp} | |||
| 332 | /// // Assume a dataset with columns [idx, x] matching another dataset with | |||
| 333 | /// // columns [idx, y]. For idx == 42, the right-hand dataset has no match | |||
| 334 | /// ROOT::RDataFrame df{dataset}; | |||
| 335 | /// auto df_nomissing = df.FilterAvailable("idx") | |||
| 336 | /// .Define("z", [](int x, int y) { return x + y; }, {"x", "y"}); | |||
| 337 | /// auto colz = df_nomissing.Take<int>("z"); | |||
| 338 | /// \endcode | |||
| 339 | /// | |||
| 340 | /// \note See FilterMissing() if you want to keep only the entries with | |||
| 341 | /// missing values instead. | |||
| 342 | RInterface<RDFDetail::RFilterWithMissingValues<Proxied>, DS_t> FilterAvailable(std::string_view column) | |||
| 343 | { | |||
| 344 | const auto columns = ColumnNames_t{column.data()}; | |||
| 345 | // For now disable this functionality in case of an empty data source and | |||
| 346 | // the column name was not defined previously. | |||
| 347 | if (ROOT::Internal::RDF::GetDataSourceLabel(*this) == "EmptyDS") | |||
| 348 | throw std::runtime_error("Unknown column: \"" + std::string(column) + "\""); | |||
| 349 | using F_t = RDFDetail::RFilterWithMissingValues<Proxied>; | |||
| 350 | auto filterPtr = std::make_shared<F_t>(/*discardEntry*/ true, fProxiedPtr, fColRegister, columns); | |||
| 351 | CheckAndFillDSColumns(columns, TTraits::TypeList<void>{}); | |||
| 352 | return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, fColRegister); | |||
| 353 | } | |||
| 354 | ||||
| 355 | //////////////////////////////////////////////////////////////////////////// | |||
| 356 | /// \brief Keep only the entries that have missing values. | |||
| 357 | /// \param[in] column Column name whose entries with missing values should be kept | |||
| 358 | /// \return The filter node of the computation graph | |||
| 359 | /// | |||
| 360 | /// This operation is useful in case an entry of the dataset is incomplete, | |||
| 361 | /// i.e. if one or more of the columns do not have valid values. It only | |||
| 362 | /// keeps the entries for which the value of the input column is missing. | |||
| 363 | /// | |||
| 364 | /// Use cases include: | |||
| 365 | /// * When processing multiple files, one or more of them is missing a column | |||
| 366 | /// * In horizontal joining with entry matching, a certain dataset has no | |||
| 367 | /// match for the current entry. | |||
| 368 | /// | |||
| 369 | /// ### Example usage: | |||
| 370 | /// | |||
| 371 | /// \code{.py} | |||
| 372 | /// # Assume a dataset made of two files vertically chained together, one has | |||
| 373 | /// # column "x" and the other has column "y" | |||
| 374 | /// df = ROOT.RDataFrame(dataset) | |||
| 375 | /// df_valid_col_x = df.FilterMissing("y") | |||
| 376 | /// df_valid_col_y = df.FilterMissing("x") | |||
| 377 | /// display_x = df_valid_col_x.Display(("x",)) | |||
| 378 | /// display_y = df_valid_col_y.Display(("y",)) | |||
| 379 | /// \endcode | |||
| 380 | /// | |||
| 381 | /// \code{.cpp} | |||
| 382 | /// // Assume a dataset made of two files vertically chained together, one has | |||
| 383 | /// // column "x" and the other has column "y" | |||
| 384 | /// ROOT::RDataFrame df{dataset}; | |||
| 385 | /// auto df_valid_col_x = df.FilterMissing("y"); | |||
| 386 | /// auto df_valid_col_y = df.FilterMissing("x"); | |||
| 387 | /// auto display_x = df_valid_col_x.Display<int>({"x"}); | |||
| 388 | /// auto display_y = df_valid_col_y.Display<int>({"y"}); | |||
| 389 | /// \endcode | |||
| 390 | /// | |||
| 391 | /// \note See FilterAvailable() if you want to discard the entries in case | |||
| 392 | /// there is a missing value instead. | |||
| 393 | RInterface<RDFDetail::RFilterWithMissingValues<Proxied>, DS_t> FilterMissing(std::string_view column) | |||
| 394 | { | |||
| 395 | const auto columns = ColumnNames_t{column.data()}; | |||
| 396 | // For now disable this functionality in case of an empty data source and | |||
| 397 | // the column name was not defined previously. | |||
| 398 | if (ROOT::Internal::RDF::GetDataSourceLabel(*this) == "EmptyDS") | |||
| 399 | throw std::runtime_error("Unknown column: \"" + std::string(column) + "\""); | |||
| 400 | using F_t = RDFDetail::RFilterWithMissingValues<Proxied>; | |||
| 401 | auto filterPtr = std::make_shared<F_t>(/*discardEntry*/ false, fProxiedPtr, fColRegister, columns); | |||
| 402 | CheckAndFillDSColumns(columns, TTraits::TypeList<void>{}); | |||
| 403 | return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, fColRegister); | |||
| 404 | } | |||
| 405 | ||||
| 406 | // clang-format off | |||
| 407 | //////////////////////////////////////////////////////////////////////////// | |||
| 408 | /// \brief Define a new column. | |||
| 409 | /// \param[in] name The name of the defined column. | |||
| 410 | /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column. | |||
| 411 | /// \param[in] columns Names of the columns/branches in input to the producer function. | |||
| 412 | /// \return the first node of the computation graph for which the new quantity is defined. | |||
| 413 | /// | |||
| 414 | /// Define a column that will be visible from all subsequent nodes | |||
| 415 | /// of the functional chain. The `expression` is only evaluated for entries that pass | |||
| 416 | /// all the preceding filters. | |||
| 417 | /// A new variable is created called `name`, accessible as if it was contained | |||
| 418 | /// in the dataset from subsequent transformations/actions. | |||
| 419 | /// | |||
| 420 | /// Use cases include: | |||
| 421 | /// * caching the results of complex calculations for easy and efficient multiple access | |||
| 422 | /// * extraction of quantities of interest from complex objects | |||
| 423 | /// | |||
| 424 | /// An exception is thrown if the name of the new column is already in use in this branch of the computation graph. | |||
| 425 | /// | |||
| 426 | /// ### Example usage: | |||
| 427 | /// ~~~{.cpp} | |||
| 428 | /// // assuming a function with signature: | |||
| 429 | /// double myComplexCalculation(const RVec<float> &muon_pts); | |||
| 430 | /// // we can pass it directly to Define | |||
| 431 | /// auto df_with_define = df.Define("newColumn", myComplexCalculation, {"muon_pts"}); | |||
| 432 | /// // alternatively, we can pass the body of the function as a string, as in Filter: | |||
| 433 | /// auto df_with_define = df.Define("newColumn", "x*x + y*y"); | |||
| 434 | /// ~~~ | |||
| 435 | /// | |||
| 436 | /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested | |||
| 437 | /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work: | |||
| 438 | /// ~~~{.cpp} | |||
| 439 | /// df.Define("x2", "Map(v, [](float e) { return e*e; })") | |||
| 440 | /// ~~~ | |||
| 441 | /// but instead this will: | |||
| 442 | /// ~~~{.cpp} | |||
| 443 | /// df.Define("x2", "return Map(v, [](float e) { return e*e; })") | |||
| 444 | /// ~~~ | |||
| 445 | template <typename F, typename std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0> | |||
| 446 | RInterface<Proxied, DS_t> Define(std::string_view name, F expression, const ColumnNames_t &columns = {}) | |||
| 447 | { | |||
| 448 | return DefineImpl<F, RDFDetail::ExtraArgsForDefine::None>(name, std::move(expression), columns, "Define"); | |||
| 449 | } | |||
| 450 | // clang-format on | |||
| 451 | ||||
| 452 | // clang-format off | |||
| 453 | //////////////////////////////////////////////////////////////////////////// | |||
| 454 | /// \brief Define a new column with a value dependent on the processing slot. | |||
| 455 | /// \param[in] name The name of the defined column. | |||
| 456 | /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column. | |||
| 457 | /// \param[in] columns Names of the columns/branches in input to the producer function (excluding the slot number). | |||
| 458 | /// \return the first node of the computation graph for which the new quantity is defined. | |||
| 459 | /// | |||
| 460 | /// This alternative implementation of `Define` is meant as a helper to evaluate new column values in a thread-safe manner. | |||
| 461 | /// The expression must be a callable of signature R(unsigned int, T1, T2, ...) where `T1, T2...` are the types | |||
| 462 | /// of the columns that the expression takes as input. The first parameter is reserved for an unsigned integer | |||
| 463 | /// representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with | |||
| 464 | /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1. | |||
| 465 | /// Note that there is no guarantee as to how often each slot will be reached during the event loop. | |||
| 466 | /// | |||
| 467 | /// The following two calls are equivalent, although `DefineSlot` is slightly more performant: | |||
| 468 | /// ~~~{.cpp} | |||
| 469 | /// int function(unsigned int, double, double); | |||
| 470 | /// df.Define("x", function, {"rdfslot_", "column1", "column2"}) | |||
| 471 | /// df.DefineSlot("x", function, {"column1", "column2"}) | |||
| 472 | /// ~~~ | |||
| 473 | /// | |||
| 474 | /// See Define() for more information. | |||
| 475 | template <typename F> | |||
| 476 | RInterface<Proxied, DS_t> DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns = {}) | |||
| 477 | { | |||
| 478 | return DefineImpl<F, RDFDetail::ExtraArgsForDefine::Slot>(name, std::move(expression), columns, "DefineSlot"); | |||
| 479 | } | |||
| 480 | // clang-format on | |||
| 481 | ||||
| 482 | // clang-format off | |||
| 483 | //////////////////////////////////////////////////////////////////////////// | |||
| 484 | /// \brief Define a new column with a value dependent on the processing slot and the current entry. | |||
| 485 | /// \param[in] name The name of the defined column. | |||
| 486 | /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column. | |||
| 487 | /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry). | |||
| 488 | /// \return the first node of the computation graph for which the new quantity is defined. | |||
| 489 | /// | |||
| 490 | /// This alternative implementation of `Define` is meant as a helper in writing entry-specific, thread-safe custom | |||
| 491 | /// columns. The expression must be a callable of signature R(unsigned int, ULong64_t, T1, T2, ...) where `T1, T2...` | |||
| 492 | /// are the types of the columns that the expression takes as input. The first parameter is reserved for an unsigned | |||
| 493 | /// integer representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with | |||
| 494 | /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1. | |||
| 495 | /// Note that there is no guarantee as to how often each slot will be reached during the event loop. | |||
| 496 | /// The second parameter is reserved for a `ULong64_t` representing the current entry being processed by the current thread. | |||
| 497 | /// | |||
| 498 | /// The following two `Define`s are equivalent, although `DefineSlotEntry` is slightly more performant: | |||
| 499 | /// ~~~{.cpp} | |||
| 500 | /// int function(unsigned int, ULong64_t, double, double); | |||
| 501 | /// Define("x", function, {"rdfslot_", "rdfentry_", "column1", "column2"}) | |||
| 502 | /// DefineSlotEntry("x", function, {"column1", "column2"}) | |||
| 503 | /// ~~~ | |||
| 504 | /// | |||
| 505 | /// See Define() for more information. | |||
| 506 | template <typename F> | |||
| 507 | RInterface<Proxied, DS_t> DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns = {}) | |||
| 508 | { | |||
| 509 | return DefineImpl<F, RDFDetail::ExtraArgsForDefine::SlotAndEntry>(name, std::move(expression), columns, | |||
| 510 | "DefineSlotEntry"); | |||
| 511 | } | |||
| 512 | // clang-format on | |||
| 513 | ||||
| 514 | //////////////////////////////////////////////////////////////////////////// | |||
| 515 | /// \brief Define a new column. | |||
| 516 | /// \param[in] name The name of the defined column. | |||
| 517 | /// \param[in] expression An expression in C++ which represents the defined value | |||
| 518 | /// \return the first node of the computation graph for which the new quantity is defined. | |||
| 519 | /// | |||
| 520 | /// The expression is just-in-time compiled and used to produce the column entries. | |||
| 521 | /// It must be valid C++ syntax in which variable names are substituted with the names | |||
| 522 | /// of branches/columns. | |||
| 523 | /// | |||
| 524 | /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested | |||
| 525 | /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work: | |||
| 526 | /// ~~~{.cpp} | |||
| 527 | /// df.Define("x2", "Map(v, [](float e) { return e*e; })") | |||
| 528 | /// ~~~ | |||
| 529 | /// but instead this will: | |||
| 530 | /// ~~~{.cpp} | |||
| 531 | /// df.Define("x2", "return Map(v, [](float e) { return e*e; })") | |||
| 532 | /// ~~~ | |||
| 533 | /// | |||
| 534 | /// Refer to the first overload of this method for the full documentation. | |||
| 535 | RInterface<Proxied, DS_t> Define(std::string_view name, std::string_view expression) | |||
| 536 | { | |||
| 537 | constexpr auto where = "Define"; | |||
| 538 | RDFInternal::CheckValidCppVarName(name, where); | |||
| 539 | // these checks must be done before jitting lest we throw exceptions in jitted code | |||
| 540 | RDFInternal::CheckForRedefinition(where, name, fColRegister, fLoopManager->GetBranchNames(), | |||
| 541 | GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{}); | |||
| 542 | ||||
| 543 | auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr)); | |||
| 544 | auto jittedDefine = RDFInternal::BookDefineJit(name, expression, *fLoopManager, GetDataSource(), fColRegister, | |||
| 545 | fLoopManager->GetBranchNames(), upcastNodeOnHeap); | |||
| 546 | ||||
| 547 | RDFInternal::RColumnRegister newCols(fColRegister); | |||
| 548 | newCols.AddDefine(std::move(jittedDefine)); | |||
| 549 | ||||
| 550 | RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols)); | |||
| 551 | ||||
| 552 | return newInterface; | |||
| 553 | } | |||
| 554 | ||||
| 555 | //////////////////////////////////////////////////////////////////////////// | |||
| 556 | /// \brief Overwrite the value and/or type of an existing column. | |||
| 557 | /// \param[in] name The name of the column to redefine. | |||
| 558 | /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column. | |||
| 559 | /// \param[in] columns Names of the columns/branches in input to the expression. | |||
| 560 | /// \return the first node of the computation graph for which the quantity is redefined. | |||
| 561 | /// | |||
| 562 | /// The old value of the column can be used as an input for the expression. | |||
| 563 | /// | |||
| 564 | /// An exception is thrown in case the column to redefine does not already exist. | |||
| 565 | /// See Define() for more information. | |||
| 566 | template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0> | |||
| 567 | RInterface<Proxied, DS_t> Redefine(std::string_view name, F expression, const ColumnNames_t &columns = {}) | |||
| 568 | { | |||
| 569 | return DefineImpl<F, RDFDetail::ExtraArgsForDefine::None>(name, std::move(expression), columns, "Redefine"); | |||
| 570 | } | |||
| 571 | ||||
| 572 | // clang-format off | |||
| 573 | //////////////////////////////////////////////////////////////////////////// | |||
| 574 | /// \brief Overwrite the value and/or type of an existing column. | |||
| 575 | /// \param[in] name The name of the column to redefine. | |||
| 576 | /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column. | |||
| 577 | /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot). | |||
| 578 | /// \return the first node of the computation graph for which the new quantity is defined. | |||
| 579 | /// | |||
| 580 | /// The old value of the column can be used as an input for the expression. | |||
| 581 | /// An exception is thrown in case the column to redefine does not already exist. | |||
| 582 | /// | |||
| 583 | /// See DefineSlot() for more information. | |||
| 584 | // clang-format on | |||
| 585 | template <typename F> | |||
| 586 | RInterface<Proxied, DS_t> RedefineSlot(std::string_view name, F expression, const ColumnNames_t &columns = {}) | |||
| 587 | { | |||
| 588 | return DefineImpl<F, RDFDetail::ExtraArgsForDefine::Slot>(name, std::move(expression), columns, "RedefineSlot"); | |||
| 589 | } | |||
| 590 | ||||
| 591 | // clang-format off | |||
| 592 | //////////////////////////////////////////////////////////////////////////// | |||
| 593 | /// \brief Overwrite the value and/or type of an existing column. | |||
| 594 | /// \param[in] name The name of the column to redefine. | |||
| 595 | /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column. | |||
| 596 | /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry). | |||
| 597 | /// \return the first node of the computation graph for which the new quantity is defined. | |||
| 598 | /// | |||
| 599 | /// The old value of the column can be used as an input for the expression. | |||
| 600 | /// An exception is thrown in case the column to re-define does not already exist. | |||
| 601 | /// | |||
| 602 | /// See DefineSlotEntry() for more information. | |||
| 603 | // clang-format on | |||
| 604 | template <typename F> | |||
| 605 | RInterface<Proxied, DS_t> RedefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns = {}) | |||
| 606 | { | |||
| 607 | return DefineImpl<F, RDFDetail::ExtraArgsForDefine::SlotAndEntry>(name, std::move(expression), columns, | |||
| 608 | "RedefineSlotEntry"); | |||
| 609 | } | |||
| 610 | ||||
| 611 | //////////////////////////////////////////////////////////////////////////// | |||
| 612 | /// \brief Overwrite the value and/or type of an existing column. | |||
| 613 | /// \param[in] name The name of the column to redefine. | |||
| 614 | /// \param[in] expression An expression in C++ which represents the defined value | |||
| 615 | /// \return the first node of the computation graph for which the new quantity is defined. | |||
| 616 | /// | |||
| 617 | /// The expression is just-in-time compiled and used to produce the column entries. | |||
| 618 | /// It must be valid C++ syntax in which variable names are substituted with the names | |||
| 619 | /// of branches/columns. | |||
| 620 | /// | |||
| 621 | /// The old value of the column can be used as an input for the expression. | |||
| 622 | /// An exception is thrown in case the column to re-define does not already exist. | |||
| 623 | /// | |||
| 624 | /// Aliases cannot be overridden. See the corresponding Define() overload for more information. | |||
| 625 | RInterface<Proxied, DS_t> Redefine(std::string_view name, std::string_view expression) | |||
| 626 | { | |||
| 627 | constexpr auto where = "Redefine"; | |||
| 628 | RDFInternal::CheckValidCppVarName(name, where); | |||
| 629 | RDFInternal::CheckForDefinition(where, name, fColRegister, fLoopManager->GetBranchNames(), | |||
| 630 | GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{}); | |||
| 631 | RDFInternal::CheckForNoVariations(where, name, fColRegister); | |||
| 632 | ||||
| 633 | auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr)); | |||
| 634 | auto jittedDefine = RDFInternal::BookDefineJit(name, expression, *fLoopManager, GetDataSource(), fColRegister, | |||
| 635 | fLoopManager->GetBranchNames(), upcastNodeOnHeap); | |||
| 636 | ||||
| 637 | RDFInternal::RColumnRegister newCols(fColRegister); | |||
| 638 | newCols.AddDefine(std::move(jittedDefine)); | |||
| 639 | ||||
| 640 | RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols)); | |||
| 641 | ||||
| 642 | return newInterface; | |||
| 643 | } | |||
| 644 | ||||
| 645 | //////////////////////////////////////////////////////////////////////////// | |||
| 646 | /// \brief In case the value in the given column is missing, provide a default value | |||
| 647 | /// \tparam T The type of the column | |||
| 648 | /// \param[in] column Column name where missing values should be replaced by the given default value | |||
| 649 | /// \param[in] defaultValue Value to provide instead of a missing value | |||
| 650 | /// \return The node of the graph that will provide a default value | |||
| 651 | /// | |||
| 652 | /// This operation is useful in case an entry of the dataset is incomplete, | |||
| 653 | /// i.e. if one or more of the columns do not have valid values. It does not | |||
| 654 | /// modify the values of the column, but in case any entry is missing, it | |||
| 655 | /// will provide the default value to downstream nodes instead. | |||
| 656 | /// | |||
| 657 | /// Use cases include: | |||
| 658 | /// * When processing multiple files, one or more of them is missing a column | |||
| 659 | /// * In horizontal joining with entry matching, a certain dataset has no | |||
| 660 | /// match for the current entry. | |||
| 661 | /// | |||
| 662 | /// ### Example usage: | |||
| 663 | /// | |||
| 664 | /// \code{.cpp} | |||
| 665 | /// // Assume a dataset with columns [idx, x] matching another dataset with | |||
| 666 | /// // columns [idx, y]. For idx == 42, the right-hand dataset has no match | |||
| 667 | /// ROOT::RDataFrame df{dataset}; | |||
| 668 | /// auto df_default = df.DefaultValueFor("y", 33) | |||
| 669 | /// .Define("z", [](int x, int y) { return x + y; }, {"x", "y"}); | |||
| 670 | /// auto colz = df_default.Take<int>("z"); | |||
| 671 | /// \endcode | |||
| 672 | /// | |||
| 673 | /// \code{.py} | |||
| 674 | /// df = ROOT.RDataFrame(dataset) | |||
| 675 | /// df_default = df.DefaultValueFor("y", 33).Define("z", "x + y") | |||
| 676 | /// colz = df_default.Take[int]("z") | |||
| 677 | /// \endcode | |||
| 678 | template <typename T> | |||
| 679 | RInterface<Proxied, DS_t> DefaultValueFor(std::string_view column, const T &defaultValue) | |||
| 680 | { | |||
| 681 | constexpr auto where{"DefaultValueFor"}; | |||
| 682 | RDFInternal::CheckForNoVariations(where, column, fColRegister); | |||
| 683 | // For now disable this functionality in case of an empty data source and | |||
| 684 | // the column name was not defined previously. | |||
| 685 | if (ROOT::Internal::RDF::GetDataSourceLabel(*this) == "EmptyDS") | |||
| 686 | RDFInternal::CheckForDefinition(where, column, fColRegister, fLoopManager->GetBranchNames(), | |||
| 687 | GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{}); | |||
| 688 | ||||
| 689 | // Declare return type to the interpreter, for future use by jitted actions | |||
| 690 | auto retTypeName = RDFInternal::TypeID2TypeName(typeid(T)); | |||
| 691 | if (retTypeName.empty()) { | |||
| 692 | // The type is not known to the interpreter. | |||
| 693 | // We must not error out here, but if/when this column is used in jitted code | |||
| 694 | const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(T)); | |||
| 695 | retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType; | |||
| 696 | } | |||
| 697 | ||||
| 698 | const auto validColumnNames = ColumnNames_t{column.data()}; | |||
| 699 | auto newColumn = std::make_shared<ROOT::Internal::RDF::RDefaultValueFor<T>>( | |||
| 700 | column, retTypeName, defaultValue, validColumnNames, fColRegister, *fLoopManager); | |||
| 701 | CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>{}); | |||
| 702 | ||||
| 703 | RDFInternal::RColumnRegister newCols(fColRegister); | |||
| 704 | newCols.AddDefine(std::move(newColumn)); | |||
| 705 | ||||
| 706 | RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols)); | |||
| 707 | ||||
| 708 | return newInterface; | |||
| 709 | } | |||
| 710 | ||||
| 711 | // clang-format off | |||
| 712 | //////////////////////////////////////////////////////////////////////////// | |||
| 713 | /// \brief Define a new column that is updated when the input sample changes. | |||
| 714 | /// \param[in] name The name of the defined column. | |||
| 715 | /// \param[in] expression A C++ callable that computes the new value of the defined column. | |||
| 716 | /// \return the first node of the computation graph for which the new quantity is defined. | |||
| 717 | /// | |||
| 718 | /// The signature of the callable passed as second argument should be `T(unsigned int slot, const ROOT::RDF::RSampleInfo &id)` | |||
| 719 | /// where: | |||
| 720 | /// - `T` is the type of the defined column | |||
| 721 | /// - `slot` is a number in the range [0, nThreads) that is different for each processing thread. This can simplify | |||
| 722 | /// the definition of thread-safe callables if you are interested in using parallel capabilities of RDataFrame. | |||
| 723 | /// - `id` is an instance of a ROOT::RDF::RSampleInfo object which contains information about the sample which is | |||
| 724 | /// being processed (see the class docs for more information). | |||
| 725 | /// | |||
| 726 | /// DefinePerSample() is useful to e.g. define a quantity that depends on which TTree in which TFile is being | |||
| 727 | /// processed or to inject a callback into the event loop that is only called when the processing of a new sample | |||
| 728 | /// starts rather than at every entry. | |||
| 729 | /// | |||
| 730 | /// The callable will be invoked once per input TTree or once per multi-thread task, whichever is more often. | |||
| 731 | /// | |||
| 732 | /// ### Example usage: | |||
| 733 | /// ~~~{.cpp} | |||
| 734 | /// ROOT::RDataFrame df{"mytree", {"sample1.root","sample2.root"}}; | |||
| 735 | /// df.DefinePerSample("weightbysample", | |||
| 736 | /// [](unsigned int slot, const ROOT::RDF::RSampleInfo &id) | |||
| 737 | /// { return id.Contains("sample1") ? 1.0f : 2.0f; }); | |||
| 738 | /// ~~~ | |||
| 739 | // clang-format on | |||
| 740 | // TODO we could SFINAE on F's signature to provide friendlier compilation errors in case of signature mismatch | |||
| 741 | template <typename F, typename RetType_t = typename TTraits::CallableTraits<F>::ret_type> | |||
| 742 | RInterface<Proxied, DS_t> DefinePerSample(std::string_view name, F expression) | |||
| 743 | { | |||
| 744 | RDFInternal::CheckValidCppVarName(name, "DefinePerSample"); | |||
| 745 | RDFInternal::CheckForRedefinition("DefinePerSample", name, fColRegister, fLoopManager->GetBranchNames(), | |||
| 746 | GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{}); | |||
| 747 | ||||
| 748 | auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType_t)); | |||
| 749 | if (retTypeName.empty()) { | |||
| 750 | // The type is not known to the interpreter. | |||
| 751 | // We must not error out here, but if/when this column is used in jitted code | |||
| 752 | const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType_t)); | |||
| 753 | retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType; | |||
| 754 | } | |||
| 755 | ||||
| 756 | auto newColumn = | |||
| 757 | std::make_shared<RDFDetail::RDefinePerSample<F>>(name, retTypeName, std::move(expression), *fLoopManager); | |||
| 758 | ||||
| 759 | RDFInternal::RColumnRegister newCols(fColRegister); | |||
| 760 | newCols.AddDefine(std::move(newColumn)); | |||
| 761 | RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols)); | |||
| 762 | return newInterface; | |||
| 763 | } | |||
| 764 | ||||
| 765 | // clang-format off | |||
| 766 | //////////////////////////////////////////////////////////////////////////// | |||
| 767 | /// \brief Define a new column that is updated when the input sample changes. | |||
| 768 | /// \param[in] name The name of the defined column. | |||
| 769 | /// \param[in] expression A valid C++ expression as a string, which will be used to compute the defined value. | |||
| 770 | /// \return the first node of the computation graph for which the new quantity is defined. | |||
| 771 | /// | |||
| 772 | /// The expression is just-in-time compiled and used to produce the column entries. | |||
| 773 | /// It must be valid C++ syntax and the usage of the special variable names `rdfslot_` and `rdfsampleinfo_` is | |||
| 774 | /// permitted, where these variables will take the same values as the `slot` and `id` parameters described at the | |||
| 775 | /// DefinePerSample(std::string_view name, F expression) overload. See the documentation of that overload for more information. | |||
| 776 | /// | |||
| 777 | /// ### Example usage: | |||
| 778 | /// ~~~{.py} | |||
| 779 | /// df = ROOT.RDataFrame('mytree', ['sample1.root','sample2.root']) | |||
| 780 | /// df.DefinePerSample('weightbysample', 'rdfsampleinfo_.Contains("sample1") ? 1.0f : 2.0f') | |||
| 781 | /// ~~~ | |||
| 782 | /// | |||
| 783 | /// \note | |||
| 784 | /// If you have declared some C++ function to the interpreter, the correct syntax to call that function with this | |||
| 785 | /// overload of DefinePerSample is by calling it explicitly with the special names `rdfslot_` and `rdfsampleinfo_` as | |||
| 786 | /// input parameters. This is for example the correct way to call this overload when working in PyROOT: | |||
| 787 | /// ~~~{.py} | |||
| 788 | /// ROOT.gInterpreter.Declare( | |||
| 789 | /// """ | |||
| 790 | /// float weights(unsigned int slot, const ROOT::RDF::RSampleInfo &id){ | |||
| 791 | /// return id.Contains("sample1") ? 1.0f : 2.0f; | |||
| 792 | /// } | |||
| 793 | /// """) | |||
| 794 | /// df = ROOT.RDataFrame("mytree", ["sample1.root","sample2.root"]) | |||
| 795 | /// df.DefinePerSample("weightsbysample", "weights(rdfslot_, rdfsampleinfo_)") | |||
| 796 | /// ~~~ | |||
| 797 | /// | |||
| 798 | /// \note | |||
| 799 | /// Differently from what happens in Define(), the string expression passed to DefinePerSample cannot contain | |||
| 800 | /// column names other than those mentioned above: the expression is evaluated once before the processing of the | |||
| 801 | /// sample even starts, so column values are not accessible. | |||
| 802 | // clang-format on | |||
| 803 | RInterface<Proxied, DS_t> DefinePerSample(std::string_view name, std::string_view expression) | |||
| 804 | { | |||
| 805 | RDFInternal::CheckValidCppVarName(name, "DefinePerSample"); | |||
| 806 | // these checks must be done before jitting lest we throw exceptions in jitted code | |||
| 807 | RDFInternal::CheckForRedefinition("DefinePerSample", name, fColRegister, fLoopManager->GetBranchNames(), | |||
| 808 | GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{}); | |||
| 809 | ||||
| 810 | auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr)); | |||
| 811 | auto jittedDefine = | |||
| 812 | RDFInternal::BookDefinePerSampleJit(name, expression, *fLoopManager, fColRegister, upcastNodeOnHeap); | |||
| 813 | ||||
| 814 | RDFInternal::RColumnRegister newCols(fColRegister); | |||
| 815 | newCols.AddDefine(std::move(jittedDefine)); | |||
| 816 | ||||
| 817 | RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols)); | |||
| 818 | ||||
| 819 | return newInterface; | |||
| 820 | } | |||
| 821 | ||||
| 822 | /// \brief Register systematic variations for a single existing column using custom variation tags. | |||
| 823 | /// \param[in] colName name of the column for which varied values are provided. | |||
| 824 | /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can | |||
| 825 | /// take any column values as input, similarly to what happens during Filter and Define calls. It must | |||
| 826 | /// return an RVec of varied values, one for each variation tag, in the same order as the tags. | |||
| 827 | /// \param[in] inputColumns the names of the columns to be passed to the callable. | |||
| 828 | /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`. | |||
| 829 | /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`. | |||
| 830 | /// | |||
| 831 | /// Vary provides a natural and flexible syntax to define systematic variations that automatically propagate to | |||
| 832 | /// Filters, Defines and results. RDataFrame usage of columns with attached variations does not change, but for | |||
| 833 | /// results that depend on any varied quantity, a map/dictionary of varied results can be produced with | |||
| 834 | /// ROOT::RDF::Experimental::VariationsFor (see the example below). | |||
| 835 | /// | |||
| 836 | /// The dictionary will contain a "nominal" value (accessed with the "nominal" key) for the unchanged result, and | |||
| 837 | /// values for each of the systematic variations that affected the result (via upstream Filters or via direct or | |||
| 838 | /// indirect dependencies of the column values on some registered variations). The keys will be a composition of | |||
| 839 | /// variation names and tags, e.g. "pt:up" and "pt:down" for the example below. | |||
| 840 | /// | |||
| 841 | /// In the following example we add up/down variations of pt and fill a histogram with a quantity that depends on pt. | |||
| 842 | /// We automatically obtain three histograms in output ("nominal", "pt:up" and "pt:down"): | |||
| 843 | /// ~~~{.cpp} | |||
| 844 | /// auto nominal_hx = | |||
| 845 | /// df.Vary("pt", [] (double pt) { return RVecD{pt*0.9, pt*1.1}; }, {"pt"}, {"down", "up"}) | |||
| 846 | /// .Filter("pt > k") | |||
| 847 | /// .Define("x", someFunc, {"pt"}) | |||
| 848 | /// .Histo1D("x"); | |||
| 849 | /// | |||
| 850 | /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx); | |||
| 851 | /// hx["nominal"].Draw(); | |||
| 852 | /// hx["pt:down"].Draw("SAME"); | |||
| 853 | /// hx["pt:up"].Draw("SAME"); | |||
| 854 | /// ~~~ | |||
| 855 | /// RDataFrame computes all variations as part of a single loop over the data. | |||
| 856 | /// In particular, this means that I/O and computation of values shared | |||
| 857 | /// among variations only happen once for all variations. Thus, the event loop | |||
| 858 | /// run-time typically scales much better than linearly with the number of | |||
| 859 | /// variations. | |||
| 860 | /// | |||
| 861 | /// RDataFrame lazily computes the varied values required to produce the | |||
| 862 | /// outputs of \ref ROOT::RDF::Experimental::VariationsFor "VariationsFor()". If \ref | |||
| 863 | /// ROOT::RDF::Experimental::VariationsFor "VariationsFor()" was not called for a result, the computations are only | |||
| 864 | /// run for the nominal case. | |||
| 865 | /// | |||
| 866 | /// See other overloads for examples when variations are added for multiple existing columns, | |||
| 867 | /// or when the tags are auto-generated instead of being directly defined. | |||
| 868 | template <typename F> | |||
| 869 | RInterface<Proxied, DS_t> Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns, | |||
| 870 | const std::vector<std::string> &variationTags, std::string_view variationName = "") | |||
| 871 | { | |||
| 872 | std::vector<std::string> colNames{{std::string(colName)}}; | |||
| 873 | const std::string theVariationName{variationName.empty() ? colName : variationName}; | |||
| 874 | ||||
| 875 | return VaryImpl<true>(std::move(colNames), std::forward<F>(expression), inputColumns, variationTags, | |||
| 876 | theVariationName); | |||
| 877 | } | |||
| 878 | ||||
| 879 | /// \brief Register systematic variations for a single existing column using auto-generated variation tags. | |||
| 880 | /// \param[in] colName name of the column for which varied values are provided. | |||
| 881 | /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can | |||
| 882 | /// take any column values as input, similarly to what happens during Filter and Define calls. It must | |||
| 883 | /// return an RVec of varied values, one for each variation tag, in the same order as the tags. | |||
| 884 | /// \param[in] inputColumns the names of the columns to be passed to the callable. | |||
| 885 | /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`, | |||
| 886 | /// `"1"`, etc. | |||
| 887 | /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`. | |||
| 888 | /// colName is used if none is provided. | |||
| 889 | /// | |||
| 890 | /// This overload of Vary takes an nVariations parameter instead of a list of tag names. | |||
| 891 | /// The varied results will be accessible via the keys of the dictionary with the form `variationName:N` where `N` | |||
| 892 | /// is the corresponding sequential tag starting at 0 and going up to `nVariations - 1`. | |||
| 893 | /// | |||
| 894 | /// Example usage: | |||
| 895 | /// ~~~{.cpp} | |||
| 896 | /// auto nominal_hx = | |||
| 897 | /// df.Vary("pt", [] (double pt) { return RVecD{pt*0.9, pt*1.1}; }, {"pt"}, 2) | |||
| 898 | /// .Histo1D("pt"); | |||
| 899 | /// | |||
| 900 | /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx); | |||
| 901 | /// hx["nominal"].Draw(); | |||
| 902 | /// hx["pt:0"].Draw("SAME"); | |||
| 903 | /// hx["pt:1"].Draw("SAME"); | |||
| 904 | /// ~~~ | |||
| 905 | /// | |||
| 906 | /// \note See also This Vary() overload for more information. | |||
| 907 | template <typename F> | |||
| 908 | RInterface<Proxied, DS_t> Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns, | |||
| 909 | std::size_t nVariations, std::string_view variationName = "") | |||
| 910 | { | |||
| 911 | R__ASSERT(nVariations > 0 && "Must have at least one variation.")do { if (__builtin_expect(!!(!(nVariations > 0 && "Must have at least one variation." )), 0)) ::Fatal("", kAssertMsg, "nVariations > 0 && \"Must have at least one variation.\"" , 911, "/cvmfs/belle.cern.ch/el9/externals/v02-04-00/include/root/ROOT/RDF/RInterface.hxx" ); } while (false); | |||
| 912 | ||||
| 913 | std::vector<std::string> variationTags; | |||
| 914 | variationTags.reserve(nVariations); | |||
| 915 | for (std::size_t i = 0u; i < nVariations; ++i) | |||
| 916 | variationTags.emplace_back(std::to_string(i)); | |||
| 917 | ||||
| 918 | const std::string theVariationName{variationName.empty() ? colName : variationName}; | |||
| 919 | ||||
| 920 | return Vary(colName, std::forward<F>(expression), inputColumns, std::move(variationTags), theVariationName); | |||
| 921 | } | |||
| 922 | ||||
| 923 | /// \brief Register systematic variations for multiple existing columns using custom variation tags. | |||
| 924 | /// \param[in] colNames set of names of the columns for which varied values are provided. | |||
| 925 | /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can | |||
| 926 | /// take any column values as input, similarly to what happens during Filter and Define calls. It must | |||
| 927 | /// return an RVec of varied values, one for each variation tag, in the same order as the tags. | |||
| 928 | /// \param[in] inputColumns the names of the columns to be passed to the callable. | |||
| 929 | /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`. | |||
| 930 | /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"` | |||
| 931 | /// | |||
| 932 | /// This overload of Vary takes a list of column names as first argument and | |||
| 933 | /// requires that the expression returns an RVec of RVecs of values: one inner RVec for the variations of each | |||
| 934 | /// affected column. The `variationTags` are defined as `{"down", "up"}`. | |||
| 935 | /// | |||
| 936 | /// Example usage: | |||
| 937 | /// ~~~{.cpp} | |||
| 938 | /// // produce variations "ptAndEta:down" and "ptAndEta:up" | |||
| 939 | /// auto nominal_hx = | |||
| 940 | /// df.Vary({"pt", "eta"}, // the columns that will vary simultaneously | |||
| 941 | /// [](double pt, double eta) { return RVec<RVecF>{{pt*0.9, pt*1.1}, {eta*0.9, eta*1.1}}; }, | |||
| 942 | /// {"pt", "eta"}, // inputs to the Vary expression, independent of what columns are varied | |||
| 943 | /// {"down", "up"}, // variation tags | |||
| 944 | /// "ptAndEta") // variation name | |||
| 945 | /// .Histo1D("pt", "eta"); | |||
| 946 | /// | |||
| 947 | /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx); | |||
| 948 | /// hx["nominal"].Draw(); | |||
| 949 | /// hx["ptAndEta:down"].Draw("SAME"); | |||
| 950 | /// hx["ptAndEta:up"].Draw("SAME"); | |||
| 951 | /// ~~~ | |||
| 952 | /// | |||
| 953 | /// \note See also This Vary() overload for more information. | |||
| 954 | ||||
| 955 | template <typename F> | |||
| 956 | RInterface<Proxied, DS_t> | |||
| 957 | Vary(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns, | |||
| 958 | const std::vector<std::string> &variationTags, std::string_view variationName) | |||
| 959 | { | |||
| 960 | return VaryImpl<false>(colNames, std::forward<F>(expression), inputColumns, variationTags, variationName); | |||
| 961 | } | |||
| 962 | ||||
| 963 | /// \brief Register systematic variations for multiple existing columns using custom variation tags. | |||
| 964 | /// \param[in] colNames set of names of the columns for which varied values are provided. | |||
| 965 | /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can | |||
| 966 | /// take any column values as input, similarly to what happens during Filter and Define calls. It must | |||
| 967 | /// return an RVec of varied values, one for each variation tag, in the same order as the tags. | |||
| 968 | /// \param[in] inputColumns the names of the columns to be passed to the callable. | |||
| 969 | /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`. | |||
| 970 | /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`. | |||
| 971 | /// colName is used if none is provided. | |||
| 972 | /// | |||
| 973 | /// \note This overload ensures that the ambiguity between C++20 string, vector<string> construction from init list | |||
| 974 | /// is avoided. | |||
| 975 | /// | |||
| 976 | /// \note See also This Vary() overload for more information. | |||
| 977 | template <typename F> | |||
| 978 | RInterface<Proxied, DS_t> | |||
| 979 | Vary(std::initializer_list<std::string> colNames, F &&expression, const ColumnNames_t &inputColumns, | |||
| 980 | const std::vector<std::string> &variationTags, std::string_view variationName) | |||
| 981 | { | |||
| 982 | return Vary(std::vector<std::string>(colNames), std::forward<F>(expression), inputColumns, variationTags, variationName); | |||
| 983 | } | |||
| 984 | ||||
| 985 | /// \brief Register systematic variations for multiple existing columns using auto-generated tags. | |||
| 986 | /// \param[in] colNames set of names of the columns for which varied values are provided. | |||
| 987 | /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can | |||
| 988 | /// take any column values as input, similarly to what happens during Filter and Define calls. It must | |||
| 989 | /// return an RVec of varied values, one for each variation tag, in the same order as the tags. | |||
| 990 | /// \param[in] inputColumns the names of the columns to be passed to the callable. | |||
| 991 | /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`, | |||
| 992 | /// `"1"`, etc. | |||
| 993 | /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`. | |||
| 994 | /// colName is used if none is provided. | |||
| 995 | /// | |||
| 996 | /// This overload of Vary takes a list of column names as first argument. | |||
| 997 | /// It takes an `nVariations` parameter instead of a list of tag names (`variationTags`). Tag names | |||
| 998 | /// will be auto-generated as the sequence 0...``nVariations-1``. | |||
| 999 | /// | |||
| 1000 | /// Example usage: | |||
| 1001 | /// ~~~{.cpp} | |||
| 1002 | /// auto nominal_hx = | |||
| 1003 | /// df.Vary({"pt", "eta"}, // the columns that will vary simultaneously | |||
| 1004 | /// [](double pt, double eta) { return RVec<RVecF>{{pt*0.9, pt*1.1}, {eta*0.9, eta*1.1}}; }, | |||
| 1005 | /// {"pt", "eta"}, // inputs to the Vary expression, independent of what columns are varied | |||
| 1006 | /// 2, // auto-generated variation tags | |||
| 1007 | /// "ptAndEta") // variation name | |||
| 1008 | /// .Histo1D("pt", "eta"); | |||
| 1009 | /// | |||
| 1010 | /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx); | |||
| 1011 | /// hx["nominal"].Draw(); | |||
| 1012 | /// hx["ptAndEta:0"].Draw("SAME"); | |||
| 1013 | /// hx["ptAndEta:1"].Draw("SAME"); | |||
| 1014 | /// ~~~ | |||
| 1015 | /// | |||
| 1016 | /// \note See also This Vary() overload for more information. | |||
| 1017 | template <typename F> | |||
| 1018 | RInterface<Proxied, DS_t> | |||
| 1019 | Vary(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns, | |||
| 1020 | std::size_t nVariations, std::string_view variationName) | |||
| 1021 | { | |||
| 1022 | R__ASSERT(nVariations > 0 && "Must have at least one variation.")do { if (__builtin_expect(!!(!(nVariations > 0 && "Must have at least one variation." )), 0)) ::Fatal("", kAssertMsg, "nVariations > 0 && \"Must have at least one variation.\"" , 1022, "/cvmfs/belle.cern.ch/el9/externals/v02-04-00/include/root/ROOT/RDF/RInterface.hxx" ); } while (false); | |||
| 1023 | ||||
| 1024 | std::vector<std::string> variationTags; | |||
| 1025 | variationTags.reserve(nVariations); | |||
| 1026 | for (std::size_t i = 0u; i < nVariations; ++i) | |||
| 1027 | variationTags.emplace_back(std::to_string(i)); | |||
| 1028 | ||||
| 1029 | return Vary(colNames, std::forward<F>(expression), inputColumns, std::move(variationTags), variationName); | |||
| 1030 | } | |||
| 1031 | ||||
| 1032 | /// \brief Register systematic variations for multiple existing columns using auto-generated variation tags. | |||
| 1033 | /// \param[in] colNames set of names of the columns for which varied values are provided. | |||
| 1034 | /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can | |||
| 1035 | /// take any column values as input, similarly to what happens during Filter and Define calls. It must | |||
| 1036 | /// return an RVec of varied values, one for each variation tag, in the same order as the tags. | |||
| 1037 | /// \param[in] inputColumns the names of the columns to be passed to the callable. | |||
| 1039 | /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`, | |||
| 1040 | /// `"1"`, etc. | |||
| 1041 | /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`. | |||
| 1042 | /// colName is used if none is provided. | |||
| 1043 | /// | |||
| 1044 | /// \note This overload ensures that the ambiguity between C++20 string, vector<string> construction from init list | |||
| 1045 | /// is avoided. | |||
| 1046 | /// | |||
| 1047 | /// \note See also This Vary() overload for more information. | |||
| 1048 | template <typename F> | |||
| 1049 | RInterface<Proxied, DS_t> | |||
| 1050 | Vary(std::initializer_list<std::string> colNames, F &&expression, const ColumnNames_t &inputColumns, | |||
| 1051 | std::size_t nVariations, std::string_view variationName) | |||
| 1052 | { | |||
| 1053 | return Vary(std::vector<std::string>(colNames), std::forward<F>(expression), inputColumns, nVariations, variationName); | |||
| 1054 | } | |||
| 1055 | ||||
| 1056 | /// \brief Register systematic variations for a single existing column using custom variation tags. | |||
| 1057 | /// \param[in] colName name of the column for which varied values are provided. | |||
| 1058 | /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied | |||
| 1059 | /// values for the specified column. | |||
| 1060 | /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`. | |||
| 1061 | /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`. | |||
| 1062 | /// colName is used if none is provided. | |||
| 1063 | /// | |||
| 1064 | /// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time | |||
| 1065 | /// compiled. The example below shows how Vary() is used while dealing with a single column. The variation tags are | |||
| 1066 | /// defined as `{"down", "up"}`. | |||
| 1067 | /// ~~~{.cpp} | |||
| 1068 | /// auto nominal_hx = | |||
| 1069 | /// df.Vary("pt", "ROOT::RVecD{pt*0.9, pt*1.1}", {"down", "up"}) | |||
| 1070 | /// .Filter("pt > k") | |||
| 1071 | /// .Define("x", someFunc, {"pt"}) | |||
| 1072 | /// .Histo1D("x"); | |||
| 1073 | /// | |||
| 1074 | /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx); | |||
| 1075 | /// hx["nominal"].Draw(); | |||
| 1076 | /// hx["pt:down"].Draw("SAME"); | |||
| 1077 | /// hx["pt:up"].Draw("SAME"); | |||
| 1078 | /// ~~~ | |||
| 1079 | /// | |||
| 1080 | /// \note See also This Vary() overload for more information. | |||
| 1081 | RInterface<Proxied, DS_t> Vary(std::string_view colName, std::string_view expression, | |||
| 1082 | const std::vector<std::string> &variationTags, std::string_view variationName = "") | |||
| 1083 | { | |||
| 1084 | std::vector<std::string> colNames{{std::string(colName)}}; | |||
| 1085 | const std::string theVariationName{variationName.empty() ? colName : variationName}; | |||
| 1086 | ||||
| 1087 | return JittedVaryImpl(colNames, expression, variationTags, theVariationName, /*isSingleColumn=*/true); | |||
| 1088 | } | |||
| 1089 | ||||
| 1090 | /// \brief Register systematic variations for a single existing column using auto-generated variation tags. | |||
| 1091 | /// \param[in] colName name of the column for which varied values are provided. | |||
| 1092 | /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied | |||
| 1093 | /// values for the specified column. | |||
| 1094 | /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`, | |||
| 1095 | /// `"1"`, etc. | |||
| 1096 | /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`. | |||
| 1097 | /// colName is used if none is provided. | |||
| 1098 | /// | |||
| 1099 | /// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time | |||
| 1100 | /// compiled. The example below shows how Vary() is used while dealing with a single column. The variation tags are | |||
| 1101 | /// auto-generated. | |||
| 1102 | /// ~~~{.cpp} | |||
| 1103 | /// auto nominal_hx = | |||
| 1104 | /// df.Vary("pt", "ROOT::RVecD{pt*0.9, pt*1.1}", 2) | |||
| 1105 | /// .Histo1D("pt"); | |||
| 1106 | /// | |||
| 1107 | /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx); | |||
| 1108 | /// hx["nominal"].Draw(); | |||
| 1109 | /// hx["pt:0"].Draw("SAME"); | |||
| 1110 | /// hx["pt:1"].Draw("SAME"); | |||
| 1111 | /// ~~~ | |||
| 1112 | /// | |||
| 1113 | /// \note See also This Vary() overload for more information. | |||
| 1114 | RInterface<Proxied, DS_t> Vary(std::string_view colName, std::string_view expression, std::size_t nVariations, | |||
| 1115 | std::string_view variationName = "") | |||
| 1116 | { | |||
| 1117 | std::vector<std::string> variationTags; | |||
| 1118 | variationTags.reserve(nVariations); | |||
| 1119 | for (std::size_t i = 0u; i < nVariations; ++i) | |||
| 1120 | variationTags.emplace_back(std::to_string(i)); | |||
| 1121 | ||||
| 1122 | return Vary(colName, expression, std::move(variationTags), variationName); | |||
| 1123 | } | |||
| 1124 | ||||
| 1125 | /// \brief Register systematic variations for multiple existing columns using auto-generated variation tags. | |||
| 1126 | /// \param[in] colNames set of names of the columns for which varied values are provided. | |||
| 1127 | /// \param[in] expression a string containing valid C++ code that evaluates to an RVec of RVecs containing the varied | |||
| 1128 | /// values for the specified columns. | |||
| 1129 | /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`, | |||
| 1130 | /// `"1"`, etc. | |||
| 1131 | /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`. | |||
| 1132 | /// | |||
| 1133 | /// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time | |||
| 1134 | /// compiled. It takes an nVariations parameter instead of a list of tag names. | |||
| 1135 | /// The varied results will be accessible via the keys of the dictionary with the form `variationName:N` where `N` | |||
| 1136 | /// is the corresponding sequential tag starting at 0 and going up to `nVariations - 1`. | |||
| 1137 | /// The example below shows how Vary() is used while dealing with multiple columns. | |||
| 1138 | /// | |||
| 1139 | /// ~~~{.cpp} | |||
| 1140 | /// auto nominal_hx = | |||
| 1141 | /// df.Vary({"x", "y"}, "ROOT::RVec<ROOT::RVecD>{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", 2, "xy") | |||
| 1142 | /// .Histo1D("x", "y"); | |||
| 1143 | /// | |||
| 1144 | /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx); | |||
| 1145 | /// hx["nominal"].Draw(); | |||
| 1146 | /// hx["xy:0"].Draw("SAME"); | |||
| 1147 | /// hx["xy:1"].Draw("SAME"); | |||
| 1148 | /// ~~~ | |||
| 1149 | /// | |||
| 1150 | /// \note See also This Vary() overload for more information. | |||
| 1151 | RInterface<Proxied, DS_t> Vary(const std::vector<std::string> &colNames, std::string_view expression, | |||
| 1152 | std::size_t nVariations, std::string_view variationName) | |||
| 1153 | { | |||
| 1154 | std::vector<std::string> variationTags; | |||
| 1155 | variationTags.reserve(nVariations); | |||
| 1156 | for (std::size_t i = 0u; i < nVariations; ++i) | |||
| 1157 | variationTags.emplace_back(std::to_string(i)); | |||
| 1158 | ||||
| 1159 | return Vary(colNames, expression, std::move(variationTags), variationName); | |||
| 1160 | } | |||
| 1161 | ||||
| 1162 | /// \brief Register systematic variations for multiple existing columns using auto-generated variation tags. | |||
| 1163 | /// \param[in] colNames set of names of the columns for which varied values are provided. | |||
| 1164 | /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied | |||
| 1165 | /// values for the specified column. | |||
| 1166 | /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`, | |||
| 1167 | /// `"1"`, etc. | |||
| 1168 | /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`. | |||
| 1169 | /// colName is used if none is provided. | |||
| 1170 | /// | |||
| 1171 | /// \note This overload ensures that the ambiguity between C++20 string, vector<string> construction from init list | |||
| 1172 | /// is avoided. | |||
| 1173 | /// | |||
| 1174 | /// \note See also This Vary() overload for more information. | |||
| 1175 | RInterface<Proxied, DS_t> Vary(std::initializer_list<std::string> colNames, std::string_view expression, | |||
| 1176 | std::size_t nVariations, std::string_view variationName) | |||
| 1177 | { | |||
| 1178 | return Vary(std::vector<std::string>(colNames), expression, nVariations, variationName); | |||
| 1179 | } | |||
| 1180 | ||||
| 1181 | /// \brief Register systematic variations for multiple existing columns using custom variation tags. | |||
| 1182 | /// \param[in] colNames set of names of the columns for which varied values are provided. | |||
| 1183 | /// \param[in] expression a string containing valid C++ code that evaluates to an RVec or RVecs containing the varied | |||
| 1184 | /// values for the specified columns. | |||
| 1185 | /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`. | |||
| 1186 | /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`. | |||
| 1187 | /// | |||
| 1188 | /// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time | |||
| 1189 | /// compiled. The example below shows how Vary() is used while dealing with multiple columns. The tags are defined as | |||
| 1190 | /// `{"down", "up"}`. | |||
| 1191 | /// ~~~{.cpp} | |||
| 1192 | /// auto nominal_hx = | |||
| 1193 | /// df.Vary({"x", "y"}, "ROOT::RVec<ROOT::RVecD>{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", {"down", "up"}, "xy") | |||
| 1194 | /// .Histo1D("x", "y"); | |||
| 1195 | /// | |||
| 1196 | /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx); | |||
| 1197 | /// hx["nominal"].Draw(); | |||
| 1198 | /// hx["xy:down"].Draw("SAME"); | |||
| 1199 | /// hx["xy:up"].Draw("SAME"); | |||
| 1200 | /// ~~~ | |||
| 1201 | /// | |||
| 1202 | /// \note See also This Vary() overload for more information. | |||
| 1203 | RInterface<Proxied, DS_t> Vary(const std::vector<std::string> &colNames, std::string_view expression, | |||
| 1204 | const std::vector<std::string> &variationTags, std::string_view variationName) | |||
| 1205 | { | |||
| 1206 | return JittedVaryImpl(colNames, expression, variationTags, variationName, /*isSingleColumn=*/false); | |||
| 1207 | } | |||
| 1208 | ||||
| 1209 | //////////////////////////////////////////////////////////////////////////// | |||
| 1210 | /// \brief Allow to refer to a column with a different name. | |||
| 1211 | /// \param[in] alias name of the column alias | |||
| 1212 | /// \param[in] columnName of the column to be aliased | |||
| 1213 | /// \return the first node of the computation graph for which the alias is available. | |||
| 1214 | /// | |||
| 1215 | /// Aliasing an alias is supported. | |||
| 1216 | /// | |||
| 1217 | /// ### Example usage: | |||
| 1218 | /// ~~~{.cpp} | |||
| 1219 | /// auto df_with_alias = df.Alias("simple_name", "very_long&complex_name!!!"); | |||
| 1220 | /// ~~~ | |||
| 1221 | RInterface<Proxied, DS_t> Alias(std::string_view alias, std::string_view columnName) | |||
| 1222 | { | |||
| 1223 | // The symmetry with Define is clear. We want to: | |||
| 1224 | // - Create globally the alias and return this very node, unchanged | |||
| 1225 | // - Make aliases accessible based on chains and not globally | |||
| 1226 | ||||
| 1227 | // Helper to find out if a name is a column | |||
| 1228 | auto &dsColumnNames = GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{}; | |||
| 1229 | ||||
| 1230 | constexpr auto where = "Alias"; | |||
| 1231 | RDFInternal::CheckValidCppVarName(alias, where); | |||
| 1232 | // If the alias name is a column name, there is a problem | |||
| 1233 | RDFInternal::CheckForRedefinition(where, alias, fColRegister, fLoopManager->GetBranchNames(), dsColumnNames); | |||
| 1234 | ||||
| 1235 | const auto validColumnName = GetValidatedColumnNames(1, {std::string(columnName)})[0]; | |||
| 1236 | ||||
| 1237 | RDFInternal::RColumnRegister newCols(fColRegister); | |||
| 1238 | newCols.AddAlias(alias, validColumnName); | |||
| 1239 | ||||
| 1240 | RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols)); | |||
| 1241 | ||||
| 1242 | return newInterface; | |||
| 1243 | } | |||
| 1244 | ||||
| 1245 | //////////////////////////////////////////////////////////////////////////// | |||
| 1246 | /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`. | |||
| 1247 | /// \tparam ColumnTypes variadic list of branch/column types. | |||
| 1248 | /// \param[in] treename The name of the output TTree. | |||
| 1249 | /// \param[in] filename The name of the output TFile. | |||
| 1250 | /// \param[in] columnList The list of names of the columns/branches to be written. | |||
| 1251 | /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree. | |||
| 1252 | /// \return a `RDataFrame` that wraps the snapshotted dataset. | |||
| 1253 | /// | |||
| 1254 | /// Support for writing of nested branches is limited (although RDataFrame is able to read them) and dot ('.') | |||
| 1255 | /// characters in input column names will be replaced by underscores ('_') in the branches produced by Snapshot. | |||
| 1256 | /// When writing a variable size array through Snapshot, it is required that the column indicating its size is also | |||
| 1257 | /// written out and it appears before the array in the columnList. | |||
| 1258 | /// | |||
| 1259 | /// By default, in case of TTree or TChain inputs, Snapshot will try to write out all top-level branches. For other | |||
| 1260 | /// types of inputs, all columns returned by GetColumnNames() will be written out. If friend trees or chains are | |||
| 1261 | /// present, by default all friend top-level branches that have names that do not collide with | |||
| 1262 | /// names of branches in the main TTree/TChain will be written out. Since v6.24, Snapshot will also write out | |||
| 1263 | /// friend branches with the same names of branches in the main TTree/TChain with names of the form | |||
| 1264 | /// `<friendname>_<branchname>` in order to differentiate them from the branches in the main tree/chain. | |||
| 1265 | /// | |||
| 1266 | /// ### Writing to a sub-directory | |||
| 1267 | /// | |||
| 1268 | /// Snapshot supports writing the TTree in a sub-directory inside the TFile. It is sufficient to specify the path to | |||
| 1269 | /// the TTree as part of the TTree name, e.g. `df.Snapshot("subdir/t", "f.root")` write TTree `t` in the | |||
| 1270 | /// sub-directory `subdir` of file `f.root` (creating file and sub-directory as needed). | |||
| 1271 | /// | |||
| 1272 | /// \attention In multi-thread runs (i.e. when EnableImplicitMT() has been called) threads will loop over clusters of | |||
| 1273 | /// entries in an undefined order, so Snapshot will produce outputs in which (clusters of) entries will be shuffled | |||
| 1274 | /// with respect to the input TTree. Using such "shuffled" TTrees as friends of the original trees would result in | |||
| 1275 | /// wrong associations between entries in the main TTree and entries in the "shuffled" friend. Since v6.22, ROOT will | |||
| 1276 | /// error out if such a "shuffled" TTree is used in a friendship. | |||
| 1277 | /// | |||
| 1278 | /// \note In case no events are written out (e.g. because no event passes all filters), Snapshot will still write the | |||
| 1279 | /// requested output TTree to the file, with all the branches requested to preserve the dataset schema. | |||
| 1280 | /// | |||
| 1281 | /// \note Snapshot will refuse to process columns with names of the form `#columnname`. These are special columns | |||
| 1282 | /// made available by some data sources (e.g. RNTupleDS) that represent the size of column `columnname`, and are | |||
| 1283 | /// not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an | |||
| 1284 | /// Alias(): `df.Alias("nbar", "#bar").Snapshot(..., {"nbar"})`. | |||
| 1285 | /// | |||
| 1286 | /// ### Example invocations: | |||
| 1287 | /// | |||
| 1288 | /// ~~~{.cpp} | |||
| 1289 | /// // without specifying template parameters (column types automatically deduced) | |||
| 1290 | /// df.Snapshot("outputTree", "outputFile.root", {"x", "y"}); | |||
| 1291 | /// | |||
| 1292 | /// // specifying template parameters ("x" is `int`, "y" is `float`) | |||
| 1293 | /// df.Snapshot<int, float>("outputTree", "outputFile.root", {"x", "y"}); | |||
| 1294 | /// ~~~ | |||
| 1295 | /// | |||
| 1296 | /// To book a Snapshot without triggering the event loop, one needs to set the appropriate flag in | |||
| 1297 | /// `RSnapshotOptions`: | |||
| 1298 | /// ~~~{.cpp} | |||
| 1299 | /// RSnapshotOptions opts; | |||
| 1300 | /// opts.fLazy = true; | |||
| 1301 | /// df.Snapshot("outputTree", "outputFile.root", {"x"}, opts); | |||
| 1302 | /// ~~~ | |||
| 1303 | template <typename... ColumnTypes> | |||
| 1304 | RResultPtr<RInterface<RLoopManager>> | |||
| 1305 | Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, | |||
| 1306 | const RSnapshotOptions &options = RSnapshotOptions()) | |||
| 1307 | { | |||
| 1308 | return SnapshotImpl<ColumnTypes...>(treename, filename, columnList, options); | |||
| 1309 | } | |||
| 1310 | ||||
| 1311 | //////////////////////////////////////////////////////////////////////////// | |||
| 1312 | /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`. | |||
| 1313 | /// \param[in] treename The name of the output TTree. | |||
| 1314 | /// \param[in] filename The name of the output TFile. | |||
| 1315 | /// \param[in] columnList The list of names of the columns/branches to be written. | |||
| 1316 | /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree. | |||
| 1317 | /// \return a `RDataFrame` that wraps the snapshotted dataset. | |||
| 1318 | /// | |||
| 1319 | /// This function returns a `RDataFrame` built with the output tree as a source. | |||
| 1320 | /// The types of the columns are automatically inferred and do not need to be specified. | |||
| 1321 | /// | |||
| 1322 | /// See above for a more complete description and example usages. | |||
| 1323 | RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename, | |||
| 1324 | const ColumnNames_t &columnList, | |||
| 1325 | const RSnapshotOptions &options = RSnapshotOptions()) | |||
| 1326 | { | |||
| 1327 | // like columnList but with `#var` columns removed | |||
| 1328 | auto colListNoPoundSizes = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot"); | |||
| 1329 | // like columnListWithoutSizeColumns but with aliases resolved | |||
| 1330 | auto colListNoAliases = GetValidatedColumnNames(colListNoPoundSizes.size(), colListNoPoundSizes); | |||
| 1331 | RDFInternal::CheckForDuplicateSnapshotColumns(colListNoAliases); | |||
| 1332 | // like validCols but with missing size branches required by array branches added in the right positions | |||
| 1333 | const auto pairOfColumnLists = RDFInternal::AddSizeBranches( | |||
| 1334 | fLoopManager->GetBranchNames(), GetDataSource(), std::move(colListNoAliases), std::move(colListNoPoundSizes)); | |||
| 1335 | const auto &colListNoAliasesWithSizeBranches = pairOfColumnLists.first; | |||
| 1336 | const auto &colListWithAliasesAndSizeBranches = pairOfColumnLists.second; | |||
| 1337 | ||||
| 1338 | const auto fullTreeName = treename; | |||
| 1339 | const auto parsedTreePath = RDFInternal::ParseTreePath(fullTreeName); | |||
| 1340 | treename = parsedTreePath.fTreeName; | |||
| 1341 | const auto &dirname = parsedTreePath.fDirName; | |||
| 1342 | ||||
| 1343 | ::TDirectory::TContext ctxt; | |||
| 1344 | ||||
| 1345 | RResultPtr<RInterface<RLoopManager>> resPtr; | |||
| 1346 | ||||
| 1347 | if (options.fOutputFormat == ESnapshotOutputFormat::kRNTuple) { | |||
| 1348 | if (RDFInternal::GetDataSourceLabel(*this) == "TTreeDS") { | |||
| 1349 | throw std::runtime_error("Snapshotting from TTree to RNTuple is not yet supported. The current recommended " | |||
| 1350 | "way to convert TTrees to RNTuple is through the RNTupleImporter."); | |||
| 1351 | } | |||
| 1352 | ||||
| 1353 | // The data source of the RNTuple resulting from the Snapshot action does not exist yet here, so we create one | |||
| 1354 | // without a data source for now, and set it once the actual data source can be created (i.e., after | |||
| 1355 | // writing the RNTuple). | |||
| 1356 | auto newRDF = std::make_shared<RInterface<RLoopManager>>(std::make_shared<RLoopManager>(colListNoPoundSizes)); | |||
| 1357 | ||||
| 1358 | auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{ | |||
| 1359 | std::string(filename), std::string(dirname), std::string(treename), colListWithAliasesAndSizeBranches, | |||
| 1360 | options, newRDF->GetLoopManager(), GetLoopManager(), true /* fToNTuple */}); | |||
| 1361 | ||||
| 1362 | // The Snapshot helper will use colListNoAliasesWithSizeBranches (with aliases resolved) as input columns, and | |||
| 1363 | // colListWithAliasesAndSizeBranches (still with aliases in it, passed through snapHelperArgs) as output column | |||
| 1364 | // names. | |||
| 1365 | resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, RDFDetail::RInferredType>( | |||
| 1366 | colListNoAliasesWithSizeBranches, newRDF, snapHelperArgs, fProxiedPtr, | |||
| 1367 | colListNoAliasesWithSizeBranches.size()); | |||
| 1368 | } else { | |||
| 1369 | if (RDFInternal::GetDataSourceLabel(*this) == "RNTupleDS" && | |||
| 1370 | options.fOutputFormat == ESnapshotOutputFormat::kDefault) { | |||
| 1371 | Warning("Snapshot", | |||
| 1372 | "The default Snapshot output data format is TTree, but the input data format is RNTuple. If you " | |||
| 1373 | "want to Snapshot to RNTuple or suppress this warning, set the appropriate fOutputFormat option in " | |||
| 1374 | "RSnapshotOptions. Note that this current default behaviour might change in the future."); | |||
| 1375 | } | |||
| 1376 | ||||
| 1377 | // We create an RLoopManager without a data source. This needs to be initialised when the output TTree dataset | |||
| 1378 | // has actually been created and written to TFile, i.e. at the end of the Snapshot execution. | |||
| 1379 | auto newRDF = std::make_shared<RInterface<RLoopManager>>( | |||
| 1380 | std::make_shared<RLoopManager>(colListNoAliasesWithSizeBranches)); | |||
| 1381 | ||||
| 1382 | auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{ | |||
| 1383 | std::string(filename), std::string(dirname), std::string(treename), colListWithAliasesAndSizeBranches, | |||
| 1384 | options, newRDF->GetLoopManager(), GetLoopManager(), false /* fToRNTuple */}); | |||
| 1385 | ||||
| 1386 | resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, RDFDetail::RInferredType>( | |||
| 1387 | colListNoAliasesWithSizeBranches, newRDF, snapHelperArgs, fProxiedPtr, | |||
| 1388 | colListNoAliasesWithSizeBranches.size(), options.fVector2RVec); | |||
| 1389 | } | |||
| 1390 | ||||
| 1391 | if (!options.fLazy) | |||
| 1392 | *resPtr; | |||
| 1393 | return resPtr; | |||
| 1394 | } | |||
| 1395 | ||||
| 1396 | // clang-format off | |||
| 1397 | //////////////////////////////////////////////////////////////////////////// | |||
| 1398 | /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`. | |||
| 1399 | /// \param[in] treename The name of the output TTree. | |||
| 1400 | /// \param[in] filename The name of the output TFile. | |||
| 1401 | /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. The presence of a '^' and a '$' at the end of the string is implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns. | |||
| 1402 | /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree | |||
| 1403 | /// \return a `RDataFrame` that wraps the snapshotted dataset. | |||
| 1404 | /// | |||
| 1405 | /// This function returns a `RDataFrame` built with the output tree as a source. | |||
| 1406 | /// The types of the columns are automatically inferred and do not need to be specified. | |||
| 1407 | /// | |||
| 1408 | /// See above for a more complete description and example usages. | |||
| 1409 | RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename, | |||
| 1410 | std::string_view columnNameRegexp = "", | |||
| 1411 | const RSnapshotOptions &options = RSnapshotOptions()) | |||
| 1412 | { | |||
| 1413 | const auto definedColumns = fColRegister.GenerateColumnNames(); | |||
| 1414 | auto *tree = fLoopManager->GetTree(); | |||
| 1415 | ||||
| 1416 | const auto treeBranchNames = tree != nullptr ? ROOT::Internal::TreeUtils::GetTopLevelBranchNames(*tree) : ColumnNames_t{}; | |||
| 1417 | const auto dsColumns = GetDataSource() ? ROOT::Internal::RDF::GetTopLevelFieldNames(*GetDataSource()) : ColumnNames_t{}; | |||
| 1418 | // Ignore R_rdf_sizeof_* columns coming from datasources: we don't want to Snapshot those | |||
| 1419 | ColumnNames_t dsColumnsWithoutSizeColumns; | |||
| 1420 | std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns), | |||
| 1421 | [](const std::string &name) { return name.size() < 13 || name.substr(0, 13) != "R_rdf_sizeof_"; }); | |||
| 1422 | ColumnNames_t columnNames; | |||
| 1423 | columnNames.reserve(definedColumns.size() + treeBranchNames.size() + dsColumnsWithoutSizeColumns.size()); | |||
| 1424 | columnNames.insert(columnNames.end(), definedColumns.begin(), definedColumns.end()); | |||
| 1425 | columnNames.insert(columnNames.end(), treeBranchNames.begin(), treeBranchNames.end()); | |||
| 1426 | columnNames.insert(columnNames.end(), dsColumnsWithoutSizeColumns.begin(), dsColumnsWithoutSizeColumns.end()); | |||
| 1427 | ||||
| 1428 | // The only way we can get duplicate entries is if a column coming from a tree or data-source is Redefine'd. | |||
| 1429 | // RemoveDuplicates should preserve ordering of the columns: it might be meaningful. | |||
| 1430 | RDFInternal::RemoveDuplicates(columnNames); | |||
| 1431 | ||||
| 1432 | auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Snapshot"); | |||
| 1433 | ||||
| 1434 | if (RDFInternal::GetDataSourceLabel(*this) == "RNTupleDS") { | |||
| 1435 | RDFInternal::RemoveRNTupleSubFields(selectedColumns); | |||
| 1436 | } | |||
| 1437 | ||||
| 1438 | return Snapshot(treename, filename, selectedColumns, options); | |||
| 1439 | } | |||
| 1440 | // clang-format on | |||
| 1441 | ||||
| 1442 | // clang-format off | |||
| 1443 | //////////////////////////////////////////////////////////////////////////// | |||
| 1444 | /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`. | |||
| 1445 | /// \param[in] treename The name of the output TTree. | |||
| 1446 | /// \param[in] filename The name of the output TFile. | |||
| 1447 | /// \param[in] columnList The list of names of the columns/branches to be written. | |||
| 1448 | /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree. | |||
| 1449 | /// \return a `RDataFrame` that wraps the snapshotted dataset. | |||
| 1450 | /// | |||
| 1451 | /// This function returns a `RDataFrame` built with the output tree as a source. | |||
| 1452 | /// The types of the columns are automatically inferred and do not need to be specified. | |||
| 1453 | /// | |||
| 1454 | /// See above for a more complete description and example usages. | |||
| 1455 | RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename, | |||
| 1456 | std::initializer_list<std::string> columnList, | |||
| 1457 | const RSnapshotOptions &options = RSnapshotOptions()) | |||
| 1458 | { | |||
| 1459 | ColumnNames_t selectedColumns(columnList); | |||
| 1460 | return Snapshot(treename, filename, selectedColumns, options); | |||
| 1461 | } | |||
| 1462 | // clang-format on | |||
| 1463 | ||||
| 1464 | //////////////////////////////////////////////////////////////////////////// | |||
| 1465 | /// \brief Save selected columns in memory. | |||
| 1466 | /// \tparam ColumnTypes variadic list of branch/column types. | |||
| 1467 | /// \param[in] columnList columns to be cached in memory. | |||
| 1468 | /// \return a `RDataFrame` that wraps the cached dataset. | |||
| 1469 | /// | |||
| 1470 | /// This action returns a new `RDataFrame` object, completely detached from | |||
| 1471 | /// the originating `RDataFrame`. The new dataframe only contains the cached | |||
| 1472 | /// columns and stores their content in memory for fast, zero-copy subsequent access. | |||
| 1473 | /// | |||
| 1474 | /// Use `Cache` if you know you will only need a subset of the (`Filter`ed) data that | |||
| 1475 | /// fits in memory and that will be accessed many times. | |||
| 1476 | /// | |||
| 1477 | /// \note Cache will refuse to process columns with names of the form `#columnname`. These are special columns | |||
| 1478 | /// made available by some data sources (e.g. RNTupleDS) that represent the size of column `columnname`, and are | |||
| 1479 | /// not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an | |||
| 1480 | /// Alias(): `df.Alias("nbar", "#bar").Cache<std::size_t>(..., {"nbar"})`. | |||
| 1481 | /// | |||
| 1482 | /// ### Example usage: | |||
| 1483 | /// | |||
| 1484 | /// **Types and columns specified:** | |||
| 1485 | /// ~~~{.cpp} | |||
| 1486 | /// auto cache_some_cols_df = df.Cache<double, MyClass, int>({"col0", "col1", "col2"}); | |||
| 1487 | /// ~~~ | |||
| 1488 | /// | |||
| 1489 | /// **Types inferred and columns specified (this invocation relies on jitting):** | |||
| 1490 | /// ~~~{.cpp} | |||
| 1491 | /// auto cache_some_cols_df = df.Cache({"col0", "col1", "col2"}); | |||
| 1492 | /// ~~~ | |||
| 1493 | /// | |||
| 1494 | /// **Types inferred and columns selected with a regexp (this invocation relies on jitting):** | |||
| 1495 | /// ~~~{.cpp} | |||
| 1496 | /// auto cache_all_cols_df = df.Cache(myRegexp); | |||
| 1497 | /// ~~~ | |||
| 1498 | template <typename... ColumnTypes> | |||
| 1499 | RInterface<RLoopManager> Cache(const ColumnNames_t &columnList) | |||
| 1500 | { | |||
| 1501 | auto staticSeq = std::make_index_sequence<sizeof...(ColumnTypes)>(); | |||
| 1502 | return CacheImpl<ColumnTypes...>(columnList, staticSeq); | |||
| 1503 | } | |||
| 1504 | ||||
| 1505 | //////////////////////////////////////////////////////////////////////////// | |||
| 1506 | /// \brief Save selected columns in memory. | |||
| 1507 | /// \param[in] columnList columns to be cached in memory | |||
| 1508 | /// \return a `RDataFrame` that wraps the cached dataset. | |||
| 1509 | /// | |||
| 1510 | /// See the previous overloads for more information. | |||
| 1511 | RInterface<RLoopManager> Cache(const ColumnNames_t &columnList) | |||
| 1512 | { | |||
| 1513 | // Early return: if the list of columns is empty, just return an empty RDF | |||
| 1514 | // If we proceed, the jitted call will not compile! | |||
| 1515 | if (columnList.empty()) { | |||
| 1516 | auto nEntries = *this->Count(); | |||
| 1517 | RInterface<RLoopManager> emptyRDF(std::make_shared<RLoopManager>(nEntries)); | |||
| 1518 | return emptyRDF; | |||
| 1519 | } | |||
| 1520 | ||||
| 1521 | std::stringstream cacheCall; | |||
| 1522 | auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr); | |||
| 1523 | RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, *fLoopManager, | |||
| 1524 | fColRegister); | |||
| 1525 | // build a string equivalent to | |||
| 1526 | // "(RInterface<nodetype*>*)(this)->Cache<Ts...>(*(ColumnNames_t*)(&columnList))" | |||
| 1527 | RInterface<RLoopManager> resRDF(std::make_shared<ROOT::Detail::RDF::RLoopManager>(0)); | |||
| 1528 | cacheCall << "*reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>*>(" | |||
| 1529 | << RDFInternal::PrettyPrintAddr(&resRDF) | |||
| 1530 | << ") = reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RNodeBase>*>(" | |||
| 1531 | << RDFInternal::PrettyPrintAddr(&upcastInterface) << ")->Cache<"; | |||
| 1532 | ||||
| 1533 | const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Cache"); | |||
| 1534 | ||||
| 1535 | const auto validColumnNames = | |||
| 1536 | GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns); | |||
| 1537 | const auto colTypes = GetValidatedArgTypes(validColumnNames, fColRegister, fLoopManager->GetTree(), | |||
| 1538 | GetDataSource(), "Cache", /*vector2RVec=*/false); | |||
| 1539 | for (const auto &colType : colTypes) | |||
| 1540 | cacheCall << colType << ", "; | |||
| 1541 | if (!columnListWithoutSizeColumns.empty()) | |||
| 1542 | cacheCall.seekp(-2, cacheCall.cur); // remove the last ", | |||
| 1543 | cacheCall << ">(*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t | |||
| 1544 | << RDFInternal::PrettyPrintAddr(&columnListWithoutSizeColumns) << "));"; | |||
| 1545 | ||||
| 1546 | // book the code to jit with the RLoopManager and trigger the event loop | |||
| 1547 | fLoopManager->ToJitExec(cacheCall.str()); | |||
| 1548 | fLoopManager->Jit(); | |||
| 1549 | ||||
| 1550 | return resRDF; | |||
| 1551 | } | |||
| 1552 | ||||
| 1553 | //////////////////////////////////////////////////////////////////////////// | |||
| 1554 | /// \brief Save selected columns in memory. | |||
| 1555 | /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. The presence of a '^' and a '$' at the end of the string is implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns. | |||
| 1556 | /// \return a `RDataFrame` that wraps the cached dataset. | |||
| 1557 | /// | |||
| 1558 | /// The existing columns are matched against the regular expression. If the string provided | |||
| 1559 | /// is empty, all columns are selected. See the previous overloads for more information. | |||
| 1560 | RInterface<RLoopManager> Cache(std::string_view columnNameRegexp = "") | |||
| 1561 | { | |||
| 1562 | const auto definedColumns = fColRegister.GenerateColumnNames(); | |||
| 1563 | auto *tree = fLoopManager->GetTree(); | |||
| 1564 | const auto treeBranchNames = | |||
| 1565 | tree != nullptr ? ROOT::Internal::TreeUtils::GetTopLevelBranchNames(*tree) : ColumnNames_t{}; | |||
| 1566 | const auto dsColumns = GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{}; | |||
| 1567 | // Ignore R_rdf_sizeof_* columns coming from datasources: we don't want to Snapshot those | |||
| 1568 | ColumnNames_t dsColumnsWithoutSizeColumns; | |||
| 1569 | std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns), | |||
| 1570 | [](const std::string &name) { return name.size() < 13 || name.substr(0, 13) != "R_rdf_sizeof_"; }); | |||
| 1571 | ColumnNames_t columnNames; | |||
| 1572 | columnNames.reserve(definedColumns.size() + treeBranchNames.size() + dsColumns.size()); | |||
| 1573 | columnNames.insert(columnNames.end(), definedColumns.begin(), definedColumns.end()); | |||
| 1574 | columnNames.insert(columnNames.end(), treeBranchNames.begin(), treeBranchNames.end()); | |||
| 1575 | columnNames.insert(columnNames.end(), dsColumns.begin(), dsColumns.end()); | |||
| 1576 | const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Cache"); | |||
| 1577 | return Cache(selectedColumns); | |||
| 1578 | } | |||
| 1579 | ||||
| 1580 | //////////////////////////////////////////////////////////////////////////// | |||
| 1581 | /// \brief Save selected columns in memory. | |||
| 1582 | /// \param[in] columnList columns to be cached in memory. | |||
| 1583 | /// \return a `RDataFrame` that wraps the cached dataset. | |||
| 1584 | /// | |||
| 1585 | /// See the previous overloads for more information. | |||
| 1586 | RInterface<RLoopManager> Cache(std::initializer_list<std::string> columnList) | |||
| 1587 | { | |||
| 1588 | ColumnNames_t selectedColumns(columnList); | |||
| 1589 | return Cache(selectedColumns); | |||
| 1590 | } | |||
| 1591 | ||||
| 1592 | // clang-format off | |||
| 1593 | //////////////////////////////////////////////////////////////////////////// | |||
| 1594 | /// \brief Creates a node that filters entries based on range: [begin, end). | |||
| 1595 | /// \param[in] begin Initial entry number considered for this range. | |||
| 1596 | /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset. A non-zero `end` must not be smaller than `begin`. | |||
| 1597 | /// \param[in] stride Process one entry of the [begin, end) range every `stride` entries. Must be strictly greater than 0. | |||
| 1598 | /// \return the first node of the computation graph for which the event loop is limited to a certain range of entries. | |||
| 1599 | /// | |||
| 1600 | /// Note that in case of previous Ranges and Filters the selected range refers to the transformed dataset. | |||
| 1601 | /// Ranges are only available if EnableImplicitMT has _not_ been called. Multi-thread ranges are not supported. A std::runtime_error is thrown if `stride` is 0 or if `end` is non-zero and smaller than `begin`. | |||
| 1602 | /// | |||
| 1603 | /// ### Example usage: | |||
| 1604 | /// ~~~{.cpp} | |||
| 1605 | /// auto d_0_30 = d.Range(0, 30); // Pick the first 30 entries | |||
| 1606 | /// auto d_15_end = d.Range(15, 0); // Pick all entries from 15 onwards | |||
| 1607 | /// auto d_15_end_3 = d.Range(15, 0, 3); // Stride: from event 15, pick an event every 3 | |||
| 1608 | /// ~~~ | |||
| 1609 | // clang-format on | |||
| 1610 | RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int begin, unsigned int end, unsigned int stride = 1) | |||
| 1611 | { | |||
| 1612 | // check invariants: stride must be non-zero, and end must be either 0 (no upper bound) or not smaller than begin | |||
| 1613 | if (stride == 0 || (end != 0 && end < begin)) | |||
| 1614 | throw std::runtime_error("Range: stride must be strictly greater than 0 and end must be greater than begin."); | |||
| 1615 | CheckIMTDisabled("Range"); | |||
| 1616 | ||||
| 1617 | using Range_t = RDFDetail::RRange<Proxied>; | |||
| 1618 | auto rangePtr = std::make_shared<Range_t>(begin, end, stride, fProxiedPtr); | |||
| 1619 | RInterface<RDFDetail::RRange<Proxied>, DS_t> newInterface(std::move(rangePtr), *fLoopManager, fColRegister); | |||
| 1620 | return newInterface; | |||
| 1621 | } | |||
| 1622 | ||||
| 1623 | // clang-format off | |||
| 1624 | //////////////////////////////////////////////////////////////////////////// | |||
| 1625 | /// \brief Creates a node that filters entries based on range: [0, end). | |||
| 1626 | /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset. | |||
| 1627 | /// \return a node of the computation graph for which the range is defined. | |||
| 1628 | /// | |||
| 1629 | /// Equivalent to `Range(0, end, 1)`. See the other Range overload for a detailed description. | |||
| 1630 | // clang-format on | |||
| 1631 | RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int end) { return Range(0, end, 1); } | |||
| 1632 | ||||
| 1633 | // clang-format off | |||
| 1634 | //////////////////////////////////////////////////////////////////////////// | |||
| 1635 | /// \brief Execute a user-defined function on each entry (*instant action*). | |||
| 1636 | /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations. | |||
| 1637 | /// \param[in] columns Names of the columns/branches in input to the user function. Defaults to an empty list. | |||
| 1638 | /// | |||
| 1639 | /// The callable `f` is invoked once per entry. This is an *instant action*: | |||
| 1640 | /// upon invocation, an event loop as well as execution of all scheduled actions | |||
| 1641 | /// is triggered. | |||
| 1642 | /// Users are responsible for the thread-safety of this callable when executing | |||
| 1643 | /// with implicit multi-threading enabled (i.e. ROOT::EnableImplicitMT). Internally `f` is adapted with `RDFInternal::AddSlotParameter` and handed to `ForeachSlot`. | |||
| 1644 | /// | |||
| 1645 | /// ### Example usage: | |||
| 1646 | /// ~~~{.cpp} | |||
| 1647 | /// myDf.Foreach([](int i){ std::cout << i << std::endl;}, {"myIntColumn"}); | |||
| 1648 | /// ~~~ | |||
| 1649 | // clang-format on | |||
| 1650 | template <typename F> | |||
| 1651 | void Foreach(F f, const ColumnNames_t &columns = {}) | |||
| 1652 | { | |||
| 1653 | using arg_types = typename TTraits::CallableTraits<decltype(f)>::arg_types_nodecay; | |||
| 1654 | using ret_type = typename TTraits::CallableTraits<decltype(f)>::ret_type; | |||
| 1655 | ForeachSlot(RDFInternal::AddSlotParameter<ret_type>(f, arg_types()), columns); | |||
| 1656 | } | |||
| 1657 | ||||
| 1658 | // clang-format off | |||
| 1659 | //////////////////////////////////////////////////////////////////////////// | |||
| 1660 | /// \brief Execute a user-defined function requiring a processing slot index on each entry (*instant action*). | |||
| 1661 | /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations. | |||
| 1662 | /// \param[in] columns Names of the columns/branches in input to the user function. The leading slot parameter of `f` does not correspond to a column and is not counted. Defaults to an empty list. | |||
| 1663 | /// | |||
| 1664 | /// Same as `Foreach`, but the user-defined function takes an extra | |||
| 1665 | /// `unsigned int` as its first parameter, the *processing slot index*. | |||
| 1666 | /// This *slot index* will be assigned a different value, `0` to `poolSize - 1`, | |||
| 1667 | /// for each thread of execution. | |||
| 1668 | /// This is meant as a helper in writing thread-safe `Foreach` | |||
| 1669 | /// actions when using `RDataFrame` after `ROOT::EnableImplicitMT()`. | |||
| 1670 | /// The user-defined processing callable is able to follow different | |||
| 1671 | /// *streams of processing* indexed by the first parameter. | |||
| 1672 | /// `ForeachSlot` works just as well with single-thread execution: in that | |||
| 1673 | /// case `slot` will always be `0`. The event loop is run before this method returns. | |||
| 1674 | /// | |||
| 1675 | /// ### Example usage: | |||
| 1676 | /// ~~~{.cpp} | |||
| 1677 | /// myDf.ForeachSlot([](unsigned int s, int i){ std::cout << "Slot " << s << ": "<< i << std::endl;}, {"myIntColumn"}); | |||
| 1678 | /// ~~~ | |||
| 1679 | // clang-format on | |||
| 1680 | template <typename F> | |||
| 1681 | void ForeachSlot(F f, const ColumnNames_t &columns = {}) | |||
| 1682 | { | |||
| 1683 | using ColTypes_t = TypeTraits::RemoveFirstParameter_t<typename TTraits::CallableTraits<F>::arg_types>; | |||
| 1684 | constexpr auto nColumns = ColTypes_t::list_size; | |||
| 1685 | ||||
| 1686 | const auto validColumnNames = GetValidatedColumnNames(nColumns, columns); | |||
| 1687 | CheckAndFillDSColumns(validColumnNames, ColTypes_t()); | |||
| 1688 | ||||
| 1689 | using Helper_t = RDFInternal::ForeachSlotHelper<F>; | |||
| 1690 | using Action_t = RDFInternal::RAction<Helper_t, Proxied>; | |||
| 1691 | ||||
| 1692 | auto action = std::make_unique<Action_t>(Helper_t(std::move(f)), validColumnNames, fProxiedPtr, fColRegister); | |||
| 1693 | ||||
| 1694 | fLoopManager->Run(); | |||
| 1695 | } | |||
| 1696 | ||||
| 1697 | // clang-format off | |||
| 1698 | //////////////////////////////////////////////////////////////////////////// | |||
| 1699 | /// \brief Execute a user-defined reduce operation on the values of a column. | |||
| 1700 | /// \tparam F The type of the reduce callable. Automatically deduced. | |||
| 1701 | /// \tparam T The type of the column to apply the reduction to. Automatically deduced from the return type of `f`. | |||
| 1702 | /// \param[in] f A callable with signature `T(T,T)` | |||
| 1703 | /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead. | |||
| 1704 | /// \return the reduced quantity wrapped in a ROOT::RDF::RResultPtr. | |||
| 1705 | /// | |||
| 1706 | /// A reduction takes two values of a column and merges them into one (e.g. | |||
| 1707 | /// by summing them, taking the maximum, etc). This action performs the | |||
| 1708 | /// specified reduction operation on all processed column values, returning | |||
| 1709 | /// a single value of the same type. The callable f must satisfy the general | |||
| 1710 | /// requirements of a *processing function* besides having signature `T(T,T)` | |||
| 1711 | /// where `T` is the type of column columnName. | |||
| 1712 | /// | |||
| 1713 | /// The returned reduced value of each thread (e.g. the initial value of a sum) is initialized to a | |||
| 1714 | /// default-constructed T object. This is commonly expected to be the neutral/identity element for the specific | |||
| 1715 | /// reduction operation `f` (e.g. 0 for a sum, 1 for a product). If a default-constructed T does not satisfy this | |||
| 1716 | /// requirement, users should explicitly specify an initialization value for T by calling the appropriate `Reduce` | |||
| 1717 | /// overload. A `T` that is not default-constructible is rejected at compile time by this overload. | |||
| 1718 | /// | |||
| 1719 | /// ### Example usage: | |||
| 1720 | /// ~~~{.cpp} | |||
| 1721 | /// auto sumOfIntCol = d.Reduce([](int x, int y) { return x + y; }, "intCol"); | |||
| 1722 | /// ~~~ | |||
| 1723 | /// | |||
| 1724 | /// This action is *lazy*: upon invocation of this method the calculation is | |||
| 1725 | /// booked but not executed. Also see RResultPtr. | |||
| 1726 | // clang-format on | |||
| 1727 | template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type> | |||
| 1728 | RResultPtr<T> Reduce(F f, std::string_view columnName = "") | |||
| 1729 | { | |||
| 1730 | static_assert( | |||
| 1731 | std::is_default_constructible<T>::value, | |||
| 1732 | "reduce object cannot be default-constructed. Please provide an initialisation value (redIdentity)"); | |||
| 1733 | return Reduce(std::move(f), columnName, T()); | |||
| 1734 | } | |||
| 1735 | ||||
| 1736 | //////////////////////////////////////////////////////////////////////////// | |||
| 1737 | /// \brief Execute a user-defined reduce operation on the values of a column, starting from an explicit initial value. | |||
| 1738 | /// \tparam F The type of the reduce callable. Automatically deduced. | |||
| 1739 | /// \tparam T The type of the column to apply the reduction to. Automatically deduced from the return type of `f`. | |||
| 1740 | /// \param[in] f A callable with signature `T(T,T)` | |||
| 1741 | /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead. | |||
| 1742 | /// \param[in] redIdentity The reduced object of each thread is initialized to this value. | |||
| 1743 | /// \return the reduced quantity wrapped in a RResultPtr. | |||
| 1744 | /// | |||
| 1745 | /// ### Example usage: | |||
| 1746 | /// ~~~{.cpp} | |||
| 1747 | /// auto sumOfIntColWithOffset = d.Reduce([](int x, int y) { return x + y; }, "intCol", 42); | |||
| 1748 | /// ~~~ | |||
| 1749 | /// See the description of the first Reduce overload for more information. The same callable `f` is passed to `Aggregate` as both of its callable arguments. | |||
| 1750 | template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type> | |||
| 1751 | RResultPtr<T> Reduce(F f, std::string_view columnName, const T &redIdentity) | |||
| 1752 | { | |||
| 1753 | return Aggregate(f, f, columnName, redIdentity); | |||
| 1754 | } | |||
| 1755 | ||||
| 1756 | //////////////////////////////////////////////////////////////////////////// | |||
| 1757 | /// \brief Return the number of entries processed (*lazy action*). | |||
| 1758 | /// \return the number of entries wrapped in a RResultPtr (a `ULong64_t` that starts at 0). | |||
| 1759 | /// | |||
| 1760 | /// Useful e.g. for counting the number of entries passing a certain filter (see also `Report`). | |||
| 1761 | /// This action is *lazy*: upon invocation of this method the calculation is | |||
| 1762 | /// booked but not executed. Also see RResultPtr. | |||
| 1763 | /// | |||
| 1764 | /// ### Example usage: | |||
| 1765 | /// ~~~{.cpp} | |||
| 1766 | /// auto nEntriesAfterCuts = myFilteredDf.Count(); | |||
| 1767 | /// ~~~ | |||
| 1768 | /// | |||
| 1769 | RResultPtr<ULong64_t> Count() | |||
| 1770 | { | |||
| 1771 | const auto nSlots = fLoopManager->GetNSlots(); | |||
| 1772 | auto cSPtr = std::make_shared<ULong64_t>(0); | |||
| 1773 | using Helper_t = RDFInternal::CountHelper; | |||
| 1774 | using Action_t = RDFInternal::RAction<Helper_t, Proxied>; | |||
| 1775 | auto action = std::make_unique<Action_t>(Helper_t(cSPtr, nSlots), ColumnNames_t({}), fProxiedPtr, | |||
| 1776 | RDFInternal::RColumnRegister(fColRegister)); | |||
| 1777 | return MakeResultPtr(cSPtr, *fLoopManager, std::move(action)); | |||
| 1778 | } | |||
| 1779 | ||||
| 1780 | //////////////////////////////////////////////////////////////////////////// | |||
| 1781 | /// \brief Return a collection of values of a column (*lazy action*, returns a std::vector by default). | |||
| 1782 | /// \tparam T The type of the column. | |||
| 1783 | /// \tparam COLL The type of collection used to store the values. Defaults to `std::vector<T>`. | |||
| 1784 | /// \param[in] column The name of the column to collect the values of. NOTE(review): when empty, an empty name list is handed to column validation; presumably the first default column is then used -- confirm. | |||
| 1785 | /// \return the content of the selected column wrapped in a RResultPtr. | |||
| 1786 | /// | |||
| 1787 | /// The collection type to be specified for C-style array columns is `RVec<T>`: | |||
| 1788 | /// in this case the returned collection is a `std::vector<RVec<T>>`. | |||
| 1789 | /// ### Example usage: | |||
| 1790 | /// ~~~{.cpp} | |||
| 1791 | /// // In this case intCol is a std::vector<int> | |||
| 1792 | /// auto intCol = rdf.Take<int>("integerColumn"); | |||
| 1793 | /// // Same content as above but in this case taken as a RVec<int> | |||
| 1794 | /// auto intColAsRVec = rdf.Take<int, RVec<int>>("integerColumn"); | |||
| 1795 | /// // In this case intCol is a std::vector<RVec<int>>, a collection of collections | |||
| 1796 | /// auto cArrayIntCol = rdf.Take<RVec<int>>("cArrayInt"); | |||
| 1797 | /// ~~~ | |||
| 1798 | /// This action is *lazy*: upon invocation of this method the calculation is | |||
| 1799 | /// booked but not executed. Also see RResultPtr. | |||
| 1800 | template <typename T, typename COLL = std::vector<T>> | |||
| 1801 | RResultPtr<COLL> Take(std::string_view column = "") | |||
| 1802 | { | |||
| 1803 | const auto columns = column.empty() ? ColumnNames_t() : ColumnNames_t({std::string(column)}); | |||
| 1804 | ||||
| 1805 | const auto validColumnNames = GetValidatedColumnNames(1, columns); | |||
| 1806 | CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>()); | |||
| 1807 | ||||
| 1808 | using Helper_t = RDFInternal::TakeHelper<T, T, COLL>; | |||
| 1809 | using Action_t = RDFInternal::RAction<Helper_t, Proxied>; | |||
| 1810 | auto valuesPtr = std::make_shared<COLL>(); | |||
| 1811 | const auto nSlots = fLoopManager->GetNSlots(); | |||
| 1812 | ||||
| 1813 | auto action = | |||
| 1814 | std::make_unique<Action_t>(Helper_t(valuesPtr, nSlots), validColumnNames, fProxiedPtr, fColRegister); | |||
| 1815 | return MakeResultPtr(valuesPtr, *fLoopManager, std::move(action)); | |||
| 1816 | } | |||
| 1817 | ||||
| 1818 | //////////////////////////////////////////////////////////////////////////// | |||
| 1819 | /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*). | |||
| 1820 | /// \tparam V The type of the column used to fill the histogram. | |||
| 1821 | /// \param[in] model The returned histogram will be constructed using this as a model. Defaults to an unnamed, untitled model with 128 bins and both axis limits set to 0. | |||
| 1822 | /// \param[in] vName The name of the column that will fill the histogram. | |||
| 1823 | /// \return the monodimensional histogram wrapped in a RResultPtr. | |||
| 1824 | /// | |||
| 1825 | /// Columns can be of a container type (e.g. `std::vector<double>`), in which case the histogram | |||
| 1826 | /// is filled with each one of the elements of the container. In case multiple columns of container type | |||
| 1827 | /// are provided (e.g. values and weights) they must have the same length for each one of the events (but | |||
| 1828 | /// possibly different lengths between events). | |||
| 1829 | /// This action is *lazy*: upon invocation of this method the calculation is | |||
| 1830 | /// booked but not executed. Also see RResultPtr. If the x axis of the model histogram has equal minimum and maximum, `SetCanExtendAllAxes` is applied to the histogram before the action is booked. | |||
| 1831 | /// | |||
| 1832 | /// ### Example usage: | |||
| 1833 | /// ~~~{.cpp} | |||
| 1834 | /// // Deduce column type (this invocation needs jitting internally) | |||
| 1835 | /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myColumn"); | |||
| 1836 | /// // Explicit column type | |||
| 1837 | /// auto myHist2 = myDf.Histo1D<float>({"histName", "histTitle", 64u, 0., 128.}, "myColumn"); | |||
| 1838 | /// ~~~ | |||
| 1839 | /// | |||
| 1840 | /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory (it is explicitly detached with `SetDirectory(nullptr)`) | |||
| 1841 | /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that | |||
| 1842 | /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas). | |||
| 1843 | template <typename V = RDFDetail::RInferredType> | |||
| 1844 | RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.}, std::string_view vName = "") | |||
| 1845 | { | |||
| 1846 | const auto userColumns = vName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(vName)}); | |||
| 1847 | ||||
| 1848 | const auto validatedColumns = GetValidatedColumnNames(1, userColumns); | |||
| 1849 | ||||
| 1850 | std::shared_ptr<::TH1D> h(nullptr); | |||
| 1851 | { | |||
| 1852 | ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); | |||
| 1853 | h = model.GetHistogram(); | |||
| 1854 | h->SetDirectory(nullptr); | |||
| 1855 | } | |||
| 1856 | ||||
| 1857 | if (h->GetXaxis()->GetXmax() == h->GetXaxis()->GetXmin()) | |||
| 1858 | RDFInternal::HistoUtils<::TH1D>::SetCanExtendAllAxes(*h); | |||
| 1859 | return CreateAction<RDFInternal::ActionTags::Histo1D, V>(validatedColumns, h, h, fProxiedPtr); | |||
| 1860 | } | |||
| 1861 | ||||
| 1862 | //////////////////////////////////////////////////////////////////////////// | |||
| 1863 | /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*). | |||
| 1864 | /// \tparam V The type of the column used to fill the histogram. | |||
| 1865 | /// \param[in] vName The name of the column that will fill the histogram. | |||
| 1866 | /// \return the monodimensional histogram wrapped in a RResultPtr. | |||
| 1867 | /// | |||
| 1868 | /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.). | |||
| 1869 | /// The "name" and "title" strings are built starting from the input column name: the name is the column name itself and the title is `<column>;<column>;count`. | |||
| 1870 | /// See the description of the first Histo1D() overload for more details. | |||
| 1871 | /// | |||
| 1872 | /// ### Example usage: | |||
| 1873 | /// ~~~{.cpp} | |||
| 1874 | /// // Deduce column type (this invocation needs jitting internally) | |||
| 1875 | /// auto myHist1 = myDf.Histo1D("myColumn"); | |||
| 1876 | /// // Explicit column type | |||
| 1877 | /// auto myHist2 = myDf.Histo1D<float>("myColumn"); | |||
| 1878 | /// ~~~ | |||
| 1879 | template <typename V = RDFDetail::RInferredType> | |||
| 1880 | RResultPtr<::TH1D> Histo1D(std::string_view vName) | |||
| 1881 | { | |||
| 1882 | const auto h_name = std::string(vName); | |||
| 1883 | const auto h_title = h_name + ";" + h_name + ";count"; | |||
| 1884 | return Histo1D<V>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName); | |||
| 1885 | } | |||
| 1886 | ||||
| 1887 | //////////////////////////////////////////////////////////////////////////// | |||
| 1888 | /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*). | |||
| 1889 | /// \tparam V The type of the column used to fill the histogram. | |||
| 1890 | /// \tparam W The type of the column used as weights. | |||
| 1891 | /// \param[in] model The returned histogram will be constructed using this as a model. | |||
| 1892 | /// \param[in] vName The name of the column that will fill the histogram. | |||
| 1893 | /// \param[in] wName The name of the column that will provide the weights. | |||
| 1894 | /// \return the monodimensional histogram wrapped in a RResultPtr. | |||
| 1895 | /// | |||
| 1896 | /// See the description of the first Histo1D() overload for more details. If `RDFInternal::AtLeastOneEmptyString` flags the pair {vName, wName}, an empty column list is passed on instead of the two names. | |||
| 1897 | /// | |||
| 1898 | /// ### Example usage: | |||
| 1899 | /// ~~~{.cpp} | |||
| 1900 | /// // Deduce column type (this invocation needs jitting internally) | |||
| 1901 | /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight"); | |||
| 1902 | /// // Explicit column type | |||
| 1903 | /// auto myHist2 = myDf.Histo1D<float, int>({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight"); | |||
| 1904 | /// ~~~ | |||
| 1905 | template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType> | |||
| 1906 | RResultPtr<::TH1D> Histo1D(const TH1DModel &model, std::string_view vName, std::string_view wName) | |||
| 1907 | { | |||
| 1908 | const std::vector<std::string_view> columnViews = {vName, wName}; | |||
| 1909 | const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) | |||
| 1910 | ? ColumnNames_t() | |||
| 1911 | : ColumnNames_t(columnViews.begin(), columnViews.end()); | |||
| 1912 | std::shared_ptr<::TH1D> h(nullptr); | |||
| 1913 | { | |||
| 1914 | ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); | |||
| 1915 | h = model.GetHistogram(); | |||
| 1916 | } | |||
| 1917 | return CreateAction<RDFInternal::ActionTags::Histo1D, V, W>(userColumns, h, h, fProxiedPtr); | |||
| 1918 | } | |||
| 1919 | ||||
| 1920 | //////////////////////////////////////////////////////////////////////////// | |||
| 1921 | /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*). | |||
| 1922 | /// \tparam V The type of the column used to fill the histogram. | |||
| 1923 | /// \tparam W The type of the column used as weights. | |||
| 1924 | /// \param[in] vName The name of the column that will fill the histogram. | |||
| 1925 | /// \param[in] wName The name of the column that will provide the weights. | |||
| 1926 | /// \return the monodimensional histogram wrapped in a RResultPtr. | |||
| 1927 | /// | |||
| 1928 | /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.). | |||
| 1929 | /// The "name" and "title" strings are built starting from the input column names: the name is `<vName>_weighted_<wName>` and the title is `<vName>, weights: <wName>;<vName>;count * <wName>`. | |||
| 1930 | /// See the description of the first Histo1D() overload for more details. | |||
| 1931 | /// | |||
| 1932 | /// ### Example usage: | |||
| 1933 | /// ~~~{.cpp} | |||
| 1934 | /// // Deduce column types (this invocation needs jitting internally) | |||
| 1935 | /// auto myHist1 = myDf.Histo1D("myValue", "myweight"); | |||
| 1936 | /// // Explicit column types | |||
| 1937 | /// auto myHist2 = myDf.Histo1D<float, int>("myValue", "myweight"); | |||
| 1938 | /// ~~~ | |||
| 1939 | template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType> | |||
| 1940 | RResultPtr<::TH1D> Histo1D(std::string_view vName, std::string_view wName) | |||
| 1941 | { | |||
| 1942 | // We build name and title based on the value and weight column names (owning std::string copies are needed for the concatenations below) | |||
| 1943 | std::string str_vName{vName}; | |||
| 1944 | std::string str_wName{wName}; | |||
| 1945 | const auto h_name = str_vName + "_weighted_" + str_wName; | |||
| 1946 | const auto h_title = str_vName + ", weights: " + str_wName + ";" + str_vName + ";count * " + str_wName; | |||
| 1947 | return Histo1D<V, W>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName, wName); | |||
| 1948 | } | |||
| 1949 | ||||
| 1950 | //////////////////////////////////////////////////////////////////////////// | |||
| 1951 | /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*). | |||
| 1952 | /// \tparam V The type of the column used to fill the histogram. Must be specified explicitly (no default). | |||
| 1953 | /// \tparam W The type of the column used as weights. Must be specified explicitly (no default). | |||
| 1954 | /// \param[in] model The returned histogram will be constructed using this as a model. Defaults to an unnamed, untitled model with 128 bins and both axis limits set to 0. | |||
| 1955 | /// \return the monodimensional histogram wrapped in a RResultPtr. | |||
| 1956 | /// | |||
| 1957 | /// This overload will use the first two default columns as column names: it forwards to the (model, vName, wName) overload with two empty names. | |||
| 1958 | /// See the description of the first Histo1D() overload for more details. | |||
| 1959 | template <typename V, typename W> | |||
| 1960 | RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.}) | |||
| 1961 | { | |||
| 1962 | return Histo1D<V, W>(model, "", ""); | |||
| 1963 | } | |||
| 1964 | ||||
| 1965 | //////////////////////////////////////////////////////////////////////////// | |||
| 1966 | /// \brief Fill and return a two-dimensional histogram (*lazy action*). | |||
| 1967 | /// \tparam V1 The type of the column used to fill the x axis of the histogram. | |||
| 1968 | /// \tparam V2 The type of the column used to fill the y axis of the histogram. | |||
| 1969 | /// \param[in] model The returned histogram will be constructed using this as a model. | |||
| 1970 | /// \param[in] v1Name The name of the column that will fill the x axis. | |||
| 1971 | /// \param[in] v2Name The name of the column that will fill the y axis. | |||
| 1972 | /// \return the bidimensional histogram wrapped in a RResultPtr. | |||
| 1973 | /// | |||
| 1974 | /// Columns can be of a container type (e.g. std::vector<double>), in which case the histogram | |||
| 1975 | /// is filled with each one of the elements of the container. In case multiple columns of container type | |||
| 1976 | /// are provided (e.g. values and weights) they must have the same length for each one of the events (but | |||
| 1977 | /// possibly different lengths between events). | |||
| 1978 | /// This action is *lazy*: upon invocation of this method the calculation is | |||
| 1979 | /// booked but not executed. Also see RResultPtr. A std::runtime_error is thrown if `HasAxisLimits` reports that the model histogram has no axis limits. If `RDFInternal::AtLeastOneEmptyString` flags {v1Name, v2Name}, an empty column list is passed on instead of the two names. | |||
| 1980 | /// | |||
| 1981 | /// ### Example usage: | |||
| 1982 | /// ~~~{.cpp} | |||
| 1983 | /// // Deduce column types (this invocation needs jitting internally) | |||
| 1984 | /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY"); | |||
| 1985 | /// // Explicit column types | |||
| 1986 | /// auto myHist2 = myDf.Histo2D<float, float>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY"); | |||
| 1987 | /// ~~~ | |||
| 1988 | /// | |||
| 1989 | /// | |||
| 1990 | /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory | |||
| 1991 | /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that | |||
| 1992 | /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas). | |||
| 1993 | template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType> | |||
| 1994 | RResultPtr<::TH2D> Histo2D(const TH2DModel &model, std::string_view v1Name = "", std::string_view v2Name = "") | |||
| 1995 | { | |||
| 1996 | std::shared_ptr<::TH2D> h(nullptr); | |||
| 1997 | { | |||
| 1998 | ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); | |||
| 1999 | h = model.GetHistogram(); | |||
| 2000 | } | |||
| 2001 | if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) { | |||
| 2002 | throw std::runtime_error("2D histograms with no axes limits are not supported yet."); | |||
| 2003 | } | |||
| 2004 | const std::vector<std::string_view> columnViews = {v1Name, v2Name}; | |||
| 2005 | const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) | |||
| 2006 | ? ColumnNames_t() | |||
| 2007 | : ColumnNames_t(columnViews.begin(), columnViews.end()); | |||
| 2008 | return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2>(userColumns, h, h, fProxiedPtr); | |||
| 2009 | } | |||
| 2010 | ||||
| 2011 | //////////////////////////////////////////////////////////////////////////// | |||
| 2012 | /// \brief Fill and return a weighted two-dimensional histogram (*lazy action*). | |||
| 2013 | /// \tparam V1 The type of the column used to fill the x axis of the histogram. | |||
| 2014 | /// \tparam V2 The type of the column used to fill the y axis of the histogram. | |||
| 2015 | /// \tparam W The type of the column used for the weights of the histogram. | |||
| 2016 | /// \param[in] model The returned histogram will be constructed using this as a model. | |||
| 2017 | /// \param[in] v1Name The name of the column that will fill the x axis. | |||
| 2018 | /// \param[in] v2Name The name of the column that will fill the y axis. | |||
| 2019 | /// \param[in] wName The name of the column that will provide the weights. | |||
| 2020 | /// \return the bidimensional histogram wrapped in a RResultPtr. | |||
| 2021 | /// | |||
| 2022 | /// This action is *lazy*: upon invocation of this method the calculation is | |||
| 2023 | /// booked but not executed. Also see RResultPtr. A std::runtime_error is thrown if `HasAxisLimits` reports that the model histogram has no axis limits. If `RDFInternal::AtLeastOneEmptyString` flags {v1Name, v2Name, wName}, an empty column list is passed on instead of the three names. | |||
| 2024 | /// | |||
| 2025 | /// ### Example usage: | |||
| 2026 | /// ~~~{.cpp} | |||
| 2027 | /// // Deduce column types (this invocation needs jitting internally) | |||
| 2028 | /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight"); | |||
| 2029 | /// // Explicit column types | |||
| 2030 | /// auto myHist2 = myDf.Histo2D<float, float, double>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight"); | |||
| 2031 | /// ~~~ | |||
| 2032 | /// | |||
| 2033 | /// See the documentation of the first Histo2D() overload for more details. | |||
| 2034 | template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType, | |||
| 2035 | typename W = RDFDetail::RInferredType> | |||
| 2036 | RResultPtr<::TH2D> | |||
| 2037 | Histo2D(const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName) | |||
| 2038 | { | |||
| 2039 | std::shared_ptr<::TH2D> h(nullptr); | |||
| 2040 | { | |||
| 2041 | ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); | |||
| 2042 | h = model.GetHistogram(); | |||
| 2043 | } | |||
| 2044 | if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) { | |||
| 2045 | throw std::runtime_error("2D histograms with no axes limits are not supported yet."); | |||
| 2046 | } | |||
| 2047 | const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName}; | |||
| 2048 | const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) | |||
| 2049 | ? ColumnNames_t() | |||
| 2050 | : ColumnNames_t(columnViews.begin(), columnViews.end()); | |||
| 2051 | return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2, W>(userColumns, h, h, fProxiedPtr); | |||
| 2052 | } | |||
| 2053 | ||||
| 2054 | /** \brief Fill and return a weighted two-dimensional histogram (lazy action). All three column types must be specified explicitly; forwards to the (model, v1Name, v2Name, wName) overload with three empty column names. \param[in] model The returned histogram will be constructed using this as a model. \return the bidimensional histogram wrapped in a RResultPtr. */ template <typename V1, typename V2, typename W> | |||
| 2055 | RResultPtr<::TH2D> Histo2D(const TH2DModel &model) | |||
| 2056 | { | |||
| 2057 | return Histo2D<V1, V2, W>(model, "", "", ""); | |||
| 2058 | } | |||
| 2059 | ||||
| 2060 | //////////////////////////////////////////////////////////////////////////// | |||
| 2061 | /// \brief Fill and return a three-dimensional histogram (*lazy action*). | |||
| 2062 | /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present. | |||
| 2063 | /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present. | |||
| 2064 | /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present. | |||
| 2065 | /// \param[in] model The returned histogram will be constructed using this as a model. | |||
| 2066 | /// \param[in] v1Name The name of the column that will fill the x axis. | |||
| 2067 | /// \param[in] v2Name The name of the column that will fill the y axis. | |||
| 2068 | /// \param[in] v3Name The name of the column that will fill the z axis. | |||
| 2069 | /// \return the tridimensional histogram wrapped in a RResultPtr. | |||
| 2070 | /// | |||
| 2071 | /// This action is *lazy*: upon invocation of this method the calculation is | |||
| 2072 | /// booked but not executed. Also see RResultPtr. A std::runtime_error is thrown if `HasAxisLimits` reports that the model histogram has no axis limits. If `RDFInternal::AtLeastOneEmptyString` flags {v1Name, v2Name, v3Name}, an empty column list is passed on instead of the three names. | |||
| 2073 | /// | |||
| 2074 | /// ### Example usage: | |||
| 2075 | /// ~~~{.cpp} | |||
| 2076 | /// // Deduce column types (this invocation needs jitting internally) | |||
| 2077 | /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.}, | |||
| 2078 | /// "myValueX", "myValueY", "myValueZ"); | |||
| 2079 | /// // Explicit column types | |||
| 2080 | /// auto myHist2 = myDf.Histo3D<double, double, float>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.}, | |||
| 2081 | /// "myValueX", "myValueY", "myValueZ"); | |||
| 2082 | /// ~~~ | |||
| 2083 | /// | |||
| 2084 | /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory | |||
| 2085 | /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that | |||
| 2086 | /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas). | |||
| 2087 | template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType, | |||
| 2088 | typename V3 = RDFDetail::RInferredType> | |||
| 2089 | RResultPtr<::TH3D> Histo3D(const TH3DModel &model, std::string_view v1Name = "", std::string_view v2Name = "", | |||
| 2090 | std::string_view v3Name = "") | |||
| 2091 | { | |||
| 2092 | std::shared_ptr<::TH3D> h(nullptr); | |||
| 2093 | { | |||
| 2094 | ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); | |||
| 2095 | h = model.GetHistogram(); | |||
| 2096 | } | |||
| 2097 | if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) { | |||
| 2098 | throw std::runtime_error("3D histograms with no axes limits are not supported yet."); | |||
| 2099 | } | |||
| 2100 | const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name}; | |||
| 2101 | const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) | |||
| 2102 | ? ColumnNames_t() | |||
| 2103 | : ColumnNames_t(columnViews.begin(), columnViews.end()); | |||
| 2104 | return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3>(userColumns, h, h, fProxiedPtr); | |||
| 2105 | } | |||
| 2106 | ||||
| 2107 | //////////////////////////////////////////////////////////////////////////// | |||
| 2108 | /// \brief Fill and return a three-dimensional histogram (*lazy action*). | |||
| 2109 | /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present. | |||
| 2110 | /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present. | |||
| 2111 | /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present. | |||
| 2112 | /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present. | |||
| 2113 | /// \param[in] model The returned histogram will be constructed using this as a model. It must have axis limits, otherwise a std::runtime_error is thrown. | |||
| 2114 | /// \param[in] v1Name The name of the column that will fill the x axis. | |||
| 2115 | /// \param[in] v2Name The name of the column that will fill the y axis. | |||
| 2116 | /// \param[in] v3Name The name of the column that will fill the z axis. | |||
| 2117 | /// \param[in] wName The name of the column that will provide the weights. | |||
| 2118 | /// \return the three-dimensional histogram wrapped in a RResultPtr. | |||
| 2119 | /// | |||
| 2120 | /// This action is *lazy*: upon invocation of this method the calculation is | |||
| 2121 | /// booked but not executed. Also see RResultPtr. | |||
| 2122 | /// | |||
| 2123 | /// ### Example usage: | |||
| 2124 | /// ~~~{.cpp} | |||
| 2125 | /// // Deduce column types (this invocation needs jitting internally) | |||
| 2126 | /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.}, | |||
| 2127 | /// "myValueX", "myValueY", "myValueZ", "myWeight"); | |||
| 2128 | /// // Explicit column types | |||
| 2129 | /// using d_t = double; | |||
| 2130 | /// auto myHist2 = myDf.Histo3D<d_t, d_t, float, d_t>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.}, | |||
| 2131 | /// "myValueX", "myValueY", "myValueZ", "myWeight"); | |||
| 2132 | /// ~~~ | |||
| 2133 | /// | |||
| 2134 | /// | |||
| 2135 | /// See the documentation of the first Histo3D() overload for more details. | |||
| 2136 | template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType, | |||
| 2137 | typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType> | |||
| 2138 | RResultPtr<::TH3D> Histo3D(const TH3DModel &model, std::string_view v1Name, std::string_view v2Name, | |||
| 2139 | std::string_view v3Name, std::string_view wName) | |||
| 2140 | { | |||
| 2141 | std::shared_ptr<::TH3D> h(nullptr); | |||
| 2142 | { | |||
| 2143 | ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); | |||
| 2144 | h = model.GetHistogram(); | |||
| 2145 | } | |||
| 2146 | if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) { | |||
| 2147 | throw std::runtime_error("3D histograms with no axes limits are not supported yet."); | |||
| 2148 | } | |||
| 2149 | const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName}; | |||
| 2150 | const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) | |||
| 2151 | ? ColumnNames_t() | |||
| 2152 | : ColumnNames_t(columnViews.begin(), columnViews.end()); | |||
| 2153 | return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3, W>(userColumns, h, h, fProxiedPtr); | |||
| 2154 | } | |||
| 2155 | ||||
| 2156 | template <typename V1, typename V2, typename V3, typename W> /* Histo3D overload taking only a model: all four column types must be given explicitly (no defaults here) */ | |||
| 2157 | RResultPtr<::TH3D> Histo3D(const TH3DModel &model) | |||
| 2158 | { | |||
| 2159 | return Histo3D<V1, V2, V3, W>(model, "", "", "", ""); /* delegates to the weighted overload with every column name empty */ | |||
| 2160 | } | |||
| 2161 | ||||
| 2162 | //////////////////////////////////////////////////////////////////////////// | |||
| 2163 | /// \brief Fill and return an N-dimensional histogram (*lazy action*). | |||
| 2164 | /// \tparam FirstColumn The type of the first column whose values are used to fill the histogram. This overload requires | |||
| 2165 | /// explicit column types; the non-template HistoND() overload infers them instead. | |||
| 2166 | /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the | |||
| 2167 | /// object. | |||
| 2168 | /// \param[in] model The returned histogram will be constructed using this as a model. | |||
| 2169 | /// \param[in] columnList | |||
| 2170 | /// A list containing the names of the columns that will be passed when calling `Fill`. | |||
| 2171 | /// (N columns for unweighted filling, or N+1 columns for weighted filling, where N is the number of histogram dimensions; any other count throws std::runtime_error) | |||
| 2172 | /// \return the N-dimensional histogram wrapped in a RResultPtr. | |||
| 2173 | /// | |||
| 2174 | /// This action is *lazy*: upon invocation of this method the calculation is | |||
| 2175 | /// booked but not executed. See RResultPtr documentation. | |||
| 2176 | /// | |||
| 2177 | /// ### Example usage: | |||
| 2178 | /// ~~~{.cpp} | |||
| 2179 | /// auto myFilledObj = myDf.HistoND<float, float, float, float>({"name","title", 4, | |||
| 2180 | /// {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}}, | |||
| 2181 | /// {"col0", "col1", "col2", "col3"}); | |||
| 2182 | /// ~~~ | |||
| 2183 | /// | |||
| 2184 | template <typename FirstColumn, typename... OtherColumns> // need FirstColumn to disambiguate overloads | |||
| 2185 | RResultPtr<::THnD> HistoND(const THnDModel &model, const ColumnNames_t &columnList) | |||
| 2186 | { | |||
| 2187 | std::shared_ptr<::THnD> h(nullptr); | |||
| 2188 | { | |||
| 2189 | ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); | |||
| 2190 | h = model.GetHistogram(); | |||
| 2191 | ||||
| 2192 | if (int(columnList.size()) == (h->GetNdimensions() + 1)) { | |||
| 2193 | h->Sumw2(); /* N+1 columns means weighted filling (see the columnList documentation) */ | |||
| 2194 | } else if (int(columnList.size()) != h->GetNdimensions()) { | |||
| 2195 | throw std::runtime_error("Wrong number of columns for the specified number of histogram axes."); | |||
| 2196 | } | |||
| 2197 | } | |||
| 2198 | return CreateAction<RDFInternal::ActionTags::HistoND, FirstColumn, OtherColumns...>(columnList, h, h, | |||
| 2199 | fProxiedPtr); | |||
| 2200 | } | |||
| 2201 | ||||
| 2202 | //////////////////////////////////////////////////////////////////////////// | |||
| 2203 | /// \brief Fill and return an N-dimensional histogram (*lazy action*). | |||
| 2204 | /// \param[in] model The returned histogram will be constructed using this as a model. | |||
| 2205 | /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill` | |||
| 2206 | /// (N columns for unweighted filling, or N+1 columns for weighted filling, where N is the number of histogram dimensions; any other count throws std::runtime_error) | |||
| 2207 | /// \return the N-dimensional histogram wrapped in a RResultPtr. | |||
| 2208 | /// | |||
| 2209 | /// This action is *lazy*: upon invocation of this method the calculation is | |||
| 2210 | /// booked but not executed. Also see RResultPtr. | |||
| 2211 | /// | |||
| 2212 | /// ### Example usage: | |||
| 2213 | /// ~~~{.cpp} | |||
| 2214 | /// auto myFilledObj = myDf.HistoND({"name","title", 4, | |||
| 2215 | /// {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}}, | |||
| 2216 | /// {"col0", "col1", "col2", "col3"}); | |||
| 2217 | /// ~~~ | |||
| 2218 | /// | |||
| 2219 | RResultPtr<::THnD> HistoND(const THnDModel &model, const ColumnNames_t &columnList) | |||
| 2220 | { | |||
| 2221 | std::shared_ptr<::THnD> h(nullptr); | |||
| 2222 | { | |||
| 2223 | ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); | |||
| 2224 | h = model.GetHistogram(); | |||
| 2225 | ||||
| 2226 | if (int(columnList.size()) == (h->GetNdimensions() + 1)) { | |||
| 2227 | h->Sumw2(); /* N+1 columns means weighted filling (see the columnList documentation) */ | |||
| 2228 | } else if (int(columnList.size()) != h->GetNdimensions()) { | |||
| 2229 | throw std::runtime_error("Wrong number of columns for the specified number of histogram axes."); | |||
| 2230 | } | |||
| 2231 | } | |||
| 2232 | return CreateAction<RDFInternal::ActionTags::HistoND, RDFDetail::RInferredType>(columnList, h, h, fProxiedPtr, | |||
| 2233 | columnList.size()); | |||
| 2234 | } | |||
| 2235 | ||||
| 2236 | //////////////////////////////////////////////////////////////////////////// | |||
| 2237 | /// \brief Fill and return a TGraph object (*lazy action*). | |||
| 2238 | /// \tparam X The type of the column used to fill the x axis. | |||
| 2239 | /// \tparam Y The type of the column used to fill the y axis. | |||
| 2240 | /// \param[in] x The name of the column that will fill the x axis. | |||
| 2241 | /// \param[in] y The name of the column that will fill the y axis. | |||
| 2242 | /// \return the TGraph wrapped in a RResultPtr. | |||
| 2243 | /// | |||
| 2244 | /// Columns can be of a container type (e.g. std::vector<double>), in which case the TGraph | |||
| 2245 | /// is filled with each one of the elements of the container. | |||
| 2246 | /// If multithreading is enabled, the order in which points are inserted is undefined. | |||
| 2247 | /// If the graph is to be drawn, sorting it by x beforehand is recommended. | |||
| 2248 | /// The TGraph is given a default name (`<y>_vs_<x>`) and title (`<y> vs <x>`) built from the input column names; the axis titles are set to the column names. | |||
| 2249 | /// | |||
| 2250 | /// This action is *lazy*: upon invocation of this method the calculation is | |||
| 2251 | /// booked but not executed. Also see RResultPtr. | |||
| 2252 | /// | |||
| 2253 | /// ### Example usage: | |||
| 2254 | /// ~~~{.cpp} | |||
| 2255 | /// // Deduce column types (this invocation needs jitting internally) | |||
| 2256 | /// auto myGraph1 = myDf.Graph("xValues", "yValues"); | |||
| 2257 | /// // Explicit column types | |||
| 2258 | /// auto myGraph2 = myDf.Graph<int, float>("xValues", "yValues"); | |||
| 2259 | /// ~~~ | |||
| 2260 | /// | |||
| 2261 | /// \note Unlike other ROOT interfaces, the returned TGraph is not associated with gDirectory | |||
| 2262 | /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that | |||
| 2263 | /// if the result graph goes out of scope before the end of the program, ROOT might display a blank canvas). | |||
| 2264 | template <typename X = RDFDetail::RInferredType, typename Y = RDFDetail::RInferredType> | |||
| 2265 | RResultPtr<::TGraph> Graph(std::string_view x = "", std::string_view y = "") | |||
| 2266 | { | |||
| 2267 | auto graph = std::make_shared<::TGraph>(); | |||
| 2268 | const std::vector<std::string_view> columnViews = {x, y}; | |||
| 2269 | const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) | |||
| 2270 | ? ColumnNames_t() | |||
| 2271 | : ColumnNames_t(columnViews.begin(), columnViews.end()); | |||
| 2272 | ||||
| 2273 | const auto validatedColumns = GetValidatedColumnNames(2, userColumns); | |||
| 2274 | ||||
| 2275 | // We build a default name and title based on the input columns | |||
| 2276 | const auto g_name = validatedColumns[1] + "_vs_" + validatedColumns[0]; | |||
| 2277 | const auto g_title = validatedColumns[1] + " vs " + validatedColumns[0]; | |||
| 2278 | graph->SetNameTitle(g_name.c_str(), g_title.c_str()); | |||
| 2279 | graph->GetXaxis()->SetTitle(validatedColumns[0].c_str()); | |||
| 2280 | graph->GetYaxis()->SetTitle(validatedColumns[1].c_str()); | |||
| 2281 | ||||
| 2282 | return CreateAction<RDFInternal::ActionTags::Graph, X, Y>(validatedColumns, graph, graph, fProxiedPtr); | |||
| 2283 | } | |||
| 2284 | ||||
| 2285 | //////////////////////////////////////////////////////////////////////////// | |||
| 2286 | /// \brief Fill and return a TGraphAsymmErrors object (*lazy action*). | |||
| 2287 | /// \param[in] x The name of the column that will fill the x axis. | |||
| 2288 | /// \param[in] y The name of the column that will fill the y axis. | |||
| 2289 | /// \param[in] exl The name of the column of X low errors | |||
| 2290 | /// \param[in] exh The name of the column of X high errors | |||
| 2291 | /// \param[in] eyl The name of the column of Y low errors | |||
| 2292 | /// \param[in] eyh The name of the column of Y high errors | |||
| 2293 | /// \return the TGraphAsymmErrors wrapped in a RResultPtr. | |||
| 2294 | /// | |||
| 2295 | /// Columns can be of a container type (e.g. std::vector<double>), in which case the graph | |||
| 2296 | /// is filled with each one of the elements of the container. | |||
| 2297 | /// If multithreading is enabled, the order in which points are inserted is undefined. | |||
| 2298 | /// | |||
| 2299 | /// This action is *lazy*: upon invocation of this method the calculation is | |||
| 2300 | /// booked but not executed. Also see RResultPtr. | |||
| 2301 | /// | |||
| 2302 | /// ### Example usage: | |||
| 2303 | /// ~~~{.cpp} | |||
| 2304 | /// // Deduce column types (this invocation needs jitting internally) | |||
| 2305 | /// auto myGAE1 = myDf.GraphAsymmErrors("xValues", "yValues", "exl", "exh", "eyl", "eyh"); | |||
| 2306 | /// // Explicit column types | |||
| 2307 | /// using f = float; | |||
| 2308 | /// auto myGAE2 = myDf.GraphAsymmErrors<f, f, f, f, f, f>("xValues", "yValues", "exl", "exh", "eyl", "eyh"); | |||
| 2309 | /// ~~~ | |||
| 2310 | /// | |||
| 2311 | /// `GraphAsymmErrors` should also be used when only the values on one of the axes have | |||
| 2312 | /// associated errors, for example when only `ey` exists and `ex` is equal to zero. | |||
| 2313 | /// In such cases, the user should do the following: | |||
| 2314 | /// ~~~{.cpp} | |||
| 2315 | /// // Create a column of zeros in RDataFrame | |||
| 2316 | /// auto rdf_withzeros = rdf.Define("zero", "0"); | |||
| 2317 | /// // or alternatively: | |||
| 2318 | /// auto rdf_withzeros = rdf.Define("zero", []() -> double { return 0.;}); | |||
| 2319 | /// // Create the graph with y errors only | |||
| 2320 | /// auto rdf_errorsOnYOnly = rdf_withzeros.GraphAsymmErrors("xValues", "yValues", "zero", "zero", "eyl", "eyh"); | |||
| 2321 | /// ~~~ | |||
| 2322 | /// | |||
| 2323 | /// \note Unlike other ROOT interfaces, the returned TGraphAsymmErrors is not associated with gDirectory | |||
| 2324 | /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that | |||
| 2325 | /// if the result graph goes out of scope before the end of the program, ROOT might display a blank canvas). | |||
| 2326 | template <typename X = RDFDetail::RInferredType, typename Y = RDFDetail::RInferredType, | |||
| 2327 | typename EXL = RDFDetail::RInferredType, typename EXH = RDFDetail::RInferredType, | |||
| 2328 | typename EYL = RDFDetail::RInferredType, typename EYH = RDFDetail::RInferredType> | |||
| 2329 | RResultPtr<::TGraphAsymmErrors> | |||
| 2330 | GraphAsymmErrors(std::string_view x = "", std::string_view y = "", std::string_view exl = "", | |||
| 2331 | std::string_view exh = "", std::string_view eyl = "", std::string_view eyh = "") | |||
| 2332 | { | |||
| 2333 | auto graph = std::make_shared<::TGraphAsymmErrors>(); | |||
| 2334 | const std::vector<std::string_view> columnViews = {x, y, exl, exh, eyl, eyh}; | |||
| 2335 | const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) | |||
| 2336 | ? ColumnNames_t() | |||
| 2337 | : ColumnNames_t(columnViews.begin(), columnViews.end()); | |||
| 2338 | ||||
| 2339 | const auto validatedColumns = GetValidatedColumnNames(6, userColumns); | |||
| 2340 | ||||
| 2341 | // We build a default name and title based on the input columns | |||
| 2342 | const auto g_name = validatedColumns[1] + "_vs_" + validatedColumns[0]; | |||
| 2343 | const auto g_title = validatedColumns[1] + " vs " + validatedColumns[0]; | |||
| 2344 | graph->SetNameTitle(g_name.c_str(), g_title.c_str()); | |||
| 2345 | graph->GetXaxis()->SetTitle(validatedColumns[0].c_str()); | |||
| 2346 | graph->GetYaxis()->SetTitle(validatedColumns[1].c_str()); | |||
| 2347 | ||||
| 2348 | return CreateAction<RDFInternal::ActionTags::GraphAsymmErrors, X, Y, EXL, EXH, EYL, EYH>(validatedColumns, graph, | |||
| 2349 | graph, fProxiedPtr); | |||
| 2350 | } | |||
| 2351 | ||||
| 2352 | //////////////////////////////////////////////////////////////////////////// | |||
| 2353 | /// \brief Fill and return a one-dimensional profile (*lazy action*). | |||
| 2354 | /// \tparam V1 The type of the column used to fill the x axis of the profile. Inferred if not present. | |||
| 2355 | /// \tparam V2 The type of the column used to fill the y axis of the profile. Inferred if not present. | |||
| 2356 | /// \param[in] model The model to be considered to build the new return value. It must have axis limits, otherwise a std::runtime_error is thrown. | |||
| 2357 | /// \param[in] v1Name The name of the column that will fill the x axis. | |||
| 2358 | /// \param[in] v2Name The name of the column that will fill the y axis. | |||
| 2359 | /// \return the one-dimensional profile wrapped in a RResultPtr. | |||
| 2360 | /// | |||
| 2361 | /// This action is *lazy*: upon invocation of this method the calculation is | |||
| 2362 | /// booked but not executed. Also see RResultPtr. | |||
| 2363 | /// | |||
| 2364 | /// ### Example usage: | |||
| 2365 | /// ~~~{.cpp} | |||
| 2366 | /// // Deduce column types (this invocation needs jitting internally) | |||
| 2367 | /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues"); | |||
| 2368 | /// // Explicit column types | |||
| 2369 | /// auto myProf2 = myDf.Profile1D<int, float>({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues"); | |||
| 2370 | /// ~~~ | |||
| 2371 | /// | |||
| 2372 | /// \note Unlike other ROOT interfaces, the returned profile is not associated with gDirectory | |||
| 2373 | /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that | |||
| 2374 | /// if the result profile goes out of scope before the end of the program, ROOT might display a blank canvas). | |||
| 2375 | template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType> | |||
| 2376 | RResultPtr<::TProfile> | |||
| 2377 | Profile1D(const TProfile1DModel &model, std::string_view v1Name = "", std::string_view v2Name = "") | |||
| 2378 | { | |||
| 2379 | std::shared_ptr<::TProfile> h(nullptr); | |||
| 2380 | { | |||
| 2381 | ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); | |||
| 2382 | h = model.GetProfile(); | |||
| 2383 | } | |||
| 2384 | ||||
| 2385 | if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) { | |||
| 2386 | throw std::runtime_error("Profiles with no axes limits are not supported yet."); | |||
| 2387 | } | |||
| 2388 | const std::vector<std::string_view> columnViews = {v1Name, v2Name}; | |||
| 2389 | const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) | |||
| 2390 | ? ColumnNames_t() | |||
| 2391 | : ColumnNames_t(columnViews.begin(), columnViews.end()); | |||
| 2392 | return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2>(userColumns, h, h, fProxiedPtr); | |||
| 2393 | } | |||
| 2394 | ||||
| 2395 | //////////////////////////////////////////////////////////////////////////// | |||
| 2396 | /// \brief Fill and return a one-dimensional profile (*lazy action*). | |||
| 2397 | /// \tparam V1 The type of the column used to fill the x axis of the profile. Inferred if not present. | |||
| 2398 | /// \tparam V2 The type of the column used to fill the y axis of the profile. Inferred if not present. | |||
| 2399 | /// \tparam W The type of the column that provides the weights used to fill the profile. Inferred if not present. | |||
| 2400 | /// \param[in] model The model to be considered to build the new return value. It must have axis limits, otherwise a std::runtime_error is thrown. | |||
| 2401 | /// \param[in] v1Name The name of the column that will fill the x axis. | |||
| 2402 | /// \param[in] v2Name The name of the column that will fill the y axis. | |||
| 2403 | /// \param[in] wName The name of the column that will provide the weights. | |||
| 2404 | /// \return the one-dimensional profile wrapped in a RResultPtr. | |||
| 2405 | /// | |||
| 2406 | /// This action is *lazy*: upon invocation of this method the calculation is | |||
| 2407 | /// booked but not executed. Also see RResultPtr. | |||
| 2408 | /// | |||
| 2409 | /// ### Example usage: | |||
| 2410 | /// ~~~{.cpp} | |||
| 2411 | /// // Deduce column types (this invocation needs jitting internally) | |||
| 2412 | /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues", "weight"); | |||
| 2413 | /// // Explicit column types | |||
| 2414 | /// auto myProf2 = myDf.Profile1D<int, float, double>({"profName", "profTitle", 64u, -4., 4.}, | |||
| 2415 | /// "xValues", "yValues", "weight"); | |||
| 2416 | /// ~~~ | |||
| 2417 | /// | |||
| 2418 | /// See the first Profile1D() overload for more details. | |||
| 2419 | template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType, | |||
| 2420 | typename W = RDFDetail::RInferredType> | |||
| 2421 | RResultPtr<::TProfile> | |||
| 2422 | Profile1D(const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName) | |||
| 2423 | { | |||
| 2424 | std::shared_ptr<::TProfile> h(nullptr); | |||
| 2425 | { | |||
| 2426 | ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); | |||
| 2427 | h = model.GetProfile(); | |||
| 2428 | } | |||
| 2429 | ||||
| 2430 | if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) { | |||
| 2431 | throw std::runtime_error("Profile histograms with no axes limits are not supported yet."); | |||
| 2432 | } | |||
| 2433 | const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName}; | |||
| 2434 | const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) | |||
| 2435 | ? ColumnNames_t() | |||
| 2436 | : ColumnNames_t(columnViews.begin(), columnViews.end()); | |||
| 2437 | return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2, W>(userColumns, h, h, fProxiedPtr); | |||
| 2438 | } | |||
| 2439 | ||||
| 2440 | //////////////////////////////////////////////////////////////////////////// | |||
| 2441 | /// \brief Fill and return a one-dimensional profile (*lazy action*). | |||
| 2442 | /// All three column types must be given explicitly; the call is forwarded to the weighted Profile1D() overload with every column name empty. See the first Profile1D() overload for more details. | |||
| 2443 | template <typename V1, typename V2, typename W> | |||
| 2444 | RResultPtr<::TProfile> Profile1D(const TProfile1DModel &model) | |||
| 2445 | { | |||
| 2446 | return Profile1D<V1, V2, W>(model, "", "", ""); | |||
| 2447 | } | |||
| 2448 | ||||
| 2449 | //////////////////////////////////////////////////////////////////////////// | |||
| 2450 | /// \brief Fill and return a two-dimensional profile (*lazy action*). | |||
| 2451 | /// \tparam V1 The type of the column used to fill the x axis of the profile. Inferred if not present. | |||
| 2452 | /// \tparam V2 The type of the column used to fill the y axis of the profile. Inferred if not present. | |||
| 2453 | /// \tparam V3 The type of the column used to fill the z axis of the profile. Inferred if not present. | |||
| 2454 | /// \param[in] model The returned profile will be constructed using this as a model. It must have axis limits, otherwise a std::runtime_error is thrown. | |||
| 2455 | /// \param[in] v1Name The name of the column that will fill the x axis. | |||
| 2456 | /// \param[in] v2Name The name of the column that will fill the y axis. | |||
| 2457 | /// \param[in] v3Name The name of the column that will fill the z axis. | |||
| 2458 | /// \return the two-dimensional profile wrapped in a RResultPtr. | |||
| 2459 | /// | |||
| 2460 | /// This action is *lazy*: upon invocation of this method the calculation is | |||
| 2461 | /// booked but not executed. Also see RResultPtr. | |||
| 2462 | /// | |||
| 2463 | /// ### Example usage: | |||
| 2464 | /// ~~~{.cpp} | |||
| 2465 | /// // Deduce column types (this invocation needs jitting internally) | |||
| 2466 | /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20}, | |||
| 2467 | /// "xValues", "yValues", "zValues"); | |||
| 2468 | /// // Explicit column types | |||
| 2469 | /// auto myProf2 = myDf.Profile2D<int, float, double>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20}, | |||
| 2470 | /// "xValues", "yValues", "zValues"); | |||
| 2471 | /// ~~~ | |||
| 2472 | /// | |||
| 2473 | /// \note Unlike other ROOT interfaces, the returned profile is not associated with gDirectory | |||
| 2474 | /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that | |||
| 2475 | /// if the result profile goes out of scope before the end of the program, ROOT might display a blank canvas). | |||
| 2476 | template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType, | |||
| 2477 | typename V3 = RDFDetail::RInferredType> | |||
| 2478 | RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model, std::string_view v1Name = "", | |||
| 2479 | std::string_view v2Name = "", std::string_view v3Name = "") | |||
| 2480 | { | |||
| 2481 | std::shared_ptr<::TProfile2D> h(nullptr); | |||
| 2482 | { | |||
| 2483 | ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); | |||
| 2484 | h = model.GetProfile(); | |||
| 2485 | } | |||
| 2486 | ||||
| 2487 | if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) { | |||
| 2488 | throw std::runtime_error("2D profiles with no axes limits are not supported yet."); | |||
| 2489 | } | |||
| 2490 | const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name}; | |||
| 2491 | const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) | |||
| 2492 | ? ColumnNames_t() | |||
| 2493 | : ColumnNames_t(columnViews.begin(), columnViews.end()); | |||
| 2494 | return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3>(userColumns, h, h, fProxiedPtr); | |||
| 2495 | } | |||
| 2496 | ||||
| 2497 | //////////////////////////////////////////////////////////////////////////// | |||
| 2498 | /// \brief Fill and return a two-dimensional profile (*lazy action*). | |||
| 2499 | /// \tparam V1 The type of the column used to fill the x axis of the profile. Inferred if not present. | |||
| 2500 | /// \tparam V2 The type of the column used to fill the y axis of the profile. Inferred if not present. | |||
| 2501 | /// \tparam V3 The type of the column used to fill the z axis of the profile. Inferred if not present. | |||
| 2502 | /// \tparam W The type of the column used for the weights of the profile. Inferred if not present. | |||
| 2503 | /// \param[in] model The returned profile will be constructed using this as a model. It must have axis limits, otherwise a std::runtime_error is thrown. | |||
| 2504 | /// \param[in] v1Name The name of the column that will fill the x axis. | |||
| 2505 | /// \param[in] v2Name The name of the column that will fill the y axis. | |||
| 2506 | /// \param[in] v3Name The name of the column that will fill the z axis. | |||
| 2507 | /// \param[in] wName The name of the column that will provide the weights. | |||
| 2508 | /// \return the two-dimensional profile wrapped in a RResultPtr. | |||
| 2509 | /// | |||
| 2510 | /// This action is *lazy*: upon invocation of this method the calculation is | |||
| 2511 | /// booked but not executed. Also see RResultPtr. | |||
| 2512 | /// | |||
| 2513 | /// ### Example usage: | |||
| 2514 | /// ~~~{.cpp} | |||
| 2515 | /// // Deduce column types (this invocation needs jitting internally) | |||
| 2516 | /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20}, | |||
| 2517 | /// "xValues", "yValues", "zValues", "weight"); | |||
| 2518 | /// // Explicit column types | |||
| 2519 | /// auto myProf2 = myDf.Profile2D<int, float, double, int>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20}, | |||
| 2520 | /// "xValues", "yValues", "zValues", "weight"); | |||
| 2521 | /// ~~~ | |||
| 2522 | /// | |||
| 2523 | /// See the first Profile2D() overload for more details. | |||
| 2524 | template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType, | |||
| 2525 | typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType> | |||
| 2526 | RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name, | |||
| 2527 | std::string_view v3Name, std::string_view wName) | |||
| 2528 | { | |||
| 2529 | std::shared_ptr<::TProfile2D> h(nullptr); | |||
| 2530 | { | |||
| 2531 | ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); | |||
| 2532 | h = model.GetProfile(); | |||
| 2533 | } | |||
| 2534 | ||||
| 2535 | if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) { | |||
| 2536 | throw std::runtime_error("2D profiles with no axes limits are not supported yet."); | |||
| 2537 | } | |||
| 2538 | const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName}; | |||
| 2539 | const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) | |||
| 2540 | ? ColumnNames_t() | |||
| 2541 | : ColumnNames_t(columnViews.begin(), columnViews.end()); | |||
| 2542 | return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3, W>(userColumns, h, h, fProxiedPtr); | |||
| 2543 | } | |||
| 2544 | ||||
| 2545 | /// \brief Fill and return a two-dimensional profile (*lazy action*). | |||
| 2546 | /// All four column types must be given explicitly; the call is forwarded to the weighted Profile2D() overload with every column name empty. See the first Profile2D() overload for more details. | |||
| 2547 | template <typename V1, typename V2, typename V3, typename W> | |||
| 2548 | RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model) | |||
| 2549 | { | |||
| 2550 | return Profile2D<V1, V2, V3, W>(model, "", "", "", ""); | |||
| 2551 | } | |||
| 2552 | ||||
| 2553 | //////////////////////////////////////////////////////////////////////////// | |||
| 2554 | /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*). | |||
| 2555 | /// | |||
| 2556 | /// Type T must provide at least: | |||
| 2557 | /// - a copy-constructor | |||
| 2558 | /// - a `Fill` method that accepts as many arguments and with same types as the column names passed as columnList | |||
| 2559 | /// (these types can also be passed as template parameters to this method) | |||
| 2560 | /// - a `Merge` method with signature `Merge(TCollection *)` or `Merge(const std::vector<T *>&)` that merges the | |||
| 2561 | /// objects passed as argument into the object on which `Merge` was called (an analogous of TH1::Merge). Note that | |||
| 2562 | /// if the signature that takes a `TCollection*` is used, then T must inherit from TObject (to allow insertion in | |||
| 2563 | /// the TCollection*). | |||
| 2564 | /// | |||
| 2565 | /// \tparam FirstColumn The first type of the column the values of which are used to fill the object. Inferred together with OtherColumns if not present. | |||
| 2566 | /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the object. | |||
| 2567 | /// \tparam T The type of the object to fill. Automatically deduced. | |||
| 2568 | /// \param[in] model The model to be considered to build the new return value. A std::runtime_error is thrown if the axis-limits check on the copied object fails. | |||
| 2569 | /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill` | |||
| 2570 | /// \return the filled object wrapped in a RResultPtr. | |||
| 2571 | /// | |||
| 2572 | /// The model is copied (or moved from, if passed as an rvalue) into a new object that is handed to the booked action; `model` itself is not filled. | |||
| 2573 | /// The list of column names to be used for filling must always be specified. | |||
| 2574 | /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. | |||
| 2575 | /// Also see RResultPtr. | |||
| 2576 | /// | |||
| 2577 | /// ### Example usage: | |||
| 2578 | /// ~~~{.cpp} | |||
| 2579 | /// MyClass obj; | |||
| 2580 | /// // Deduce column types (this invocation needs jitting internally, and in this case | |||
| 2581 | /// // MyClass needs to be known to the interpreter) | |||
| 2582 | /// auto myFilledObj = myDf.Fill(obj, {"col0", "col1"}); | |||
| 2583 | /// // explicit column types | |||
| 2584 | /// auto myFilledObj = myDf.Fill<float, float>(obj, {"col0", "col1"}); | |||
| 2585 | /// ~~~ | |||
| 2586 | /// | |||
| 2587 | template <typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename T> | |||
| 2588 | RResultPtr<std::decay_t<T>> Fill(T &&model, const ColumnNames_t &columnList) | |||
| 2589 | { | |||
| 2590 | auto h = std::make_shared<std::decay_t<T>>(std::forward<T>(model)); | |||
| 2591 | if (!RDFInternal::HistoUtils<T>::HasAxisLimits(*h)) { /* NOTE(review): T is a reference type when an lvalue is passed, while *h is a std::decay_t<T>; confirm HistoUtils<T> is the intended instantiation */ | |||
| 2592 | throw std::runtime_error("The absence of axes limits is not supported yet."); | |||
| 2593 | } | |||
| 2594 | return CreateAction<RDFInternal::ActionTags::Fill, FirstColumn, OtherColumns...>(columnList, h, h, fProxiedPtr, | |||
| 2595 | columnList.size()); | |||
| 2596 | } | |||
| 2597 | ||||
| 2598 | //////////////////////////////////////////////////////////////////////////// | |||
| 2599 | /// \brief Return a TStatistic object, filled once per event (*lazy action*). | |||
| 2600 | /// | |||
| 2601 | /// \tparam V The type of the value column | |||
| 2602 | /// \param[in] value The name of the column with the values to fill the statistics with. | |||
| 2603 | /// \return the filled TStatistic object wrapped in a RResultPtr. | |||
| 2604 | /// | |||
| 2605 | /// ### Example usage: | |||
| 2606 | /// ~~~{.cpp} | |||
| 2607 | /// // Deduce column type (this invocation needs jitting internally) | |||
| 2608 | /// auto stats0 = myDf.Stats("values"); | |||
| 2609 | /// // Explicit column type | |||
| 2610 | /// auto stats1 = myDf.Stats<float>("values"); | |||
| 2611 | /// ~~~ | |||
| 2612 | /// | |||
| 2613 | template <typename V = RDFDetail::RInferredType> | |||
| 2614 | RResultPtr<TStatistic> Stats(std::string_view value = "") | |||
| 2615 | { | |||
| 2616 | ColumnNames_t columns; | |||
| 2617 | if (!value.empty()) { | |||
| 2618 | columns.emplace_back(std::string(value)); | |||
| 2619 | } | |||
| 2620 | const auto validColumnNames = GetValidatedColumnNames(1, columns); | |||
| 2621 | if (std::is_same<V, RDFDetail::RInferredType>::value) { | |||
| 2622 | return Fill(TStatistic(), validColumnNames); | |||
| 2623 | } else { | |||
| 2624 | return Fill<V>(TStatistic(), validColumnNames); | |||
| 2625 | } | |||
| 2626 | } | |||
| 2627 | ||||
| 2628 | //////////////////////////////////////////////////////////////////////////// | |||
| 2629 | /// \brief Return a TStatistic object, filled once per event (*lazy action*). | |||
| 2630 | /// | |||
| 2631 | /// \tparam V The type of the value column | |||
| 2632 | /// \tparam W The type of the weight column | |||
| 2633 | /// \param[in] value The name of the column with the values to fill the statistics with. | |||
| 2634 | /// \param[in] weight The name of the column with the weights to fill the statistics with. | |||
| 2635 | /// \return the filled TStatistic object wrapped in a RResultPtr. | |||
| 2636 | /// | |||
| 2637 | /// ### Example usage: | |||
| 2638 | /// ~~~{.cpp} | |||
| 2639 | /// // Deduce column types (this invocation needs jitting internally) | |||
| 2640 | /// auto stats0 = myDf.Stats("values", "weights"); | |||
| 2641 | /// // Explicit column types | |||
| 2642 | /// auto stats1 = myDf.Stats<int, float>("values", "weights"); | |||
| 2643 | /// ~~~ | |||
| 2644 | /// | |||
| 2645 | template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType> | |||
| 2646 | RResultPtr<TStatistic> Stats(std::string_view value, std::string_view weight) | |||
| 2647 | { | |||
| 2648 | ColumnNames_t columns{std::string(value), std::string(weight)}; | |||
| 2649 | constexpr auto vIsInferred = std::is_same<V, RDFDetail::RInferredType>::value; | |||
| 2650 | constexpr auto wIsInferred = std::is_same<W, RDFDetail::RInferredType>::value; | |||
| 2651 | const auto validColumnNames = GetValidatedColumnNames(2, columns); | |||
| 2652 | // We have 3 cases: | |||
| 2653 | // 1. Both types are inferred: we use Fill and let the jit kick in. | |||
| 2654 | // 2. One of the two types is explicit and the other one is inferred: the case is not supported. | |||
| 2655 | // 3. Both types are explicit: we invoke the fully compiled Fill method. | |||
| 2656 | if (vIsInferred && wIsInferred) { | |||
| 2657 | return Fill(TStatistic(), validColumnNames); | |||
| 2658 | } else if (vIsInferred != wIsInferred) { | |||
| 2659 | std::string error("The "); | |||
| 2660 | error += vIsInferred ? "value " : "weight "; | |||
| 2661 | error += "column type is explicit, while the "; | |||
| 2662 | error += vIsInferred ? "weight " : "value "; | |||
| 2663 | error += " is specified to be inferred. This case is not supported: please specify both types or none."; | |||
| 2664 | throw std::runtime_error(error); | |||
| 2665 | } else { | |||
| 2666 | return Fill<V, W>(TStatistic(), validColumnNames); | |||
| 2667 | } | |||
| 2668 | } | |||
| 2669 | ||||
| 2670 | //////////////////////////////////////////////////////////////////////////// | |||
| 2671 | /// \brief Return the minimum of processed column values (*lazy action*). | |||
| 2672 | /// \tparam T The type of the branch/column. | |||
| 2673 | /// \param[in] columnName The name of the branch/column to be treated. | |||
| 2674 | /// \return the minimum value of the selected column wrapped in a RResultPtr. | |||
| 2675 | /// | |||
| 2676 | /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct | |||
| 2677 | /// template specialization of this method. | |||
| 2678 | /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise. | |||
| 2679 | /// | |||
| 2680 | /// This action is *lazy*: upon invocation of this method the calculation is | |||
| 2681 | /// booked but not executed. Also see RResultPtr. | |||
| 2682 | /// | |||
| 2683 | /// ### Example usage: | |||
| 2684 | /// ~~~{.cpp} | |||
| 2685 | /// // Deduce column type (this invocation needs jitting internally) | |||
| 2686 | /// auto minVal0 = myDf.Min("values"); | |||
| 2687 | /// // Explicit column type | |||
| 2688 | /// auto minVal1 = myDf.Min<double>("values"); | |||
| 2689 | /// ~~~ | |||
| 2690 | /// | |||
| 2691 | template <typename T = RDFDetail::RInferredType> | |||
| 2692 | RResultPtr<RDFDetail::MinReturnType_t<T>> Min(std::string_view columnName = "") | |||
| 2693 | { | |||
| 2694 | const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)}); | |||
| 2695 | using RetType_t = RDFDetail::MinReturnType_t<T>; | |||
| 2696 | auto minV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::max()); | |||
| 2697 | return CreateAction<RDFInternal::ActionTags::Min, T>(userColumns, minV, minV, fProxiedPtr); | |||
| 2698 | } | |||
| 2699 | ||||
| 2700 | //////////////////////////////////////////////////////////////////////////// | |||
| 2701 | /// \brief Return the maximum of processed column values (*lazy action*). | |||
| 2702 | /// \tparam T The type of the branch/column. | |||
| 2703 | /// \param[in] columnName The name of the branch/column to be treated. | |||
| 2704 | /// \return the maximum value of the selected column wrapped in a RResultPtr. | |||
| 2705 | /// | |||
| 2706 | /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct | |||
| 2707 | /// template specialization of this method. | |||
| 2708 | /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise. | |||
| 2709 | /// | |||
| 2710 | /// This action is *lazy*: upon invocation of this method the calculation is | |||
| 2711 | /// booked but not executed. Also see RResultPtr. | |||
| 2712 | /// | |||
| 2713 | /// ### Example usage: | |||
| 2714 | /// ~~~{.cpp} | |||
| 2715 | /// // Deduce column type (this invocation needs jitting internally) | |||
| 2716 | /// auto maxVal0 = myDf.Max("values"); | |||
| 2717 | /// // Explicit column type | |||
| 2718 | /// auto maxVal1 = myDf.Max<double>("values"); | |||
| 2719 | /// ~~~ | |||
| 2720 | /// | |||
| 2721 | template <typename T = RDFDetail::RInferredType> | |||
| 2722 | RResultPtr<RDFDetail::MaxReturnType_t<T>> Max(std::string_view columnName = "") | |||
| 2723 | { | |||
| 2724 | const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)}); | |||
| 2725 | using RetType_t = RDFDetail::MaxReturnType_t<T>; | |||
| 2726 | auto maxV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::lowest()); | |||
| 2727 | return CreateAction<RDFInternal::ActionTags::Max, T>(userColumns, maxV, maxV, fProxiedPtr); | |||
| 2728 | } | |||
| 2729 | ||||
| 2730 | //////////////////////////////////////////////////////////////////////////// | |||
| 2731 | /// \brief Return the mean of processed column values (*lazy action*). | |||
| 2732 | /// \tparam T The type of the branch/column. | |||
| 2733 | /// \param[in] columnName The name of the branch/column to be treated. | |||
| 2734 | /// \return the mean value of the selected column wrapped in a RResultPtr. | |||
| 2735 | /// | |||
| 2736 | /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct | |||
| 2737 | /// template specialization of this method. | |||
| 2738 | /// | |||
| 2739 | /// This action is *lazy*: upon invocation of this method the calculation is | |||
| 2740 | /// booked but not executed. Also see RResultPtr. | |||
| 2741 | /// | |||
| 2742 | /// ### Example usage: | |||
| 2743 | /// ~~~{.cpp} | |||
| 2744 | /// // Deduce column type (this invocation needs jitting internally) | |||
| 2745 | /// auto meanVal0 = myDf.Mean("values"); | |||
| 2746 | /// // Explicit column type | |||
| 2747 | /// auto meanVal1 = myDf.Mean<double>("values"); | |||
| 2748 | /// ~~~ | |||
| 2749 | /// | |||
| 2750 | template <typename T = RDFDetail::RInferredType> | |||
| 2751 | RResultPtr<double> Mean(std::string_view columnName = "") | |||
| 2752 | { | |||
| 2753 | const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)}); | |||
| 2754 | auto meanV = std::make_shared<double>(0); | |||
| 2755 | return CreateAction<RDFInternal::ActionTags::Mean, T>(userColumns, meanV, meanV, fProxiedPtr); | |||
| 2756 | } | |||
| 2757 | ||||
| 2758 | //////////////////////////////////////////////////////////////////////////// | |||
| 2759 | /// \brief Return the unbiased standard deviation of processed column values (*lazy action*). | |||
| 2760 | /// \tparam T The type of the branch/column. | |||
| 2761 | /// \param[in] columnName The name of the branch/column to be treated. | |||
| 2762 | /// \return the standard deviation value of the selected column wrapped in a RResultPtr. | |||
| 2763 | /// | |||
| 2764 | /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct | |||
| 2765 | /// template specialization of this method. | |||
| 2766 | /// | |||
| 2767 | /// This action is *lazy*: upon invocation of this method the calculation is | |||
| 2768 | /// booked but not executed. Also see RResultPtr. | |||
| 2769 | /// | |||
| 2770 | /// ### Example usage: | |||
| 2771 | /// ~~~{.cpp} | |||
| 2772 | /// // Deduce column type (this invocation needs jitting internally) | |||
| 2773 | /// auto stdDev0 = myDf.StdDev("values"); | |||
| 2774 | /// // Explicit column type | |||
| 2775 | /// auto stdDev1 = myDf.StdDev<double>("values"); | |||
| 2776 | /// ~~~ | |||
| 2777 | /// | |||
| 2778 | template <typename T = RDFDetail::RInferredType> | |||
| 2779 | RResultPtr<double> StdDev(std::string_view columnName = "") | |||
| 2780 | { | |||
| 2781 | const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)}); | |||
| 2782 | auto stdDeviationV = std::make_shared<double>(0); | |||
| 2783 | return CreateAction<RDFInternal::ActionTags::StdDev, T>(userColumns, stdDeviationV, stdDeviationV, fProxiedPtr); | |||
| 2784 | } | |||
| 2785 | ||||
| 2786 | // clang-format off | |||
| 2787 | //////////////////////////////////////////////////////////////////////////// | |||
| 2788 | /// \brief Return the sum of processed column values (*lazy action*). | |||
| 2789 | /// \tparam T The type of the branch/column. | |||
| 2790 | /// \param[in] columnName The name of the branch/column. | |||
| 2791 | /// \param[in] initValue Optional initial value for the sum. If not present, the column values must be default-constructible. | |||
| 2792 | /// \return the sum of the selected column wrapped in a RResultPtr. | |||
| 2793 | /// | |||
| 2794 | /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct | |||
| 2795 | /// template specialization of this method. | |||
| 2796 | /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise. | |||
| 2797 | /// | |||
| 2798 | /// This action is *lazy*: upon invocation of this method the calculation is | |||
| 2799 | /// booked but not executed. Also see RResultPtr. | |||
| 2800 | /// | |||
| 2801 | /// ### Example usage: | |||
| 2802 | /// ~~~{.cpp} | |||
| 2803 | /// // Deduce column type (this invocation needs jitting internally) | |||
| 2804 | /// auto sum0 = myDf.Sum("values"); | |||
| 2805 | /// // Explicit column type | |||
| 2806 | /// auto sum1 = myDf.Sum<double>("values"); | |||
| 2807 | /// ~~~ | |||
| 2808 | /// | |||
| 2809 | template <typename T = RDFDetail::RInferredType> | |||
| 2810 | RResultPtr<RDFDetail::SumReturnType_t<T>> | |||
| 2811 | Sum(std::string_view columnName = "", | |||
| 2812 | const RDFDetail::SumReturnType_t<T> &initValue = RDFDetail::SumReturnType_t<T>{}) | |||
| 2813 | { | |||
| 2814 | const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)}); | |||
| 2815 | auto sumV = std::make_shared<RDFDetail::SumReturnType_t<T>>(initValue); | |||
| 2816 | return CreateAction<RDFInternal::ActionTags::Sum, T>(userColumns, sumV, sumV, fProxiedPtr); | |||
| 2817 | } | |||
| 2818 | // clang-format on | |||
| 2819 | ||||
| 2820 | //////////////////////////////////////////////////////////////////////////// | |||
| 2821 | /// \brief Gather filtering statistics. | |||
| 2822 | /// \return the resulting `RCutFlowReport` instance wrapped in a RResultPtr. | |||
| 2823 | /// | |||
| 2824 | /// Calling `Report` on the main `RDataFrame` object gathers stats for | |||
| 2825 | /// all named filters in the call graph. Calling this method on a | |||
| 2826 | /// stored chain state (i.e. a graph node different from the first) gathers | |||
| 2827 | /// the stats for all named filters in the chain section between the original | |||
| 2828 | /// `RDataFrame` and that node (included). Stats are gathered in the same | |||
| 2829 | /// order as the named filters have been added to the graph. | |||
| 2830 | /// A RResultPtr<RCutFlowReport> is returned to allow inspection of the | |||
| 2831 | /// effects cuts had. | |||
| 2832 | /// | |||
| 2833 | /// This action is *lazy*: upon invocation of | |||
| 2834 | /// this method the calculation is booked but not executed. See RResultPtr | |||
| 2835 | /// documentation. | |||
| 2836 | /// | |||
| 2837 | /// ### Example usage: | |||
| 2838 | /// ~~~{.cpp} | |||
| 2839 | /// auto filtered = d.Filter(cut1, {"b1"}, "Cut1").Filter(cut2, {"b2"}, "Cut2"); | |||
| 2840 | /// auto cutReport = filtered.Report(); | |||
| 2841 | /// cutReport->Print(); | |||
| 2842 | /// ~~~ | |||
| 2843 | /// | |||
| 2844 | RResultPtr<RCutFlowReport> Report() | |||
| 2845 | { | |||
| 2846 | bool returnEmptyReport = false; | |||
| 2847 | // if this is a RInterface<RLoopManager> on which `Define` has been called, users | |||
| 2848 | // are calling `Report` on a chain of the form LoopManager->Define->Define->..., which | |||
| 2849 | // certainly does not contain named filters. | |||
| 2850 | // The number 4 takes into account the implicit columns for entry and slot number | |||
| 2851 | // and their aliases (2 + 2, i.e. {r,t}dfentry_ and {r,t}dfslot_) | |||
| 2852 | if (std::is_same<Proxied, RLoopManager>::value && fColRegister.GenerateColumnNames().size() > 4) | |||
| 2853 | returnEmptyReport = true; | |||
| 2854 | ||||
| 2855 | auto rep = std::make_shared<RCutFlowReport>(); | |||
| 2856 | using Helper_t = RDFInternal::ReportHelper<Proxied>; | |||
| 2857 | using Action_t = RDFInternal::RAction<Helper_t, Proxied>; | |||
| 2858 | ||||
| 2859 | auto action = std::make_unique<Action_t>(Helper_t(rep, fProxiedPtr.get(), returnEmptyReport), ColumnNames_t({}), | |||
| 2860 | fProxiedPtr, RDFInternal::RColumnRegister(fColRegister)); | |||
| 2861 | ||||
| 2862 | return MakeResultPtr(rep, *fLoopManager, std::move(action)); | |||
| 2863 | } | |||
| 2864 | ||||
| 2865 | /// \brief Returns the names of the filters created. | |||
| 2866 | /// \return the container of filters names. | |||
| 2867 | /// | |||
| 2868 | /// If called on a root node, all the filters in the computation graph will | |||
| 2869 | /// be printed. For any other node, only the filters upstream of that node. | |||
| 2870 | /// Filters without a name are printed as "Unnamed Filter" | |||
| 2871 | /// This is not an action nor a transformation, just a query to the RDataFrame object. | |||
| 2872 | /// | |||
| 2873 | /// ### Example usage: | |||
| 2874 | /// ~~~{.cpp} | |||
| 2875 | /// auto filtNames = d.GetFilterNames(); | |||
| 2876 | /// for (auto &&filtName : filtNames) std::cout << filtName << std::endl; | |||
| 2877 | /// ~~~ | |||
| 2878 | /// | |||
| 2879 | std::vector<std::string> GetFilterNames() { return RDFInternal::GetFilterNames(fProxiedPtr); } | |||
| 2880 | ||||
| 2881 | // clang-format off | |||
| 2882 | //////////////////////////////////////////////////////////////////////////// | |||
| 2883 | /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot. | |||
| 2884 | /// \tparam AccFun The type of the aggregator callable. Automatically deduced. | |||
| 2885 | /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced. | |||
| 2886 | /// \tparam T The type of the column to apply the reduction to. Automatically deduced. | |||
| 2887 | /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable | |||
| 2888 | /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread | |||
| 2889 | /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead. | |||
| 2890 | /// \param[in] aggIdentity The aggregator variable of each thread is initialized to this value (or is default-constructed if the parameter is omitted) | |||
| 2891 | /// \return the result of the aggregation wrapped in a RResultPtr. | |||
| 2892 | /// | |||
| 2893 | /// An aggregator callable takes two values, an aggregator variable and a column value. The aggregator variable is | |||
| 2894 | /// initialized to aggIdentity or default-constructed if aggIdentity is omitted. | |||
| 2895 | /// This action calls the aggregator callable for each processed entry, passing in the aggregator variable and | |||
| 2896 | /// the value of the column columnName. | |||
| 2897 | /// If the signature is `U(U,T)` the aggregator variable is then copy-assigned the result of the execution of the callable. | |||
| 2898 | /// Otherwise the signature of aggregator must be `void(U&,T)`. | |||
| 2899 | /// | |||
| 2900 | /// The merger callable is used to merge the partial accumulation results of each processing thread. It is only called in multi-thread executions. | |||
| 2901 | /// If its signature is `U(U,U)` the aggregator variables of each thread are merged two by two. | |||
| 2902 | /// If its signature is `void(std::vector<U>& a)` it is assumed that it merges all aggregators in a[0]. | |||
| 2903 | /// | |||
| 2904 | /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr. | |||
| 2905 | /// | |||
| 2906 | /// Example usage: | |||
| 2907 | /// ~~~{.cpp} | |||
| 2908 | /// auto aggregator = [](double acc, double x) { return acc * x; }; | |||
| 2909 | /// ROOT::EnableImplicitMT(); | |||
| 2910 | /// // If multithread is enabled, the aggregator function will be called by more threads | |||
| 2911 | /// // and will produce a vector of partial accumulators. | |||
| 2912 | /// // The merger function performs the final aggregation of these partial results. | |||
| 2913 | /// auto merger = [](std::vector<double> &accumulators) { | |||
| 2914 | /// for (auto i : ROOT::TSeqU(1u, accumulators.size())) { | |||
| 2915 | /// accumulators[0] *= accumulators[i]; | |||
| 2916 | /// } | |||
| 2917 | /// }; | |||
| 2918 | /// | |||
| 2919 | /// // The accumulator is initialized at this value by every thread. | |||
| 2920 | /// double initValue = 1.; | |||
| 2921 | /// | |||
| 2922 | /// // Multiplies all elements of the column "x" | |||
| 2923 | /// auto result = d.Aggregate(aggregator, merger, "x", initValue); | |||
| 2924 | /// ~~~ | |||
| 2925 | // clang-format on | |||
| 2926 | template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type, | |||
| 2927 | typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types, | |||
| 2928 | typename ArgTypesNoDecay = typename TTraits::CallableTraits<AccFun>::arg_types_nodecay, | |||
| 2929 | typename U = TTraits::TakeFirstParameter_t<ArgTypes>, | |||
| 2930 | typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>> | |||
| 2931 | RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity) | |||
| 2932 | { | |||
| 2933 | RDFInternal::CheckAggregate<R, MergeFun>(ArgTypesNoDecay()); | |||
| 2934 | const auto columns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)}); | |||
| 2935 | ||||
| 2936 | const auto validColumnNames = GetValidatedColumnNames(1, columns); | |||
| 2937 | CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>()); | |||
| 2938 | ||||
| 2939 | auto accObjPtr = std::make_shared<U>(aggIdentity); | |||
| 2940 | using Helper_t = RDFInternal::AggregateHelper<AccFun, MergeFun, R, T, U>; | |||
| 2941 | using Action_t = RDFInternal::RAction<Helper_t, Proxied>; | |||
| 2942 | auto action = std::make_unique<Action_t>( | |||
| 2943 | Helper_t(std::move(aggregator), std::move(merger), accObjPtr, fLoopManager->GetNSlots()), validColumnNames, | |||
| 2944 | fProxiedPtr, fColRegister); | |||
| 2945 | return MakeResultPtr(accObjPtr, *fLoopManager, std::move(action)); | |||
| 2946 | } | |||
| 2947 | ||||
| 2948 | // clang-format off | |||
| 2949 | //////////////////////////////////////////////////////////////////////////// | |||
| 2950 | /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot. | |||
| 2951 | /// \tparam AccFun The type of the aggregator callable. Automatically deduced. | |||
| 2952 | /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced. | |||
| 2953 | /// \tparam T The type of the column to apply the reduction to. Automatically deduced. | |||
| 2954 | /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable | |||
| 2955 | /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread | |||
| 2956 | /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead. | |||
| 2957 | /// \return the result of the aggregation wrapped in a RResultPtr. | |||
| 2958 | /// | |||
| 2959 | /// See previous Aggregate overload for more information. | |||
| 2960 | // clang-format on | |||
| 2961 | template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type, | |||
| 2962 | typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types, | |||
| 2963 | typename U = TTraits::TakeFirstParameter_t<ArgTypes>, | |||
| 2964 | typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>> | |||
| 2965 | RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName = "") | |||
| 2966 | { | |||
| 2967 | static_assert( | |||
| 2968 | std::is_default_constructible<U>::value, | |||
| 2969 | "aggregated object cannot be default-constructed. Please provide an initialisation value (aggIdentity)"); | |||
| 2970 | return Aggregate(std::move(aggregator), std::move(merger), columnName, U()); | |||
| 2971 | } | |||
| 2972 | ||||
| 2973 | // clang-format off | |||
| 2974 | //////////////////////////////////////////////////////////////////////////// | |||
| 2975 | /// \brief Book execution of a custom action using a user-defined helper object. | |||
| 2976 | /// \tparam FirstColumn The type of the first column used by this action. Inferred together with OtherColumns if not present. | |||
| 2977 | /// \tparam OtherColumns A list of the types of the other columns used by this action | |||
| 2978 | /// \tparam Helper The type of the user-defined helper. See below for the required interface it should expose. | |||
| 2979 | /// \param[in] helper The Action Helper to be scheduled. | |||
| 2980 | /// \param[in] columns The names of the columns on which the helper acts. | |||
| 2981 | /// \return the result of the helper wrapped in a RResultPtr. | |||
| 2982 | /// | |||
| 2983 | /// This method books a custom action for execution. The behavior of the action is completely dependent on the | |||
| 2984 | /// Helper object provided by the caller. The required interface for the helper is described below (more | |||
| 2985 | /// methods than the ones required can be present, e.g. a constructor that takes the number of worker threads is usually useful): | |||
| 2986 | /// | |||
| 2987 | /// ### Mandatory interface | |||
| 2988 | /// | |||
| 2989 | /// * `Helper` must publicly inherit from `ROOT::Detail::RDF::RActionImpl<Helper>` | |||
| 2990 | /// * `Helper::Result_t`: public alias for the type of the result of this action helper. `Result_t` must be default-constructible. | |||
| 2991 | /// * `Helper(Helper &&)`: a move-constructor is required. Copy-constructors are discouraged. | |||
| 2992 | /// * `std::shared_ptr<Result_t> GetResultPtr() const`: return a shared_ptr to the result of this action (of type | |||
| 2993 | /// Result_t). The RResultPtr returned by Book will point to this object. Note that this method can be called | |||
| 2994 | /// _before_ Initialize(), because the RResultPtr is constructed before the event loop is started. | |||
| 2995 | /// * `void Initialize()`: this method is called once before starting the event-loop. Useful for setup operations. | |||
| 2996 | /// It must reset the state of the helper to the expected state at the beginning of the event loop: the same helper, | |||
| 2997 | /// or copies of it, might be used for multiple event loops (e.g. in the presence of systematic variations). | |||
| 2998 | /// * `void InitTask(TTreeReader *, unsigned int slot)`: each working thread shall call this method during the event | |||
| 2999 | /// loop, before processing a batch of entries. The pointer passed as argument, if not null, will point to the TTreeReader | |||
| 3000 | /// that RDataFrame has set up to read the task's batch of entries. It is passed to the helper to allow certain advanced optimizations; | |||
| 3001 | /// it should not usually serve any purpose for the Helper. This method is often no-op for simple helpers. | |||
| 3002 | /// * `void Exec(unsigned int slot, ColumnTypes...columnValues)`: each working thread shall call this method | |||
| 3003 | /// during the event-loop, possibly concurrently. No two threads will ever call Exec with the same 'slot' value: | |||
| 3004 | /// this parameter is there to facilitate writing thread-safe helpers. The other arguments will be the values of | |||
| 3005 | /// the requested columns for the particular entry being processed. | |||
| 3006 | /// * `void Finalize()`: this method is called at the end of the event loop. Commonly used to finalize the contents of the result. | |||
| 3007 | /// * `std::string GetActionName()`: it returns a string identifier for this type of action that RDataFrame will use in | |||
| 3008 | /// diagnostics, SaveGraph(), etc. | |||
| 3009 | /// | |||
| 3010 | /// ### Optional methods | |||
| 3011 | /// | |||
| 3012 | /// If these methods are implemented they enable extra functionality as per the description below. | |||
| 3013 | /// | |||
| 3014 | /// * `Result_t &PartialUpdate(unsigned int slot)`: if present, it must return the value of the partial result of this action for the given 'slot'. | |||
| 3015 | /// Different threads might call this method concurrently, but will do so with different 'slot' numbers. | |||
| 3016 | /// RDataFrame leverages this method to implement RResultPtr::OnPartialResult(). | |||
| 3017 | /// * `ROOT::RDF::SampleCallback_t GetSampleCallback()`: if present, it must return a callable with the | |||
| 3018 | /// appropriate signature (see ROOT::RDF::SampleCallback_t) that will be invoked at the beginning of the processing | |||
| 3019 | /// of every sample, as in DefinePerSample(). | |||
| 3020 | /// * `Helper MakeNew(void *newResult, std::string_view variation = "nominal")`: if implemented, it enables varying | |||
| 3021 | /// the action's result with VariationsFor(). It takes a type-erased new result that can be safely cast to a | |||
| 3022 | /// `std::shared_ptr<Result_t> *` (a pointer to shared pointer) and should be used as the action's output result. | |||
| 3023 | /// The function optionally takes the name of the current variation which could be useful in customizing its behaviour. | |||
| 3024 | /// | |||
| 3025 | /// In case Book is called without specifying column types as template arguments, corresponding typed code will be just-in-time compiled | |||
| 3026 | /// by RDataFrame. In that case the Helper class needs to be known to the ROOT interpreter. | |||
| 3027 | /// | |||
| 3028 | /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr. | |||
| 3029 | /// | |||
| 3030 | /// ### Examples | |||
| 3031 | /// See [this tutorial](https://root.cern/doc/master/df018__customActions_8C.html) for an example implementation of an action helper. | |||
| 3032 | /// | |||
| 3033 | /// It is also possible to inspect the code used by built-in RDataFrame actions at ActionHelpers.hxx. | |||
| 3034 | /// | |||
| 3035 | // clang-format on | |||
| 3036 | template <typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename Helper> | |||
| 3037 | RResultPtr<typename std::decay_t<Helper>::Result_t> Book(Helper &&helper, const ColumnNames_t &columns = {}) | |||
| 3038 | { | |||
| 3039 | using HelperT = std::decay_t<Helper>; | |||
| 3040 | // TODO add more static sanity checks on Helper | |||
| 3041 | using AH = RDFDetail::RActionImpl<HelperT>; | |||
| 3042 | static_assert(std::is_base_of<AH, HelperT>::value && std::is_convertible<HelperT *, AH *>::value, | |||
| 3043 | "Action helper of type T must publicly inherit from ROOT::Detail::RDF::RActionImpl<T>"); | |||
| 3044 | ||||
| 3045 | auto hPtr = std::make_shared<HelperT>(std::forward<Helper>(helper)); | |||
| 3046 | auto resPtr = hPtr->GetResultPtr(); | |||
| 3047 | ||||
| 3048 | if (std::is_same<FirstColumn, RDFDetail::RInferredType>::value && columns.empty()) { | |||
| 3049 | return CallCreateActionWithoutColsIfPossible<HelperT>(resPtr, hPtr, TTraits::TypeList<FirstColumn>{}); | |||
| 3050 | } else { | |||
| 3051 | return CreateAction<RDFInternal::ActionTags::Book, FirstColumn, OtherColumns...>(columns, resPtr, hPtr, | |||
| 3052 | fProxiedPtr, columns.size()); | |||
| 3053 | } | |||
| 3054 | } | |||
| 3055 | ||||
| 3056 | //////////////////////////////////////////////////////////////////////////// | |||
| 3057 | /// \brief Provides a representation of the columns in the dataset. | |||
| 3058 | /// \tparam ColumnTypes variadic list of branch/column types. | |||
| 3059 | /// \param[in] columnList Names of the columns to be displayed. | |||
| 3060 | /// \param[in] nRows Number of events for each column to be displayed. | |||
| 3061 | /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row. | |||
| 3062 | /// \return the `RDisplay` instance wrapped in a RResultPtr. | |||
| 3063 | /// | |||
| 3064 | /// This function returns a `RResultPtr<RDisplay>` containing all the entries to be displayed, organized in a tabular | |||
| 3065 | /// form. RDisplay will either print on the standard output a summarized version through `RDisplay::Print()` or will | |||
| 3066 | /// return a complete version through `RDisplay::AsString()`. | |||
| 3067 | /// | |||
| 3068 | /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see | |||
| 3069 | /// RResultPtr. | |||
| 3070 | /// | |||
| 3071 | /// Example usage: | |||
| 3072 | /// ~~~{.cpp} | |||
| 3073 | /// // Preparing the RResultPtr<RDisplay> object with all columns and default number of entries | |||
| 3074 | /// auto d1 = rdf.Display(""); | |||
| 3075 | /// // Preparing the RResultPtr<RDisplay> object with two columns and 128 entries | |||
| 3076 | /// auto d2 = d.Display({"x", "y"}, 128); | |||
| 3077 | /// // Printing the short representations, the event loop will run | |||
| 3078 | /// d1->Print(); | |||
| 3079 | /// d2->Print(); | |||
| 3080 | /// ~~~ | |||
| 3081 | template <typename... ColumnTypes> | |||
| 3082 | RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10) | |||
| 3083 | { | |||
| 3084 | CheckIMTDisabled("Display"); | |||
| 3085 | auto newCols = columnList; | |||
| 3086 | newCols.insert(newCols.begin(), "rdfentry_"); // Artificially insert first column | |||
| 3087 | auto displayer = std::make_shared<RDisplay>(newCols, GetColumnTypeNamesList(newCols), nMaxCollectionElements); | |||
| 3088 | using displayHelperArgs_t = std::pair<size_t, std::shared_ptr<RDisplay>>; | |||
| 3089 | // Need to add ULong64_t type corresponding to the first column rdfentry_ | |||
| 3090 | return CreateAction<RDFInternal::ActionTags::Display, ULong64_t, ColumnTypes...>( | |||
| 3091 | std::move(newCols), displayer, std::make_shared<displayHelperArgs_t>(nRows, displayer), fProxiedPtr); | |||
| 3092 | } | |||
| 3093 | ||||
| 3094 | //////////////////////////////////////////////////////////////////////////// | |||
| 3095 | /// \brief Provides a representation of the columns in the dataset. | |||
| 3096 | /// \param[in] columnList Names of the columns to be displayed. | |||
| 3097 | /// \param[in] nRows Number of events for each column to be displayed. | |||
| 3098 | /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row. | |||
| 3099 | /// \return the `RDisplay` instance wrapped in a RResultPtr. | |||
| 3100 | /// | |||
| 3101 | /// This overload automatically infers the column types. | |||
| 3102 | /// See the previous overloads for further details. | |||
| 3103 | /// | |||
| 3104 | /// Invoked when no types are specified to Display | |||
| 3105 | RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10) | |||
| 3106 | { | |||
| 3107 | CheckIMTDisabled("Display"); | |||
| 3108 | auto newCols = columnList; | |||
| 3109 | newCols.insert(newCols.begin(), "rdfentry_"); // Artificially insert first column | |||
| 3110 | auto displayer = std::make_shared<RDisplay>(newCols, GetColumnTypeNamesList(newCols), nMaxCollectionElements); | |||
| 3111 | using displayHelperArgs_t = std::pair<size_t, std::shared_ptr<RDisplay>>; | |||
| 3112 | return CreateAction<RDFInternal::ActionTags::Display, RDFDetail::RInferredType>( | |||
| 3113 | std::move(newCols), displayer, std::make_shared<displayHelperArgs_t>(nRows, displayer), fProxiedPtr, | |||
| 3114 | columnList.size() + 1); | |||
| 3115 | } | |||
| 3116 | ||||
| 3117 | //////////////////////////////////////////////////////////////////////////// | |||
| 3118 | /// \brief Provides a representation of the columns in the dataset. | |||
| 3119 | /// \param[in] columnNameRegexp A regular expression to select the columns. | |||
| 3120 | /// \param[in] nRows Number of events for each column to be displayed. | |||
| 3121 | /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row. | |||
| 3122 | /// \return the `RDisplay` instance wrapped in a RResultPtr. | |||
| 3123 | /// | |||
| 3124 | /// The existing columns are matched against the regular expression. If the string provided | |||
| 3125 | /// is empty, all columns are selected. | |||
| 3126 | /// See the previous overloads for further details. | |||
| 3127 | RResultPtr<RDisplay> | |||
| 3128 | Display(std::string_view columnNameRegexp = "", size_t nRows = 5, size_t nMaxCollectionElements = 10) | |||
| 3129 | { | |||
| 3130 | const auto columnNames = GetColumnNames(); | |||
| 3131 | const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Display"); | |||
| 3132 | return Display(selectedColumns, nRows, nMaxCollectionElements); | |||
| 3133 | } | |||
| 3134 | ||||
| 3135 | //////////////////////////////////////////////////////////////////////////// | |||
| 3136 | /// \brief Provides a representation of the columns in the dataset. | |||
| 3137 | /// \param[in] columnList Names of the columns to be displayed. | |||
| 3138 | /// \param[in] nRows Number of events for each column to be displayed. | |||
| 3139 | /// \param[in] nMaxCollectionElements Number of maximum elements in collection. | |||
| 3140 | /// \return the `RDisplay` instance wrapped in a RResultPtr. | |||
| 3141 | /// | |||
| 3142 | /// See the previous overloads for further details. | |||
| 3143 | RResultPtr<RDisplay> | |||
| 3144 | Display(std::initializer_list<std::string> columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10) | |||
| 3145 | { | |||
| 3146 | ColumnNames_t selectedColumns(columnList); | |||
| 3147 | return Display(selectedColumns, nRows, nMaxCollectionElements); | |||
| 3148 | } | |||
| 3149 | ||||
| 3150 | private: | |||
| 3151 | template <typename F, typename DefineType, typename RetType = typename TTraits::CallableTraits<F>::ret_type> | |||
| 3152 | std::enable_if_t<std::is_default_constructible<RetType>::value, RInterface<Proxied, DS_t>> | |||
| 3153 | DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns, const std::string &where) | |||
| 3154 | { | |||
| 3155 | if (where.compare(0, 8, "Redefine") != 0) { // not a Redefine | |||
| 3156 | RDFInternal::CheckValidCppVarName(name, where); | |||
| 3157 | RDFInternal::CheckForRedefinition(where, name, fColRegister, fLoopManager->GetBranchNames(), | |||
| 3158 | GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{}); | |||
| 3159 | } else { | |||
| 3160 | RDFInternal::CheckForDefinition(where, name, fColRegister, fLoopManager->GetBranchNames(), | |||
| 3161 | GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{}); | |||
| 3162 | RDFInternal::CheckForNoVariations(where, name, fColRegister); | |||
| 3163 | } | |||
| 3164 | ||||
| 3165 | using ArgTypes_t = typename TTraits::CallableTraits<F>::arg_types; | |||
| 3166 | using ColTypesTmp_t = typename RDFInternal::RemoveFirstParameterIf< | |||
| 3167 | std::is_same<DefineType, RDFDetail::ExtraArgsForDefine::Slot>::value, ArgTypes_t>::type; | |||
| 3168 | using ColTypes_t = typename RDFInternal::RemoveFirstTwoParametersIf< | |||
| 3169 | std::is_same<DefineType, RDFDetail::ExtraArgsForDefine::SlotAndEntry>::value, ColTypesTmp_t>::type; | |||
| 3170 | ||||
| 3171 | constexpr auto nColumns = ColTypes_t::list_size; | |||
| 3172 | ||||
| 3173 | const auto validColumnNames = GetValidatedColumnNames(nColumns, columns); | |||
| 3174 | CheckAndFillDSColumns(validColumnNames, ColTypes_t()); | |||
| 3175 | ||||
| 3176 | // Declare return type to the interpreter, for future use by jitted actions | |||
| 3177 | auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType)); | |||
| 3178 | if (retTypeName.empty()) { | |||
| 3179 | // The type is not known to the interpreter. | |||
| 3180 | // We must not error out here, but if/when this column is used in jitted code | |||
| 3181 | const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType)); | |||
| 3182 | retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType; | |||
| 3183 | } | |||
| 3184 | ||||
| 3185 | using NewCol_t = RDFDetail::RDefine<F, DefineType>; | |||
| 3186 | auto newColumn = std::make_shared<NewCol_t>(name, retTypeName, std::forward<F>(expression), validColumnNames, | |||
| 3187 | fColRegister, *fLoopManager); | |||
| 3188 | ||||
| 3189 | RDFInternal::RColumnRegister newCols(fColRegister); | |||
| 3190 | newCols.AddDefine(std::move(newColumn)); | |||
| 3191 | ||||
| 3192 | RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols)); | |||
| 3193 | ||||
| 3194 | return newInterface; | |||
| 3195 | } | |||
| 3196 | ||||
| 3197 | // This overload is chosen when the callable passed to Define or DefineSlot returns void. | |||
| 3198 | // It simply fires a compile-time error. This is preferable to a static_assert in the main `Define` overload because | |||
| 3199 | // this way compilation of `Define` has no way to continue after throwing the error. | |||
| 3200 | template <typename F, typename DefineType, typename RetType = typename TTraits::CallableTraits<F>::ret_type, | |||
| 3201 | bool IsFStringConv = std::is_convertible<F, std::string>::value, | |||
| 3202 | bool IsRetTypeDefConstr = std::is_default_constructible<RetType>::value> | |||
| 3203 | std::enable_if_t<!IsFStringConv && !IsRetTypeDefConstr, RInterface<Proxied, DS_t>> | |||
| 3204 | DefineImpl(std::string_view, F, const ColumnNames_t &, const std::string &) | |||
| 3205 | { | |||
| 3206 | static_assert(std::is_default_constructible<typename TTraits::CallableTraits<F>::ret_type>::value, | |||
| 3207 | "Error in `Define`: type returned by expression is not default-constructible"); | |||
| 3208 | return *this; // never reached | |||
| 3209 | } | |||
| 3210 | ||||
| 3211 | template <typename... ColumnTypes> | |||
| 3212 | RResultPtr<RInterface<RLoopManager>> SnapshotImpl(std::string_view fullTreeName, std::string_view filename, | |||
| 3213 | const ColumnNames_t &columnList, const RSnapshotOptions &options) | |||
| 3214 | { | |||
| 3215 | const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot"); | |||
| 3216 | ||||
| 3217 | RDFInternal::CheckTypesAndPars(sizeof...(ColumnTypes), columnListWithoutSizeColumns.size()); | |||
| 3218 | // validCols has aliases resolved, while columnListWithoutSizeColumns still has aliases in it. | |||
| 3219 | const auto validCols = GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns); | |||
| 3220 | RDFInternal::CheckForDuplicateSnapshotColumns(validCols); | |||
| 3221 | CheckAndFillDSColumns(validCols, TTraits::TypeList<ColumnTypes...>()); | |||
| 3222 | ||||
| 3223 | const auto parsedTreePath = RDFInternal::ParseTreePath(fullTreeName); | |||
| 3224 | const auto &treename = parsedTreePath.fTreeName; | |||
| 3225 | const auto &dirname = parsedTreePath.fDirName; | |||
| 3226 | ||||
| 3227 | ::TDirectory::TContext ctxt; | |||
| 3228 | ||||
| 3229 | RResultPtr<RInterface<RLoopManager>> resPtr; | |||
| 3230 | ||||
| 3231 | if (options.fOutputFormat == ESnapshotOutputFormat::kRNTuple) { | |||
| 3232 | if (RDFInternal::GetDataSourceLabel(*this) == "TTreeDS") { | |||
| 3233 | throw std::runtime_error("Snapshotting from TTree to RNTuple is not yet supported. The current recommended " | |||
| 3234 | "way to convert TTrees to RNTuple is through the RNTupleImporter."); | |||
| 3235 | } | |||
| 3236 | ||||
| 3237 | auto newRDF = | |||
| 3238 | std::make_shared<RInterface<RLoopManager>>(std::make_shared<RLoopManager>(columnListWithoutSizeColumns)); | |||
| 3239 | ||||
| 3240 | auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{ | |||
| 3241 | std::string(filename), std::string(dirname), std::string(treename), columnListWithoutSizeColumns, options, | |||
| 3242 | newRDF->GetLoopManager(), GetLoopManager(), true /* fToRNTuple */}); | |||
| 3243 | ||||
| 3244 | // The Snapshot helper will use validCols (with aliases resolved) as input columns, and | |||
| 3245 | // columnListWithoutSizeColumns (still with aliases in it, passed through snapHelperArgs) as output column | |||
| 3246 | // names. | |||
| 3247 | resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, ColumnTypes...>(validCols, newRDF, snapHelperArgs, | |||
| 3248 | fProxiedPtr); | |||
| 3249 | } else { | |||
| 3250 | if (RDFInternal::GetDataSourceLabel(*this) == "RNTupleDS" && | |||
| 3251 | options.fOutputFormat == ESnapshotOutputFormat::kDefault) { | |||
| 3252 | Warning("Snapshot", | |||
| 3253 | "The default Snapshot output data format is TTree, but the input data format is RNTuple. If you " | |||
| 3254 | "want to Snapshot to RNTuple or suppress this warning, set the appropriate fOutputFormat option in " | |||
| 3255 | "RSnapshotOptions. Note that this current default behaviour might change in the future."); | |||
| 3256 | } | |||
| 3257 | ||||
| 3258 | // We create an RLoopManager without a data source. This needs to be initialised when the output TTree dataset | |||
| 3259 | // has actually been created and written to TFile, i.e. at the end of the Snapshot execution. | |||
| 3260 | auto newRDF = | |||
| 3261 | std::make_shared<RInterface<RLoopManager>>(std::make_shared<RLoopManager>(columnListWithoutSizeColumns)); | |||
| 3262 | ||||
| 3263 | auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{ | |||
| 3264 | std::string(filename), std::string(dirname), std::string(treename), columnListWithoutSizeColumns, options, | |||
| 3265 | newRDF->GetLoopManager(), GetLoopManager(), false /* fToRNTuple */}); | |||
| 3266 | ||||
| 3267 | // The Snapshot helper will use validCols (with aliases resolved) as input columns, and | |||
| 3268 | // columnListWithoutSizeColumns (still with aliases in it, passed through snapHelperArgs) as output column | |||
| 3269 | // names. | |||
| 3270 | resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, ColumnTypes...>(validCols, newRDF, snapHelperArgs, | |||
| 3271 | fProxiedPtr); | |||
| 3272 | } | |||
| 3273 | ||||
| 3274 | if (!options.fLazy) | |||
| 3275 | *resPtr; | |||
| 3276 | return resPtr; | |||
| 3277 | } | |||
| 3278 | ||||
| 3279 | //////////////////////////////////////////////////////////////////////////// | |||
| 3280 | /// \brief Implementation of cache. | |||
| 3281 | template <typename... ColTypes, std::size_t... S> | |||
| 3282 | RInterface<RLoopManager> CacheImpl(const ColumnNames_t &columnList, std::index_sequence<S...>) | |||
| 3283 | { | |||
| 3284 | const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot"); | |||
| 3285 | ||||
| 3286 | // Check at compile time that the columns types are copy constructible | |||
| 3287 | constexpr bool areCopyConstructible = | |||
| 3288 | RDFInternal::TEvalAnd<std::is_copy_constructible<ColTypes>::value...>::value; | |||
| 3289 | static_assert(areCopyConstructible, "Columns of a type which is not copy constructible cannot be cached yet."); | |||
| 3290 | ||||
| 3291 | RDFInternal::CheckTypesAndPars(sizeof...(ColTypes), columnListWithoutSizeColumns.size()); | |||
| 3292 | ||||
| 3293 | auto colHolders = std::make_tuple(Take<ColTypes>(columnListWithoutSizeColumns[S])...); | |||
| 3294 | auto ds = std::make_unique<RLazyDS<ColTypes...>>( | |||
| 3295 | std::make_pair(columnListWithoutSizeColumns[S], std::get<S>(colHolders))...); | |||
| 3296 | ||||
| 3297 | RInterface<RLoopManager> cachedRDF(std::make_shared<RLoopManager>(std::move(ds), columnListWithoutSizeColumns)); | |||
| 3298 | ||||
| 3299 | return cachedRDF; | |||
| 3300 | } | |||
| 3301 | ||||
| 3302 | template <bool IsSingleColumn, typename F> | |||
| 3303 | RInterface<Proxied, DS_t> | |||
| 3304 | VaryImpl(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns, | |||
| 3305 | const std::vector<std::string> &variationTags, std::string_view variationName) | |||
| 3306 | { | |||
| 3307 | using F_t = std::decay_t<F>; | |||
| 3308 | using ColTypes_t = typename TTraits::CallableTraits<F_t>::arg_types; | |||
| 3309 | using RetType = typename TTraits::CallableTraits<F_t>::ret_type; | |||
| 3310 | constexpr auto nColumns = ColTypes_t::list_size; | |||
| 3311 | ||||
| 3312 | SanityChecksForVary<RetType>(colNames, variationTags, variationName); | |||
| 3313 | ||||
| 3314 | const auto validColumnNames = GetValidatedColumnNames(nColumns, inputColumns); | |||
| 3315 | CheckAndFillDSColumns(validColumnNames, ColTypes_t{}); | |||
| 3316 | ||||
| 3317 | auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType)); | |||
| 3318 | if (retTypeName.empty()) { | |||
| 3319 | // The type is not known to the interpreter, but we don't want to error out | |||
| 3320 | // here, rather if/when this column is used in jitted code, so we inject a broken but telling type name. | |||
| 3321 | const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType)); | |||
| 3322 | retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType; | |||
| 3323 | } | |||
| 3324 | ||||
| 3325 | auto variation = std::make_shared<RDFInternal::RVariation<F_t, IsSingleColumn>>( | |||
| 3326 | colNames, variationName, std::forward<F>(expression), variationTags, retTypeName, fColRegister, *fLoopManager, | |||
| 3327 | validColumnNames); | |||
| 3328 | ||||
| 3329 | RDFInternal::RColumnRegister newCols(fColRegister); | |||
| 3330 | newCols.AddVariation(std::move(variation)); | |||
| 3331 | ||||
| 3332 | RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols)); | |||
| 3333 | ||||
| 3334 | return newInterface; | |||
| 3335 | } | |||
| 3336 | ||||
| 3337 | RInterface<Proxied, DS_t> JittedVaryImpl(const std::vector<std::string> &colNames, std::string_view expression, | |||
| 3338 | const std::vector<std::string> &variationTags, | |||
| 3339 | std::string_view variationName, bool isSingleColumn) | |||
| 3340 | { | |||
| 3341 | R__ASSERT(!variationTags.empty() && "Must have at least one variation.")do { if (__builtin_expect(!!(!(!variationTags.empty() && "Must have at least one variation.")), 0)) ::Fatal("", kAssertMsg , "!variationTags.empty() && \"Must have at least one variation.\"" , 3341, "/cvmfs/belle.cern.ch/el9/externals/v02-04-00/include/root/ROOT/RDF/RInterface.hxx" ); } while (false); | |||
| 3342 | R__ASSERT(!colNames.empty() && "Must have at least one varied column.")do { if (__builtin_expect(!!(!(!colNames.empty() && "Must have at least one varied column." )), 0)) ::Fatal("", kAssertMsg, "!colNames.empty() && \"Must have at least one varied column.\"" , 3342, "/cvmfs/belle.cern.ch/el9/externals/v02-04-00/include/root/ROOT/RDF/RInterface.hxx" ); } while (false); | |||
| 3343 | R__ASSERT(!variationName.empty() && "Must provide a variation name.")do { if (__builtin_expect(!!(!(!variationName.empty() && "Must provide a variation name.")), 0)) ::Fatal("", kAssertMsg , "!variationName.empty() && \"Must provide a variation name.\"" , 3343, "/cvmfs/belle.cern.ch/el9/externals/v02-04-00/include/root/ROOT/RDF/RInterface.hxx" ); } while (false); | |||
| 3344 | ||||
| 3345 | for (auto &colName : colNames) { | |||
| 3346 | RDFInternal::CheckValidCppVarName(colName, "Vary"); | |||
| 3347 | RDFInternal::CheckForDefinition("Vary", colName, fColRegister, fLoopManager->GetBranchNames(), | |||
| 3348 | GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{}); | |||
| 3349 | } | |||
| 3350 | RDFInternal::CheckValidCppVarName(variationName, "Vary"); | |||
| 3351 | ||||
| 3352 | // when varying multiple columns, they must be different columns | |||
| 3353 | if (colNames.size() > 1) { | |||
| 3354 | std::set<std::string> uniqueCols(colNames.begin(), colNames.end()); | |||
| 3355 | if (uniqueCols.size() != colNames.size()) | |||
| 3356 | throw std::logic_error("A column name was passed to the same Vary invocation multiple times."); | |||
| 3357 | } | |||
| 3358 | ||||
| 3359 | auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr)); | |||
| 3360 | auto jittedVariation = RDFInternal::BookVariationJit( | |||
| 3361 | colNames, variationName, variationTags, expression, *fLoopManager, GetDataSource(), fColRegister, | |||
| 3362 | fLoopManager->GetBranchNames(), upcastNodeOnHeap, isSingleColumn); | |||
| 3363 | ||||
| 3364 | RDFInternal::RColumnRegister newColRegister(fColRegister); | |||
| 3365 | newColRegister.AddVariation(std::move(jittedVariation)); | |||
| 3366 | ||||
| 3367 | RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newColRegister)); | |||
| 3368 | ||||
| 3369 | return newInterface; | |||
| 3370 | } | |||
| 3371 | ||||
| 3372 | template <typename Helper, typename ActionResultType> | |||
| 3373 | auto CallCreateActionWithoutColsIfPossible(const std::shared_ptr<ActionResultType> &resPtr, | |||
| 3374 | const std::shared_ptr<Helper> &hPtr, | |||
| 3375 | TTraits::TypeList<RDFDetail::RInferredType>) | |||
| 3376 | -> decltype(hPtr->Exec(0u), RResultPtr<ActionResultType>{}) | |||
| 3377 | { | |||
| 3378 | return CreateAction<RDFInternal::ActionTags::Book>(/*columns=*/{}, resPtr, hPtr, fProxiedPtr, 0u); | |||
| 3379 | } | |||
| 3380 | ||||
| 3381 | template <typename Helper, typename ActionResultType, typename... Others> | |||
| 3382 | RResultPtr<ActionResultType> | |||
| 3383 | CallCreateActionWithoutColsIfPossible(const std::shared_ptr<ActionResultType> &, | |||
| 3384 | const std::shared_ptr<Helper>& /*hPtr*/, | |||
| 3385 | Others...) | |||
| 3386 | { | |||
| 3387 | throw std::logic_error(std::string("An action was booked with no input columns, but the action requires " | |||
| 3388 | "columns! The action helper type was ") + | |||
| 3389 | typeid(Helper).name()); | |||
| 3390 | return {}; | |||
| 3391 | } | |||
| 3392 | ||||
| 3393 | protected: | |||
| 3394 | RInterface(const std::shared_ptr<Proxied> &proxied, RLoopManager &lm, | |||
| 3395 | const RDFInternal::RColumnRegister &colRegister) | |||
| 3396 | : RInterfaceBase(lm, colRegister), fProxiedPtr(proxied) | |||
| 3397 | { | |||
| 3398 | } | |||
| 3399 | ||||
| 3400 | const std::shared_ptr<Proxied> &GetProxiedPtr() const { return fProxiedPtr; } | |||
| 3401 | }; | |||
| 3402 | ||||
| 3403 | } // namespace RDF | |||
| 3404 | ||||
| 3405 | } // namespace ROOT | |||
| 3406 | ||||
| 3407 | #endif // ROOT_RDF_INTERFACE |
| 1 | // Author: Enrico Guiraud, Danilo Piparo CERN 02/2018 |
| 2 | |
| 3 | /************************************************************************* |
| 4 | * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. * |
| 5 | * All rights reserved. * |
| 6 | * * |
| 7 | * For the licensing terms see $ROOTSYS/LICENSE. * |
| 8 | * For the list of contributors see $ROOTSYS/README/CREDITS. * |
| 9 | *************************************************************************/ |
| 10 | |
| 11 | #ifndef ROOT_RDF_TINTERFACE_UTILS |
| 12 | #define ROOT_RDF_TINTERFACE_UTILS |
| 13 | |
| 14 | #include "RColumnRegister.hxx" |
| 15 | #include <ROOT/RDF/RAction.hxx> |
| 16 | #include <ROOT/RDF/ActionHelpers.hxx> // for BuildAction |
| 17 | #include <ROOT/RDF/RColumnRegister.hxx> |
| 18 | #include <ROOT/RDF/RDefine.hxx> |
| 19 | #include <ROOT/RDF/RDefinePerSample.hxx> |
| 20 | #include <ROOT/RDF/RFilter.hxx> |
| 21 | #include <ROOT/RDF/Utils.hxx> |
| 22 | #include <ROOT/RDF/RJittedAction.hxx> |
| 23 | #include <ROOT/RDF/RJittedDefine.hxx> |
| 24 | #include <ROOT/RDF/RJittedFilter.hxx> |
| 25 | #include <ROOT/RDF/RJittedVariation.hxx> |
| 26 | #include <ROOT/RDF/RLoopManager.hxx> |
| 27 | #include <string_view> |
| 28 | #include <ROOT/RDF/RVariation.hxx> |
| 29 | #include <ROOT/TypeTraits.hxx> |
| 30 | #include <TError.h> // gErrorIgnoreLevel |
| 31 | #include <TH1.h> |
| 32 | #include <TROOT.h> // IsImplicitMTEnabled |
| 33 | |
| 34 | #include <deque> |
| 35 | #include <functional> |
| 36 | #include <map> |
| 37 | #include <memory> |
| 38 | #include <string> |
| 39 | #include <type_traits> |
| 40 | #include <typeinfo> |
| 41 | #include <vector> |
| 42 | #include <unordered_map> |
| 43 | |
| 44 | class TObjArray; |
| 45 | class TTree; |
| 46 | namespace ROOT { |
| 47 | namespace Detail { |
| 48 | namespace RDF { |
| 49 | class RNodeBase; |
| 50 | } |
| 51 | } |
| 52 | namespace RDF { |
| 53 | template <typename T> |
| 54 | class RResultPtr; |
| 55 | template<typename T, typename V> |
| 56 | class RInterface; |
| 57 | using RNode = RInterface<::ROOT::Detail::RDF::RNodeBase, void>; |
| 58 | class RDataSource; |
| 59 | } // namespace RDF |
| 60 | |
| 61 | } // namespace ROOT |
| 62 | |
| 63 | /// \cond HIDDEN_SYMBOLS |
| 64 | |
| 65 | namespace ROOT { |
| 66 | namespace Internal { |
| 67 | namespace RDF { |
| 68 | using namespace ROOT::Detail::RDF; |
| 69 | using namespace ROOT::RDF; |
| 70 | namespace TTraits = ROOT::TypeTraits; |
| 71 | |
| 72 | std::string DemangleTypeIdName(const std::type_info &typeInfo); |
| 73 | |
| 74 | ColumnNames_t |
| 75 | ConvertRegexToColumns(const ColumnNames_t &colNames, std::string_view columnNameRegexp, std::string_view callerName); |
| 76 | |
| 77 | /// An helper object that sets and resets gErrorIgnoreLevel via RAII. |
| 78 | class RIgnoreErrorLevelRAII { |
| 79 | private: |
| 80 | int fCurIgnoreErrorLevel = gErrorIgnoreLevel; |
| 81 | |
| 82 | public: |
| 83 | RIgnoreErrorLevelRAII(int errorIgnoreLevel) { gErrorIgnoreLevel = errorIgnoreLevel; } |
| 84 | ~RIgnoreErrorLevelRAII() { gErrorIgnoreLevel = fCurIgnoreErrorLevel; } |
| 85 | }; |
| 86 | |
| 87 | /****** BuildAction overloads *******/ |
| 88 | |
| 89 | // clang-format off |
/// This namespace defines types to be used for tag dispatching in RInterface.
/// Every tag is an empty struct: only its type matters, it carries no data.
namespace ActionTags {
// Histogram-like results
struct Histo1D {};
struct Histo2D {};
struct Histo3D {};
struct HistoND {};
// Graphs
struct Graph {};
struct GraphAsymmErrors {};
// Profiles
struct Profile1D {};
struct Profile2D {};
// Scalar reductions
struct Min {};
struct Max {};
struct Sum {};
struct Mean {};
struct Fill {};
struct StdDev {};
// Other actions
struct Display {};
struct Snapshot {};
struct Book {};
} // namespace ActionTags
| 110 | // clang-format on |
| 111 | |
| 112 | template <typename T, bool ISV6HISTO = std::is_base_of<TH1, std::decay_t<T>>::value> |
| 113 | struct HistoUtils { |
| 114 | static void SetCanExtendAllAxes(T &h) { h.SetCanExtend(::TH1::kAllAxes); } |
| 115 | static bool HasAxisLimits(T &h) |
| 116 | { |
| 117 | auto xaxis = h.GetXaxis(); |
| 118 | return !(xaxis->GetXmin() == 0. && xaxis->GetXmax() == 0.); |
| 119 | } |
| 120 | }; |
| 121 | |
| 122 | template <typename T> |
| 123 | struct HistoUtils<T, false> { |
| 124 | static void SetCanExtendAllAxes(T &) {} |
| 125 | static bool HasAxisLimits(T &) { return true; } |
| 126 | }; |
| 127 | |
| 128 | // Generic filling (covers Histo2D, Histo3D, HistoND, Profile1D and Profile2D actions, with and without weights) |
| 129 | template <typename... ColTypes, typename ActionTag, typename ActionResultType, typename PrevNodeType> |
| 130 | std::unique_ptr<RActionBase> |
| 131 | BuildAction(const ColumnNames_t &bl, const std::shared_ptr<ActionResultType> &h, const unsigned int nSlots, |
| 132 | std::shared_ptr<PrevNodeType> prevNode, ActionTag, const RColumnRegister &colRegister) |
| 133 | { |
| 134 | using Helper_t = FillHelper<ActionResultType>; |
| 135 | using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColTypes...>>; |
| 136 | return std::make_unique<Action_t>(Helper_t(h, nSlots), bl, std::move(prevNode), colRegister); |
| 137 | } |
| 138 | |
| 139 | // Histo1D filling (must handle the special case of distinguishing FillHelper and BufferedFillHelper |
| 140 | template <typename... ColTypes, typename PrevNodeType> |
| 141 | std::unique_ptr<RActionBase> |
| 142 | BuildAction(const ColumnNames_t &bl, const std::shared_ptr<::TH1D> &h, const unsigned int nSlots, |
| 143 | std::shared_ptr<PrevNodeType> prevNode, ActionTags::Histo1D, const RColumnRegister &colRegister) |
| 144 | { |
| 145 | auto hasAxisLimits = HistoUtils<::TH1D>::HasAxisLimits(*h); |
| 146 | |
| 147 | if (hasAxisLimits || !IsImplicitMTEnabled()) { |
| 148 | using Helper_t = FillHelper<::TH1D>; |
| 149 | using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColTypes...>>; |
| 150 | return std::make_unique<Action_t>(Helper_t(h, nSlots), bl, std::move(prevNode), colRegister); |
| 151 | } else { |
| 152 | using Helper_t = BufferedFillHelper; |
| 153 | using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColTypes...>>; |
| 154 | return std::make_unique<Action_t>(Helper_t(h, nSlots), bl, std::move(prevNode), colRegister); |
| 155 | } |
| 156 | } |
| 157 | |
| 158 | template <typename... ColTypes, typename PrevNodeType> |
| 159 | std::unique_ptr<RActionBase> |
| 160 | BuildAction(const ColumnNames_t &bl, const std::shared_ptr<TGraph> &g, const unsigned int nSlots, |
| 161 | std::shared_ptr<PrevNodeType> prevNode, ActionTags::Graph, const RColumnRegister &colRegister) |
| 162 | { |
| 163 | using Helper_t = FillTGraphHelper; |
| 164 | using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColTypes...>>; |
| 165 | return std::make_unique<Action_t>(Helper_t(g, nSlots), bl, std::move(prevNode), colRegister); |
| 166 | } |
| 167 | |
| 168 | template <typename... ColTypes, typename PrevNodeType> |
| 169 | std::unique_ptr<RActionBase> |
| 170 | BuildAction(const ColumnNames_t &bl, const std::shared_ptr<TGraphAsymmErrors> &g, const unsigned int nSlots, |
| 171 | std::shared_ptr<PrevNodeType> prevNode, ActionTags::GraphAsymmErrors, const RColumnRegister &colRegister) |
| 172 | { |
| 173 | using Helper_t = FillTGraphAsymmErrorsHelper; |
| 174 | using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColTypes...>>; |
| 175 | return std::make_unique<Action_t>(Helper_t(g, nSlots), bl, std::move(prevNode), colRegister); |
| 176 | } |
| 177 | |
| 178 | // Min action |
| 179 | template <typename ColType, typename PrevNodeType, typename ActionResultType> |
| 180 | std::unique_ptr<RActionBase> |
| 181 | BuildAction(const ColumnNames_t &bl, const std::shared_ptr<ActionResultType> &minV, const unsigned int nSlots, |
| 182 | std::shared_ptr<PrevNodeType> prevNode, ActionTags::Min, const RColumnRegister &colRegister) |
| 183 | { |
| 184 | using Helper_t = MinHelper<ActionResultType>; |
| 185 | using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColType>>; |
| 186 | return std::make_unique<Action_t>(Helper_t(minV, nSlots), bl, std::move(prevNode), colRegister); |
| 187 | } |
| 188 | |
| 189 | // Max action |
| 190 | template <typename ColType, typename PrevNodeType, typename ActionResultType> |
| 191 | std::unique_ptr<RActionBase> |
| 192 | BuildAction(const ColumnNames_t &bl, const std::shared_ptr<ActionResultType> &maxV, const unsigned int nSlots, |
| 193 | std::shared_ptr<PrevNodeType> prevNode, ActionTags::Max, const RColumnRegister &colRegister) |
| 194 | { |
| 195 | using Helper_t = MaxHelper<ActionResultType>; |
| 196 | using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColType>>; |
| 197 | return std::make_unique<Action_t>(Helper_t(maxV, nSlots), bl, std::move(prevNode), colRegister); |
| 198 | } |
| 199 | |
| 200 | // Sum action |
| 201 | template <typename ColType, typename PrevNodeType, typename ActionResultType> |
| 202 | std::unique_ptr<RActionBase> |
| 203 | BuildAction(const ColumnNames_t &bl, const std::shared_ptr<ActionResultType> &sumV, const unsigned int nSlots, |
| 204 | std::shared_ptr<PrevNodeType> prevNode, ActionTags::Sum, const RColumnRegister &colRegister) |
| 205 | { |
| 206 | using Helper_t = SumHelper<ActionResultType>; |
| 207 | using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColType>>; |
| 208 | return std::make_unique<Action_t>(Helper_t(sumV, nSlots), bl, std::move(prevNode), colRegister); |
| 209 | } |
| 210 | |
| 211 | // Mean action |
| 212 | template <typename ColType, typename PrevNodeType> |
| 213 | std::unique_ptr<RActionBase> |
| 214 | BuildAction(const ColumnNames_t &bl, const std::shared_ptr<double> &meanV, const unsigned int nSlots, |
| 215 | std::shared_ptr<PrevNodeType> prevNode, ActionTags::Mean, const RColumnRegister &colRegister) |
| 216 | { |
| 217 | using Helper_t = MeanHelper; |
| 218 | using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColType>>; |
| 219 | return std::make_unique<Action_t>(Helper_t(meanV, nSlots), bl, std::move(prevNode), colRegister); |
| 220 | } |
| 221 | |
| 222 | // Standard Deviation action |
| 223 | template <typename ColType, typename PrevNodeType> |
| 224 | std::unique_ptr<RActionBase> |
| 225 | BuildAction(const ColumnNames_t &bl, const std::shared_ptr<double> &stdDeviationV, const unsigned int nSlots, |
| 226 | std::shared_ptr<PrevNodeType> prevNode, ActionTags::StdDev, const RColumnRegister &colRegister) |
| 227 | { |
| 228 | using Helper_t = StdDevHelper; |
| 229 | using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColType>>; |
| 230 | return std::make_unique<Action_t>(Helper_t(stdDeviationV, nSlots), bl, prevNode, colRegister); |
| 231 | } |
| 232 | |
| 233 | using displayHelperArgs_t = std::pair<size_t, std::shared_ptr<ROOT::RDF::RDisplay>>; |
| 234 | |
| 235 | // Display action |
| 236 | template <typename... ColTypes, typename PrevNodeType> |
| 237 | std::unique_ptr<RActionBase> |
| 238 | BuildAction(const ColumnNames_t &bl, const std::shared_ptr<displayHelperArgs_t> &helperArgs, const unsigned int, |
| 239 | std::shared_ptr<PrevNodeType> prevNode, ActionTags::Display, const RColumnRegister &colRegister) |
| 240 | { |
| 241 | using Helper_t = DisplayHelper<PrevNodeType>; |
| 242 | using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColTypes...>>; |
| 243 | return std::make_unique<Action_t>(Helper_t(helperArgs->first, helperArgs->second, prevNode), bl, prevNode, |
| 244 | colRegister); |
| 245 | } |
| 246 | |
/// Bundle of arguments consumed by the Snapshot overload of BuildAction, which forwards them to the
/// snapshot helper it constructs.
struct SnapshotHelperArgs {
   std::string fFileName; // forwarded to the helper as the file name
   std::string fDirName; // forwarded to the helper as the directory name
   std::string fTreeName; // forwarded to the helper as the tree name
   std::vector<std::string> fOutputColNames; // forwarded to the helper next to the input column names
   ROOT::RDF::RSnapshotOptions fOptions;
   // Raw pointers: ownership is not visible from this header -- NOTE(review): presumably non-owning, confirm.
   ROOT::Detail::RDF::RLoopManager *fOutputLoopManager;
   ROOT::Detail::RDF::RLoopManager *fInputLoopManager; // only passed on to the TTree snapshot helpers
   bool fToNTuple; // true selects SnapshotRNTupleHelper, false the TTree-based helpers
};
| 257 | |
| 258 | // SnapshotTTree action |
| 259 | template <typename... ColTypes, typename PrevNodeType> |
| 260 | std::unique_ptr<RActionBase> |
| 261 | BuildAction(const ColumnNames_t &colNames, const std::shared_ptr<SnapshotHelperArgs> &snapHelperArgs, |
| 262 | const unsigned int nSlots, std::shared_ptr<PrevNodeType> prevNode, ActionTags::Snapshot, |
| 263 | const RColumnRegister &colRegister) |
| 264 | { |
| 265 | const auto &filename = snapHelperArgs->fFileName; |
| 266 | const auto &dirname = snapHelperArgs->fDirName; |
| 267 | const auto &treename = snapHelperArgs->fTreeName; |
| 268 | const auto &outputColNames = snapHelperArgs->fOutputColNames; |
| 269 | const auto &options = snapHelperArgs->fOptions; |
| 270 | const auto &lmPtr = snapHelperArgs->fOutputLoopManager; |
| 271 | const auto &inputLM = snapHelperArgs->fInputLoopManager; |
| 272 | |
| 273 | auto sz = sizeof...(ColTypes); |
| 274 | std::vector<bool> isDefine(sz); |
| 275 | for (auto i = 0u; i < sz; ++i) |
| 276 | isDefine[i] = colRegister.IsDefineOrAlias(colNames[i]); |
| 277 | |
| 278 | std::unique_ptr<RActionBase> actionPtr; |
| 279 | if (snapHelperArgs->fToNTuple) { |
| 280 | if (!ROOT::IsImplicitMTEnabled()) { |
| 281 | // single-thread snapshot |
| 282 | using Helper_t = SnapshotRNTupleHelper<ColTypes...>; |
| 283 | using Action_t = RAction<Helper_t, PrevNodeType>; |
| 284 | |
| 285 | actionPtr.reset(new Action_t( |
| 286 | Helper_t(filename, dirname, treename, colNames, outputColNames, options, lmPtr, std::move(isDefine)), |
| 287 | colNames, prevNode, colRegister)); |
| 288 | } else { |
| 289 | // multi-thread snapshot to RNTuple is not yet supported |
| 290 | // TODO(fdegeus) Add MT snapshotting |
| 291 | throw std::runtime_error("Snapshot: Snapshotting to RNTuple with IMT enabled is not supported yet."); |
| 292 | } |
| 293 | |
| 294 | return actionPtr; |
| 295 | } else { |
| 296 | if (!ROOT::IsImplicitMTEnabled()) { |
| 297 | // single-thread snapshot |
| 298 | using Helper_t = SnapshotTTreeHelper<ColTypes...>; |
| 299 | using Action_t = RAction<Helper_t, PrevNodeType>; |
| 300 | actionPtr.reset(new Action_t(Helper_t(filename, dirname, treename, colNames, outputColNames, options, |
| 301 | std::move(isDefine), lmPtr, inputLM), |
| 302 | colNames, prevNode, colRegister)); |
| 303 | } else { |
| 304 | // multi-thread snapshot |
| 305 | using Helper_t = SnapshotTTreeHelperMT<ColTypes...>; |
| 306 | using Action_t = RAction<Helper_t, PrevNodeType>; |
| 307 | actionPtr.reset(new Action_t(Helper_t(nSlots, filename, dirname, treename, colNames, outputColNames, options, |
| 308 | std::move(isDefine), lmPtr, inputLM), |
| 309 | colNames, prevNode, colRegister)); |
| 310 | } |
| 311 | } |
| 312 | return actionPtr; |
| 313 | } |
| 314 | |
| 315 | // Book with custom helper type |
| 316 | template <typename... ColTypes, typename PrevNodeType, typename Helper_t> |
| 317 | std::unique_ptr<RActionBase> |
| 318 | BuildAction(const ColumnNames_t &bl, const std::shared_ptr<Helper_t> &h, const unsigned int /*nSlots*/, |
| 319 | std::shared_ptr<PrevNodeType> prevNode, ActionTags::Book, const RColumnRegister &colRegister) |
| 320 | { |
| 321 | using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColTypes...>>; |
| 322 | return std::make_unique<Action_t>(Helper_t(std::move(*h)), bl, std::move(prevNode), colRegister); |
| 323 | } |
| 324 | |
| 325 | /****** end BuildAndBook ******/ |
| 326 | |
| 327 | template <typename Filter> |
| 328 | void CheckFilter(Filter &) |
| 329 | { |
| 330 | using FilterRet_t = typename RDF::CallableTraits<Filter>::ret_type; |
| 331 | static_assert(std::is_convertible<FilterRet_t, bool>::value, |
| 332 | "filter expression returns a type that is not convertible to bool"); |
| 333 | } |
| 334 | |
// NOTE(review): the functions below are only declared here; their definitions are not visible in this header.
// The one-line descriptions are inferred from names and signatures and should be confirmed against the
// implementation.

/// Presumably returns `columnNames` without array-size helper columns; `action` looks like the name of the
/// calling action, used for diagnostics -- TODO confirm.
ColumnNames_t FilterArraySizeColNames(const ColumnNames_t &columnNames, const std::string &action);

/// Presumably validates that `var` is a valid C++ variable name; `where` names the calling context.
void CheckValidCppVarName(std::string_view var, const std::string &where);

/// Presumably validates a request to redefine the column `definedCol` -- TODO confirm the exact conditions.
void CheckForRedefinition(const std::string &where, std::string_view definedCol, const RColumnRegister &colRegister,
                          const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns);

/// Presumably verifies that the column `definedColView` is known -- TODO confirm the exact conditions.
void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister,
                        const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns);

/// Presumably verifies that no systematic variation is registered for the column `definedColView`.
void CheckForNoVariations(const std::string &where, std::string_view definedColView,
                          const RColumnRegister &colRegister);

/// Presumably formats the address `addr` as a string (e.g. for embedding in code to be jitted) -- TODO confirm.
std::string PrettyPrintAddr(const void *const addr);

/// Book a filter whose expression has to be jitted.
/// `prevNodeOnHeap` points to a heap-allocated shared_ptr; JitFilterHelper, which is meant to be called by the
/// code generated here, deletes it.
std::shared_ptr<RJittedFilter> BookFilterJit(std::shared_ptr<RNodeBase> *prevNodeOnHeap, std::string_view name,
                                             std::string_view expression, const ColumnNames_t &branches,
                                             const RColumnRegister &colRegister, TTree *tree, RDataSource *ds);

/// Book a Define whose expression has to be jitted.
/// `prevNodeOnHeap` points to a heap-allocated shared_ptr; NOTE(review): presumably released by JitDefineHelper.
std::shared_ptr<RJittedDefine> BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm,
                                             RDataSource *ds, const RColumnRegister &colRegister,
                                             const ColumnNames_t &branches, std::shared_ptr<RNodeBase> *prevNodeOnHeap);

/// Book a per-sample Define whose expression has to be jitted.
/// `upcastNodeOnHeap` points to a heap-allocated shared_ptr; NOTE(review): presumably released by
/// JitDefineHelper.
std::shared_ptr<RJittedDefine> BookDefinePerSampleJit(std::string_view name, std::string_view expression,
                                                      RLoopManager &lm, const RColumnRegister &colRegister,
                                                      std::shared_ptr<RNodeBase> *upcastNodeOnHeap);

/// Book a systematic variation whose expression has to be jitted.
/// `upcastNodeOnHeap` points to a heap-allocated shared_ptr; NOTE(review): presumably released by
/// JitVariationHelper.
std::shared_ptr<RJittedVariation>
BookVariationJit(const std::vector<std::string> &colNames, std::string_view variationName,
                 const std::vector<std::string> &variationTags, std::string_view expression, RLoopManager &lm,
                 RDataSource *ds, const RColumnRegister &colRegister, const ColumnNames_t &branches,
                 std::shared_ptr<RNodeBase> *upcastNodeOnHeap, bool isSingleColumn);

/// Presumably returns the source code that, once jitted, builds the requested action (see CallBuildAction).
/// NOTE(review): `prevNode`, `rOnHeap` and `jittedActionOnHeap` look like heap allocations handed over to the
/// jitted code -- confirm ownership against the implementation.
std::string JitBuildAction(const ColumnNames_t &bl, std::shared_ptr<RDFDetail::RNodeBase> *prevNode,
                           const std::type_info &art, const std::type_info &at, void *rOnHeap, TTree *tree,
                           const unsigned int nSlots, const RColumnRegister &colRegister, RDataSource *ds,
                           std::weak_ptr<RJittedAction> *jittedActionOnHeap, const bool vector2RVec = true);
| 372 | |
// Create a heap-allocated weak_ptr observing the object managed by `shPtr` and return its address.
// The caller (or whoever the pointer is handed to) is responsible for deleting the weak_ptr.
// Intended for RInterface's methods that book code for jitting: the generated code embeds the addresses of
// certain objects and is compiled lazily, so it must be able to check that those objects are still alive when
// it finally runs. The address of the heap weak_ptr is what gets passed to the jitted code, which is then in
// charge of deleting it.
template <typename T>
std::weak_ptr<T> *MakeWeakOnHeap(const std::shared_ptr<T> &shPtr)
{
   auto *observerOnHeap = new std::weak_ptr<T>(shPtr);
   return observerOnHeap;
}
| 384 | |
// Counterpart of MakeWeakOnHeap that allocates a shared_ptr instead: the heap copy shares ownership with
// `shPtr`, so the pointee is guaranteed to stay alive until the returned shared_ptr is deleted.
template <typename T>
std::shared_ptr<T> *MakeSharedOnHeap(const std::shared_ptr<T> &shPtr)
{
   auto *ownerOnHeap = new std::shared_ptr<T>(shPtr);
   return ownerOnHeap;
}
| 391 | |
/// Presumably returns true if at least one element of `strings` is empty (definition not visible here).
/// NOTE(review): the vector is taken by value, so every call copies it.
bool AtLeastOneEmptyString(const std::vector<std::string_view> strings);

/// Take a shared_ptr<AnyNodeType> and return a shared_ptr<RNodeBase>.
/// This works for RLoopManager nodes as well as filters and ranges.
std::shared_ptr<RNodeBase> UpcastNode(std::shared_ptr<RNodeBase> ptr);

/// Presumably validates `columns` against what the loop manager, the column register and the data source know
/// and returns the `nColumns` names to use -- TODO confirm against the implementation.
ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns,
                                      const RColumnRegister &validDefines, RDataSource *ds);

/// Presumably returns the type name of each column in `colNames`; `context` looks like the name of the caller
/// used in diagnostics, `vector2RVec` presumably requests vector-like types to be reported as RVec -- TODO
/// confirm.
std::vector<std::string> GetValidatedArgTypes(const ColumnNames_t &colNames, const RColumnRegister &colRegister,
                                              TTree *tree, RDataSource *ds, const std::string &context,
                                              bool vector2RVec);

/// Presumably returns one flag per entry of `requestedCols`, telling whether it is a data-source column that
/// is not yet listed in `definedDSCols` -- TODO confirm.
std::vector<bool> FindUndefinedDSColumns(const ColumnNames_t &requestedCols, const ColumnNames_t &definedDSCols);
| 406 | |
| 407 | template <typename T> |
| 408 | void AddDSColumnsHelper(const std::string &colName, RLoopManager &lm, RDataSource &ds, RColumnRegister &colRegister) |
| 409 | { |
| 410 | |
| 411 | if (colRegister.IsDefineOrAlias(colName)) |
| 412 | return; |
| 413 | |
| 414 | if (lm.HasDataSourceColumnReaders(colName, typeid(T))) |
| 415 | return; |
| 416 | |
| 417 | if (!ds.HasColumn(colName) && |
| 418 | lm.GetSuppressErrorsForMissingBranches().find(colName) == lm.GetSuppressErrorsForMissingBranches().end()) |
| 419 | return; |
| 420 | |
| 421 | const auto nSlots = lm.GetNSlots(); |
| 422 | std::vector<std::unique_ptr<RColumnReaderBase>> colReaders; |
| 423 | colReaders.reserve(nSlots); |
| 424 | |
| 425 | const auto valuePtrs = ds.GetColumnReaders<T>(colName); |
| 426 | if (!valuePtrs.empty()) { // we are using the old GetColumnReaders mechanism in this RDataSource |
| 427 | for (auto *ptr : valuePtrs) |
| 428 | colReaders.emplace_back(new RDSColumnReader<T>(ptr)); |
| 429 | |
| 430 | } else { // using the new GetColumnReaders mechanism |
| 431 | // TODO consider changing the interface so we return all of these for all slots in one go |
| 432 | for (auto slot = 0u; slot < lm.GetNSlots(); ++slot) |
| 433 | colReaders.emplace_back( |
| 434 | ROOT::Internal::RDF::CreateColumnReader(ds, slot, colName, typeid(T), /*treeReader*/ nullptr)); |
| 435 | } |
| 436 | |
| 437 | lm.AddDataSourceColumnReaders(colName, std::move(colReaders), typeid(T)); |
| 438 | } |
| 439 | |
| 440 | /// Take list of column names that must be defined, current map of custom columns, current list of defined column names, |
| 441 | /// and return a new map of custom columns (with the new datasource columns added to it) |
| 442 | template <typename... ColumnTypes> |
| 443 | void AddDSColumns(const std::vector<std::string> &requiredCols, RLoopManager &lm, RDataSource &ds, |
| 444 | TTraits::TypeList<ColumnTypes...>, RColumnRegister &colRegister) |
| 445 | { |
| 446 | // hack to expand a template parameter pack without c++17 fold expressions. |
| 447 | using expander = int[]; |
| 448 | int i = 0; |
| 449 | (void)expander{(AddDSColumnsHelper<ColumnTypes>(requiredCols[i], lm, ds, colRegister), ++i)..., 0}; |
| 450 | } |
| 451 | |
| 452 | // this function is meant to be called by the jitted code generated by BookFilterJit |
| 453 | template <typename F, typename PrevNode> |
| 454 | void JitFilterHelper(F &&f, const char **colsPtr, std::size_t colsSize, std::string_view name, |
| 455 | std::weak_ptr<RJittedFilter> *wkJittedFilter, std::shared_ptr<PrevNode> *prevNodeOnHeap, |
| 456 | RColumnRegister *colRegister) noexcept |
| 457 | { |
| 458 | if (wkJittedFilter->expired()) { |
| 459 | // The branch of the computation graph that needed this jitted code went out of scope between the type |
| 460 | // jitting was booked and the time jitting actually happened. Nothing to do other than cleaning up. |
| 461 | delete wkJittedFilter; |
| 462 | delete colRegister; |
| 463 | delete prevNodeOnHeap; |
| 464 | return; |
| 465 | } |
| 466 | |
| 467 | const ColumnNames_t cols(colsPtr, colsPtr + colsSize); |
| 468 | delete[] colsPtr; |
| 469 | |
| 470 | const auto jittedFilter = wkJittedFilter->lock(); |
| 471 | |
| 472 | // mock Filter logic -- validity checks and Define-ition of RDataSource columns |
| 473 | using Callable_t = std::decay_t<F>; |
| 474 | using F_t = RFilter<Callable_t, PrevNode>; |
| 475 | using ColTypes_t = typename TTraits::CallableTraits<Callable_t>::arg_types; |
| 476 | constexpr auto nColumns = ColTypes_t::list_size; |
| 477 | CheckFilter(f); |
| 478 | |
| 479 | auto &lm = *jittedFilter->GetLoopManagerUnchecked(); // RLoopManager must exist at this time |
| 480 | auto ds = lm.GetDataSource(); |
| 481 | |
| 482 | if (ds != nullptr) |
| 483 | AddDSColumns(cols, lm, *ds, ColTypes_t(), *colRegister); |
| 484 | |
| 485 | jittedFilter->SetFilter( |
| 486 | std::unique_ptr<RFilterBase>(new F_t(std::forward<F>(f), cols, *prevNodeOnHeap, *colRegister, name))); |
| 487 | // colRegister points to the columns structure in the heap, created before the jitted call so that the jitter can |
| 488 | // share data after it has lazily compiled the code. Here the data has been used and the memory can be freed. |
| 489 | delete colRegister; |
| 490 | delete prevNodeOnHeap; |
| 491 | delete wkJittedFilter; |
| 492 | } |
| 493 | |
/// Empty tag types used to select the MakeDefineNode overload.
namespace DefineTypes {
struct RDefineTag {}; // selects the overload that builds an RDefine
struct RDefinePerSampleTag {}; // selects the overload that builds an RDefinePerSample
}
| 498 | |
| 499 | template <typename F> |
| 500 | auto MakeDefineNode(DefineTypes::RDefineTag, std::string_view name, std::string_view dummyType, F &&f, |
| 501 | const ColumnNames_t &cols, RColumnRegister &colRegister, RLoopManager &lm) |
| 502 | { |
| 503 | return std::unique_ptr<RDefineBase>(new RDefine<std::decay_t<F>, ExtraArgsForDefine::None>( |
| 504 | name, dummyType, std::forward<F>(f), cols, colRegister, lm)); |
| 505 | } |
| 506 | |
| 507 | template <typename F> |
| 508 | auto MakeDefineNode(DefineTypes::RDefinePerSampleTag, std::string_view name, std::string_view dummyType, F &&f, |
| 509 | const ColumnNames_t &, RColumnRegister &, RLoopManager &lm) |
| 510 | { |
| 511 | return std::unique_ptr<RDefineBase>( |
| 512 | new RDefinePerSample<std::decay_t<F>>(name, dummyType, std::forward<F>(f), lm)); |
| 513 | } |
| 514 | |
| 515 | // Build a RDefine or a RDefinePerSample object and attach it to an existing RJittedDefine |
| 516 | // This function is meant to be called by jitted code right before starting the event loop. |
| 517 | // If colsPtr is null, build a RDefinePerSample (it has no input columns), otherwise a RDefine. |
| 518 | template <typename RDefineTypeTag, typename F> |
| 519 | void JitDefineHelper(F &&f, const char **colsPtr, std::size_t colsSize, std::string_view name, RLoopManager *lm, |
| 520 | std::weak_ptr<RJittedDefine> *wkJittedDefine, RColumnRegister *colRegister, |
| 521 | std::shared_ptr<RNodeBase> *prevNodeOnHeap) noexcept |
| 522 | { |
| 523 | // a helper to delete objects allocated before jitting, so that the jitter can share data with lazily jitted code |
| 524 | auto doDeletes = [&] { |
| 525 | delete wkJittedDefine; |
| 526 | delete colRegister; |
| 527 | delete prevNodeOnHeap; |
| 528 | delete[] colsPtr; |
| 529 | }; |
| 530 | |
| 531 | if (wkJittedDefine->expired()) { |
| 532 | // The branch of the computation graph that needed this jitted code went out of scope between the type |
| 533 | // jitting was booked and the time jitting actually happened. Nothing to do other than cleaning up. |
| 534 | doDeletes(); |
| 535 | return; |
| 536 | } |
| 537 | |
| 538 | const ColumnNames_t cols(colsPtr, colsPtr + colsSize); |
| 539 | |
| 540 | auto jittedDefine = wkJittedDefine->lock(); |
| 541 | |
| 542 | using Callable_t = std::decay_t<F>; |
| 543 | using ColTypes_t = typename TTraits::CallableTraits<Callable_t>::arg_types; |
| 544 | |
| 545 | auto ds = lm->GetDataSource(); |
| 546 | if (ds != nullptr && colsPtr) |
| 547 | AddDSColumns(cols, *lm, *ds, ColTypes_t(), *colRegister); |
| 548 | |
| 549 | // will never actually be used (trumped by jittedDefine->GetTypeName()), but we set it to something meaningful |
| 550 | // to help devs debugging |
| 551 | const auto dummyType = "jittedCol_t"; |
| 552 | // use unique_ptr<RDefineBase> instead of make_unique<NewCol_t> to reduce jit/compile-times |
| 553 | std::unique_ptr<RDefineBase> newCol{ |
| 554 | MakeDefineNode(RDefineTypeTag{}, name, dummyType, std::forward<F>(f), cols, *colRegister, *lm)}; |
| 555 | jittedDefine->SetDefine(std::move(newCol)); |
| 556 | |
| 557 | doDeletes(); |
| 558 | } |
| 559 | |
| 560 | template <bool IsSingleColumn, typename F> |
| 561 | void JitVariationHelper(F &&f, const char **colsPtr, std::size_t colsSize, const char **variedCols, |
| 562 | std::size_t variedColsSize, const char **variationTags, std::size_t variationTagsSize, |
| 563 | std::string_view variationName, RLoopManager *lm, |
| 564 | std::weak_ptr<RJittedVariation> *wkJittedVariation, RColumnRegister *colRegister, |
| 565 | std::shared_ptr<RNodeBase> *prevNodeOnHeap) noexcept |
| 566 | { |
| 567 | // a helper to delete objects allocated before jitting, so that the jitter can share data with lazily jitted code |
| 568 | auto doDeletes = [&] { |
| 569 | delete[] colsPtr; |
| 570 | delete[] variedCols; |
| 571 | delete[] variationTags; |
| 572 | |
| 573 | delete wkJittedVariation; |
| 574 | delete colRegister; |
| 575 | delete prevNodeOnHeap; |
| 576 | }; |
| 577 | |
| 578 | if (wkJittedVariation->expired()) { |
| 579 | // The branch of the computation graph that needed this jitted variation went out of scope between the type |
| 580 | // jitting was booked and the time jitting actually happened. Nothing to do other than cleaning up. |
| 581 | doDeletes(); |
| 582 | return; |
| 583 | } |
| 584 | |
| 585 | const ColumnNames_t inputColNames(colsPtr, colsPtr + colsSize); |
| 586 | std::vector<std::string> variedColNames(variedCols, variedCols + variedColsSize); |
| 587 | std::vector<std::string> tags(variationTags, variationTags + variationTagsSize); |
| 588 | |
| 589 | auto jittedVariation = wkJittedVariation->lock(); |
| 590 | |
| 591 | using Callable_t = std::decay_t<F>; |
| 592 | using ColTypes_t = typename TTraits::CallableTraits<Callable_t>::arg_types; |
| 593 | |
| 594 | auto ds = lm->GetDataSource(); |
| 595 | if (ds != nullptr) |
| 596 | AddDSColumns(inputColNames, *lm, *ds, ColTypes_t(), *colRegister); |
| 597 | |
| 598 | // use unique_ptr<RDefineBase> instead of make_unique<NewCol_t> to reduce jit/compile-times |
| 599 | std::unique_ptr<RVariationBase> newVariation{new RVariation<std::decay_t<F>, IsSingleColumn>( |
| 600 | std::move(variedColNames), variationName, std::forward<F>(f), std::move(tags), jittedVariation->GetTypeName(), |
| 601 | *colRegister, *lm, inputColNames)}; |
| 602 | jittedVariation->SetVariation(std::move(newVariation)); |
| 603 | |
| 604 | doDeletes(); |
| 605 | } |
| 606 | |
| 607 | /// Convenience function invoked by jitted code to build action nodes at runtime |
| 608 | template <typename ActionTag, typename... ColTypes, typename PrevNodeType, typename HelperArgType> |
| 609 | void CallBuildAction(std::shared_ptr<PrevNodeType> *prevNodeOnHeap, const char **colsPtr, std::size_t colsSize, |
| 610 | const unsigned int nSlots, std::shared_ptr<HelperArgType> *helperArgOnHeap, |
| 611 | std::weak_ptr<RJittedAction> *wkJittedActionOnHeap, RColumnRegister *colRegister) noexcept |
| 612 | { |
| 613 | // a helper to delete objects allocated before jitting, so that the jitter can share data with lazily jitted code |
| 614 | auto doDeletes = [&] { |
| 615 | delete[] colsPtr; |
| 616 | delete helperArgOnHeap; |
| 617 | delete wkJittedActionOnHeap; |
| 618 | // colRegister must be deleted before prevNodeOnHeap because their dtor needs the RLoopManager to be alive |
| 619 | // and prevNodeOnHeap is what keeps it alive if the rest of the computation graph is already out of scope |
| 620 | delete colRegister; |
| 621 | delete prevNodeOnHeap; |
| 622 | }; |
| 623 | |
| 624 | if (wkJittedActionOnHeap->expired()) { |
| 625 | // The branch of the computation graph that needed this jitted variation went out of scope between the type |
| 626 | // jitting was booked and the time jitting actually happened. Nothing to do other than cleaning up. |
| 627 | doDeletes(); |
| 628 | return; |
| 629 | } |
| 630 | |
| 631 | const ColumnNames_t cols(colsPtr, colsPtr + colsSize); |
| 632 | |
| 633 | auto jittedActionOnHeap = wkJittedActionOnHeap->lock(); |
| 634 | |
| 635 | // if we are here it means we are jitting, if we are jitting the loop manager must be alive |
| 636 | auto &prevNodePtr = *prevNodeOnHeap; |
| 637 | auto &loopManager = *prevNodePtr->GetLoopManagerUnchecked(); |
| 638 | using ColTypes_t = TypeList<ColTypes...>; |
| 639 | constexpr auto nColumns = ColTypes_t::list_size; |
| 640 | auto ds = loopManager.GetDataSource(); |
| 641 | if (ds != nullptr) |
| 642 | AddDSColumns(cols, loopManager, *ds, ColTypes_t(), *colRegister); |
| 643 | |
| 644 | auto actionPtr = BuildAction<ColTypes...>(cols, std::move(*helperArgOnHeap), nSlots, std::move(prevNodePtr), |
| 645 | ActionTag{}, *colRegister); |
| 646 | jittedActionOnHeap->SetAction(std::move(actionPtr)); |
| 647 | |
| 648 | doDeletes(); |
| 649 | } |
| 650 | |
| 651 | /// The contained `type` alias is `double` if `T == RInferredType`, `U` if `T == std::container<U>`, `T` otherwise. |
| 652 | template <typename T, bool Container = IsDataContainer<T>::value && !std::is_same<T, std::string>::value> |
| 653 | struct RMinReturnType { |
| 654 | using type = T; |
| 655 | }; |
| 656 | |
| 657 | template <> |
| 658 | struct RMinReturnType<RInferredType, false> { |
| 659 | using type = double; |
| 660 | }; |
| 661 | |
| 662 | template <typename T> |
| 663 | struct RMinReturnType<T, true> { |
| 664 | using type = TTraits::TakeFirstParameter_t<T>; |
| 665 | }; |
| 666 | |
| 667 | // return wrapper around f that prepends an `unsigned int slot` parameter |
| 668 | template <typename R, typename F, typename... Args> |
| 669 | std::function<R(unsigned int, Args...)> AddSlotParameter(F &f, TypeList<Args...>) |
| 670 | { |
| 671 | return [f](unsigned int, Args... a) mutable -> R { return f(a...); }; |
| 672 | } |
| 673 | |
| 674 | template <typename ColType, typename... Rest> |
| 675 | struct RNeedJittingHelper { |
| 676 | static constexpr bool value = RNeedJittingHelper<Rest...>::value; |
| 677 | }; |
| 678 | |
| 679 | template <typename... Rest> |
| 680 | struct RNeedJittingHelper<RInferredType, Rest...> { |
| 681 | static constexpr bool value = true; |
| 682 | }; |
| 683 | |
| 684 | template <typename T> |
| 685 | struct RNeedJittingHelper<T> { |
| 686 | static constexpr bool value = false; |
| 687 | }; |
| 688 | |
| 689 | template <> |
| 690 | struct RNeedJittingHelper<RInferredType> { |
| 691 | static constexpr bool value = true; |
| 692 | }; |
| 693 | |
| 694 | template <typename ...ColTypes> |
| 695 | struct RNeedJitting { |
| 696 | static constexpr bool value = RNeedJittingHelper<ColTypes...>::value; |
| 697 | }; |
| 698 | |
| 699 | template <> |
| 700 | struct RNeedJitting<> { |
| 701 | static constexpr bool value = false; |
| 702 | }; |
| 703 | |
| 704 | /////////////////////////////////////////////////////////////////////////////// |
| 705 | /// Check preconditions for RInterface::Aggregate: |
| 706 | /// - the aggregator callable must have signature `U(U,T)` or `void(U&,T)`. |
| 707 | /// - the merge callable must have signature `U(U,U)` or `void(std::vector<U>&)` |
| 708 | template <typename R, typename Merge, typename U, typename T, typename decayedU = std::decay_t<U>, |
| 709 | typename mergeArgsNoDecay_t = typename CallableTraits<Merge>::arg_types_nodecay, |
| 710 | typename mergeArgs_t = typename CallableTraits<Merge>::arg_types, |
| 711 | typename mergeRet_t = typename CallableTraits<Merge>::ret_type> |
| 712 | void CheckAggregate(TypeList<U, T>) |
| 713 | { |
| 714 | constexpr bool isAggregatorOk = |
| 715 | (std::is_same<R, decayedU>::value) || (std::is_same<R, void>::value && std::is_lvalue_reference<U>::value); |
| 716 | static_assert(isAggregatorOk, "aggregator function must have signature `U(U,T)` or `void(U&,T)`"); |
| 717 | constexpr bool isMergeOk = |
| 718 | (std::is_same<TypeList<decayedU, decayedU>, mergeArgs_t>::value && std::is_same<decayedU, mergeRet_t>::value) || |
| 719 | (std::is_same<TypeList<std::vector<decayedU> &>, mergeArgsNoDecay_t>::value && |
| 720 | std::is_same<void, mergeRet_t>::value); |
| 721 | static_assert(isMergeOk, "merge function must have signature `U(U,U)` or `void(std::vector<U>&)`"); |
| 722 | } |
| 723 | |
///////////////////////////////////////////////////////////////////////////////
/// Fallback overload of CheckAggregate, selected when the aggregator's argument list is not a two-element
/// TypeList: instantiating it always triggers the static_assert below.
template <typename R, typename T>
void CheckAggregate(T)
{
   // depends on T so that the assertion only fires when this overload is actually instantiated
   constexpr bool alwaysFalse = (sizeof(T) == 0);
   static_assert(alwaysFalse, "aggregator function must take exactly two arguments");
}
| 731 | |
///////////////////////////////////////////////////////////////////////////////
/// Check as many template parameters were passed as the number of column names, throw if this is not the case.
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames);

/// Return local BranchNames or default BranchNames according to which one should be used
/// NOTE(review): the return type is a const-qualified value, which prevents callers from moving from the
/// result.
const ColumnNames_t SelectColumns(unsigned int nArgs, const ColumnNames_t &bl, const ColumnNames_t &defBl);

/// Check whether column names refer to a valid branch of a TTree or have been `Define`d. Return invalid column names.
ColumnNames_t FindUnknownColumns(const ColumnNames_t &requiredCols, const ColumnNames_t &datasetColumns,
                                 const RColumnRegister &definedCols, const ColumnNames_t &dataSourceColumns);

/// Returns the list of Filters defined in the whole graph
/// (non-template overload taking the RLoopManager; defined out of line).
std::vector<std::string> GetFilterNames(const std::shared_ptr<RLoopManager> &loopManager);
| 745 | |
/// Returns the list of Filters defined in the branch
/// The names are collected by calling `AddFilterName` on `node`, which is expected to append to the vector it
/// is given.
template <typename NodeType>
std::vector<std::string> GetFilterNames(const std::shared_ptr<NodeType> &node)
{
   std::vector<std::string> collectedNames;
   node->AddFilterName(collectedNames);
   return collectedNames;
}
| 754 | |
/// Result of ParseTreePath: the two components of a full tree name.
struct ParsedTreePath {
   std::string fTreeName; // tree-name component
   std::string fDirName; // directory-name component
};

/// Split `fullTreeName` into a ParsedTreePath.
/// NOTE(review): the splitting rule is in the out-of-line definition, not visible here -- presumably the part
/// up to the last '/' is the directory name; confirm.
ParsedTreePath ParseTreePath(std::string_view fullTreeName);
| 761 | |
// Logical AND over a pack of compile-time booleans.
// TBoolPack is only declared: it is used as a type-level list of bools and never instantiated.
template <bool...>
struct TBoolPack;

// The packs <Flags..., true> and <true, Flags...> are the same type exactly when every flag is true.
template <bool... Flags>
using IsTrueForAllImpl_t = std::is_same<TBoolPack<Flags..., true>, TBoolPack<true, Flags...>>;

// `value` is true if all Conditions are true (and for an empty pack).
template <bool... Conditions>
struct TEvalAnd {
   static constexpr bool value = IsTrueForAllImpl_t<Conditions...>::value;
};
| 773 | |
// Traits telling whether a type is a std::list / std::deque specialisation (with the default allocator)
// clang-format off

/// Primary template: not a std::list.
template <typename>
struct IsList_t : std::false_type {};

/// std::list of any element type.
template <typename Elem>
struct IsList_t<std::list<Elem>> : std::true_type {};

/// Primary template: not a std::deque.
template <typename>
struct IsDeque_t : std::false_type {};

/// std::deque of any element type.
template <typename Elem>
struct IsDeque_t<std::deque<Elem>> : std::true_type {};
// clang-format on
| 789 | |
/// NOTE(review): defined out of line; judging by the name it rejects a Snapshot column list that contains the
/// same column more than once -- confirm how the error is reported.
void CheckForDuplicateSnapshotColumns(const ColumnNames_t &cols);
| 791 | |
| 792 | template <typename T> |
| 793 | struct InnerValueType { |
| 794 | using type = T; // fallback for when T is not a nested RVec |
| 795 | }; |
| 796 | |
| 797 | template <typename Elem> |
| 798 | struct InnerValueType<ROOT::VecOps::RVec<ROOT::VecOps::RVec<Elem>>> { |
| 799 | using type = Elem; |
| 800 | }; |
| 801 | |
| 802 | template <typename T> |
| 803 | using InnerValueType_t = typename InnerValueType<T>::type; |
| 804 | |
/// NOTE(review): defined out of line. Judging by the signature it takes ownership of the two column lists
/// (rvalue references) and returns them, presumably extended with array-size branches -- confirm.
std::pair<std::vector<std::string>, std::vector<std::string>>
AddSizeBranches(const std::vector<std::string> &branches, ROOT::RDF::RDataSource *ds,
                std::vector<std::string> &&colsWithoutAliases, std::vector<std::string> &&colsWithAliases);

/// Modifies `columnNames` in place; presumably drops repeated names -- confirm whether order is preserved.
void RemoveDuplicates(ColumnNames_t &columnNames);
/// Modifies `columnNames` in place; presumably drops names that refer to RNTuple sub-fields -- confirm.
void RemoveRNTupleSubFields(ColumnNames_t &columnNames);
| 811 | |
| 812 | } // namespace RDF |
| 813 | } // namespace Internal |
| 814 | |
| 815 | namespace Detail { |
| 816 | namespace RDF { |
| 817 | |
| 818 | /// The aliased type is `double` if `T == RInferredType`, `U` if `T == container<U>`, `T` otherwise. |
| 819 | template <typename T> |
| 820 | using MinReturnType_t = typename RDFInternal::RMinReturnType<T>::type; |
| 821 | |
| 822 | template <typename T> |
| 823 | using MaxReturnType_t = MinReturnType_t<T>; |
| 824 | |
| 825 | template <typename T> |
| 826 | using SumReturnType_t = MinReturnType_t<T>; |
| 827 | |
| 828 | } // namespace RDF |
| 829 | } // namespace Detail |
| 830 | } // namespace ROOT |
| 831 | |
| 832 | /// \endcond |
| 833 | |
| 834 | #endif |