Bug Summary

File: cvmfs/belle.cern.ch/el9/externals/v02-04-00/include/root/ROOT/RDF/RInterface.hxx
Warning: line 299, column 7
Potential leak of memory pointed to by 'upcastNodeOnHeap'

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -O3 -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name SVDdEdxValidationAlgorithm.cc -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/data/b2soft/buildbot/development/build -fcoverage-compilation-dir=/data/b2soft/buildbot/development/build -resource-dir /cvmfs/belle.cern.ch/el9/externals/v02-04-00/Linux_x86_64/common/lib/clang/21 -isystem /cvmfs/belle.cern.ch/el9/externals/v02-04-00/Linux_x86_64/common/include/c++ -isystem /cvmfs/belle.cern.ch/el9/externals/v02-04-00/Linux_x86_64/common/include/c++/x86_64-redhat-linux -isystem /cvmfs/belle.cern.ch/el9/externals/v02-04-00/Linux_x86_64/common/include/c++/backward -isystem /cvmfs/belle.cern.ch/el9/externals/v02-04-00/include -isystem /cvmfs/belle.cern.ch/el9/externals/v02-04-00/Linux_x86_64/common/include/python3.12 -isystem /cvmfs/belle.cern.ch/el9/externals/v02-04-00/include/CLHEP -isystem /cvmfs/belle.cern.ch/el9/externals/v02-04-00/Linux_x86_64/common/include/Geant4 -isystem /cvmfs/belle.cern.ch/el9/externals/v02-04-00/Linux_x86_64/common/include -isystem /cvmfs/belle.cern.ch/el9/externals/v02-04-00/include/root -isystem 
/cvmfs/belle.cern.ch/el9/externals/v02-04-00/include/belle_legacy -I include/ -D _PACKAGE_="svd" -D G4UI_USE_TCSH -D RaveDllExport= -D HAS_SQLITE -D HAS_CALLGRIND -I include -I /cvmfs/belle.cern.ch/el9/externals/v02-04-00/Linux_x86_64/common/include/libxml2 -internal-isystem /cvmfs/belle.cern.ch/el9/externals/v02-04-00/Linux_x86_64/common/bin/../lib64/gcc/x86_64-redhat-linux/15.2.0/../../../../include/c++ -internal-isystem /cvmfs/belle.cern.ch/el9/externals/v02-04-00/Linux_x86_64/common/bin/../lib64/gcc/x86_64-redhat-linux/15.2.0/../../../../include/c++/x86_64-redhat-linux -internal-isystem /cvmfs/belle.cern.ch/el9/externals/v02-04-00/Linux_x86_64/common/bin/../lib64/gcc/x86_64-redhat-linux/15.2.0/../../../../include/c++/backward -internal-isystem /cvmfs/belle.cern.ch/el9/externals/v02-04-00/Linux_x86_64/common/lib/clang/21/include -internal-isystem /usr/local/include -internal-isystem /cvmfs/belle.cern.ch/el9/externals/v02-04-00/Linux_x86_64/common/bin/../lib64/gcc/x86_64-redhat-linux/15.2.0/../../../../x86_64-redhat-linux/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -Wno-missing-braces -Wno-unused-command-line-argument -std=c++20 -fdeprecated-macro -ferror-limit 19 -fgnuc-version=4.2.1 -fno-implicit-modules -fskip-odr-check-in-gmf -fcxx-exceptions -fexceptions -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /scan_build/2026-05-31-004316-385593-1 -x c++ svd/calibration/src/SVDdEdxValidationAlgorithm.cc

svd/calibration/src/SVDdEdxValidationAlgorithm.cc

1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8
9#include <svd/calibration/SVDdEdxValidationAlgorithm.h>
10
11#include <tuple>
12#include <vector>
13#include <string>
14
15#include <TROOT.h>
16#include <TStyle.h>
17#include <TMath.h>
18#include <TFile.h>
19#include <TColor.h>
20#include <TLegend.h>
21#include <TCanvas.h>
22#include <TH1D.h>
23#include <TH1D.h>
24#include <TH2D.h>
25#include <TAxis.h>
26#include <TGraph.h>
27#include <TMultiGraph.h>
28#include <TCut.h>
29
30#include <RooDataSet.h>
31#include <RooRealVar.h>
32#include <RooAddPdf.h>
33#include <RooGaussian.h>
34#include <RooChebychev.h>
35#include <RooBifurGauss.h>
36#include <RooDstD0BG.h>
37#include <RooAbsDataStore.h>
38#include <RooTreeDataStore.h>
39#include <RooMsgService.h>
40#include <RooStats/SPlot.h>
41#include <ROOT/RDataFrame.hxx>
42
43using namespace ROOT;
44using namespace RooFit;
45using namespace Belle2;
46
47SVDdEdxValidationAlgorithm::SVDdEdxValidationAlgorithm() : CalibrationAlgorithm("SVDdEdxValidationCollector"),
48 m_fullValidation(true), m_isMakePlots(true)
49{
50 setDescription("SVD dE/dx validation algorithm");
51}
52
53/* Main calibration method */
54CalibrationAlgorithm::EResult SVDdEdxValidationAlgorithm::calibrate()
55{
56 gROOT(ROOT::GetROOT())->SetBatch(true);
57
58 // const auto exprun = getRunList()[0];
59 // B2INFO("ExpRun used for calibration: " << exprun.first << " " << exprun.second);
60
61 // Get data objects
62 auto TTreeLambda = getObjectPtr<TTree>("Lambda");
63 auto TTreeDstar = getObjectPtr<TTree>("Dstar");
64 auto TTreeGamma = getObjectPtr<TTree>("Gamma");
65
66 if (TTreeLambda->GetEntries() < m_MinEvtsPerTree) {
67 B2WARNING("Not enough data for calibration.")do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2::
LogConfig::c_Warning, 0, "svd")) { { LogVariableStream varStream
; varStream << "Not enough data for calibration."; Belle2
::LogSystem::Instance().sendMessage(Belle2::LogMessage(Belle2
::LogConfig::c_Warning, std::move(varStream), "svd", __PRETTY_FUNCTION__
, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc", 67, 0)
); }; } } while(false)
;
68 return c_NotEnoughData;
69 }
70
71 // call the calibration functions
72 TTree* TTreeLambdaSW = LambdaMassFit(TTreeLambda);
73 TTree* TTreeDstarSW = DstarMassFit(TTreeDstar);
74 TTree* TTreeGammaWrap = TTreeGamma.get();
75
76 std::vector<TString> PIDDetectors;
77 PIDDetectors.push_back("SVDonly");
78 if (m_fullValidation) {
79 PIDDetectors.push_back("ALL");
80 PIDDetectors.push_back("noSVD");
81 }
82
83 std::map<TTree*, TString> SWeightNameMap = {
84 {TTreeGammaWrap, "1"},
85 {TTreeDstarSW, "nSignalDstar_sw"},
86 {TTreeLambdaSW, "nSignalLambda_sw"}
87 };
88
89 for (const TString& PIDDetectorsName : PIDDetectors) {
90 PlotEfficiencyPlots(PIDDetectorsName, TTreeGammaWrap, SWeightNameMap[TTreeGammaWrap], "FirstElectron", "electron", TTreeDstarSW,
91 SWeightNameMap[TTreeDstarSW], "PionD", "pion",
92 "BinaryElectronPionID",
93 "0.5", m_NumEffBins, 0., m_MomHighEff);
94 PlotEfficiencyPlots(PIDDetectorsName, TTreeGammaWrap, SWeightNameMap[TTreeGammaWrap], "FirstElectron", "electron", TTreeDstarSW,
95 SWeightNameMap[TTreeDstarSW], "Kaon", "kaon",
96 "BinaryElectronKaonID", "0.5",
97 m_NumEffBins, 0., m_MomHighEff);
98 PlotEfficiencyPlots(PIDDetectorsName, TTreeLambdaSW, SWeightNameMap[TTreeLambdaSW], "Proton", "proton", TTreeDstarSW,
99 SWeightNameMap[TTreeDstarSW], "PionD", "pion",
100 "BinaryProtonPionID", "0.5",
101 m_NumEffBins, 0.25, m_MomHighEff);
102 PlotEfficiencyPlots(PIDDetectorsName, TTreeLambdaSW, SWeightNameMap[TTreeLambdaSW], "Proton", "proton", TTreeDstarSW,
103 SWeightNameMap[TTreeDstarSW], "Kaon", "kaon",
104 "BinaryProtonKaonID", "0.5",
105 m_NumEffBins, 0.25, m_MomHighEff);
106 PlotEfficiencyPlots(PIDDetectorsName, TTreeDstarSW, SWeightNameMap[TTreeDstarSW], "PionD", "pion", TTreeDstarSW,
107 SWeightNameMap[TTreeDstarSW],
108 "Kaon", "kaon",
109 "BinaryPionKaonID", "0.5", m_NumEffBins,
110 0., m_MomHighEff);
111 PlotEfficiencyPlots(PIDDetectorsName, TTreeDstarSW, SWeightNameMap[TTreeDstarSW], "Kaon", "kaon", TTreeDstarSW,
112 SWeightNameMap[TTreeDstarSW],
113 "PionD", "pion",
114 "BinaryKaonPionID", "0.5", m_NumEffBins,
115 0., m_MomHighEff);
116 }
117
118 if (m_fullValidation) {
119 PlotROCCurve(TTreeGammaWrap, SWeightNameMap[TTreeGammaWrap], "FirstElectron", "electron", TTreeDstarSW,
120 SWeightNameMap[TTreeDstarSW], "PionD",
121 "pion", "BinaryElectronPionID");
122 PlotROCCurve(TTreeGammaWrap, SWeightNameMap[TTreeGammaWrap], "FirstElectron", "electron", TTreeDstarSW,
123 SWeightNameMap[TTreeDstarSW], "Kaon",
124 "kaon", "BinaryElectronKaonID");
125 PlotROCCurve(TTreeLambdaSW, SWeightNameMap[TTreeLambdaSW], "Proton", "proton", TTreeDstarSW, SWeightNameMap[TTreeDstarSW], "PionD",
126 "pion",
127 "BinaryProtonPionID");
128 PlotROCCurve(TTreeLambdaSW, SWeightNameMap[TTreeLambdaSW], "Proton", "proton", TTreeDstarSW, SWeightNameMap[TTreeDstarSW], "Kaon",
129 "kaon",
130 "BinaryProtonKaonID");
131 PlotROCCurve(TTreeDstarSW, SWeightNameMap[TTreeDstarSW], "PionD", "pion", TTreeDstarSW, SWeightNameMap[TTreeDstarSW], "Kaon",
132 "kaon",
133 "BinaryPionKaonID");
134 PlotROCCurve(TTreeDstarSW, SWeightNameMap[TTreeDstarSW], "Kaon", "kaon", TTreeDstarSW, SWeightNameMap[TTreeDstarSW], "PionD",
135 "pion",
136 "BinaryKaonPionID");
137 }
138 B2INFO("SVD dE/dx validation done!")do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2::
LogConfig::c_Info, 0, "svd")) { { LogVariableStream varStream
; varStream << "SVD dE/dx validation done!"; Belle2::LogSystem
::Instance().sendMessage(Belle2::LogMessage(Belle2::LogConfig
::c_Info, std::move(varStream), "svd", __PRETTY_FUNCTION__, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc"
, 138, 0)); }; } } while(false)
;
139
140 return c_OK;
141}
142
143// generic efficiency and fake rate
144void SVDdEdxValidationAlgorithm::PlotEfficiencyPlots(const TString& PIDDetectorsName, TTree* SignalTree, TString SignalWeightName,
145 TString SignalVarName, TString SignalVarNameFull, TTree* FakeTree, TString FakeWeightName, TString FakeVarName,
146 TString FakeVarNameFull, TString PIDVarName, TString PIDCut, unsigned int nbins, double MomLow, double MomHigh)
147{
148
149 if ((SignalTree == nullptr) || (FakeTree == nullptr)) {
150 B2FATAL("Invalid dataset, stopping here")do { { LogVariableStream varStream; varStream << "Invalid dataset, stopping here"
; Belle2::LogSystem::Instance().sendMessage(Belle2::LogMessage
(Belle2::LogConfig::c_Fatal, std::move(varStream), "svd", __PRETTY_FUNCTION__
, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc", 150, 0
)); }; exit(1); } while(false)
;
151 }
152
153 if ((SignalTree->GetEntries() == 0) || (FakeTree->GetEntries() == 0)) {
154 B2FATAL("The dataset is empty, stopping here")do { { LogVariableStream varStream; varStream << "The dataset is empty, stopping here"
; Belle2::LogSystem::Instance().sendMessage(Belle2::LogMessage
(Belle2::LogConfig::c_Fatal, std::move(varStream), "svd", __PRETTY_FUNCTION__
, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc", 154, 0
)); }; exit(1); } while(false)
;
155 }
156
157 if ((SignalTree->GetBranch(Form("%sMomentum", SignalVarName.Data())) == nullptr)
158 || (FakeTree->GetBranch(Form("%sMomentum", FakeVarName.Data())) == nullptr)) {
159 B2FATAL("Check the provided branch name, stopping here")do { { LogVariableStream varStream; varStream << "Check the provided branch name, stopping here"
; Belle2::LogSystem::Instance().sendMessage(Belle2::LogMessage
(Belle2::LogConfig::c_Fatal, std::move(varStream), "svd", __PRETTY_FUNCTION__
, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc", 159, 0
)); }; exit(1); } while(false)
;
160 }
161
162 TString SignalFiducialCut = "(1>0)"; // placeholder for a possible sanity cut
163 TString FakesFiducialCut = "(1>0)";
164
165 // Produce the plots of the SVD PID distribution
166 if (PIDDetectorsName == "SVDonly") {
167 SignalTree->Draw(Form("%s%s%s>>hSignalPIDDistribution(100,0.,1.)", SignalVarName.Data(), PIDVarName.Data(),
168 PIDDetectorsName.Data()),
169 SignalWeightName + Form("* (%sMomentum>%f && %sMomentum<%f)", SignalVarName.Data(), MomLow, SignalVarName.Data(), MomHigh), "goff");
170 TH1D* hSignalPIDDistribution = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hSignalPIDDistribution"));
171 hSignalPIDDistribution->Scale(1. / hSignalPIDDistribution->Integral());
172 hSignalPIDDistribution->GetXaxis()->SetTitle(PIDVarName + PIDDetectorsName + " for " + SignalVarNameFull);
173 hSignalPIDDistribution->GetYaxis()->SetTitle("Candidates, normalised");
174 hSignalPIDDistribution->SetMaximum(1.35 * hSignalPIDDistribution->GetMaximum());
175
176 SignalTree->Draw(Form("%sElectronLLSVDonly>>hSignalElectronLLDistribution(100,-17.,3.)", SignalVarName.Data()),
177 SignalWeightName + Form("* (%sMomentum>%f && %sMomentum<%f)", SignalVarName.Data(), MomLow, SignalVarName.Data(), MomHigh), "goff");
178 TH1D* hSignalElectronLLDistribution = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hSignalElectronLLDistribution"));
179 SignalTree->Draw(Form("%sPionLLSVDonly>>hSignalPionLLDistribution(100,-17.,3.)", SignalVarName.Data()),
180 SignalWeightName + Form("* (%sMomentum>%f && %sMomentum<%f)", SignalVarName.Data(), MomLow, SignalVarName.Data(), MomHigh), "goff");
181 TH1D* hSignalPionLLDistribution = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hSignalPionLLDistribution"));
182 SignalTree->Draw(Form("%sKaonLLSVDonly>>hSignalKaonLLDistribution(100,-17.,3.)", SignalVarName.Data()),
183 SignalWeightName + Form("* (%sMomentum>%f && %sMomentum<%f)", SignalVarName.Data(), MomLow, SignalVarName.Data(), MomHigh), "goff");
184 TH1D* hSignalKaonLLDistribution = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hSignalKaonLLDistribution"));
185 SignalTree->Draw(Form("%sProtonLLSVDonly>>hSignalProtonLLDistribution(100,-17.,3.)", SignalVarName.Data()),
186 SignalWeightName + Form("* (%sMomentum>%f && %sMomentum<%f)", SignalVarName.Data(), MomLow, SignalVarName.Data(), MomHigh), "goff");
187 TH1D* hSignalProtonLLDistribution = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hSignalProtonLLDistribution"));
188
189 // same but only for tracks that are expected to actually have SVD dEdx info
190 SignalTree->Draw(Form("%sElectronLLSVDonly>>hSignalElectronLLDistributionGood(100,-17.,3.)", SignalVarName.Data()),
191 SignalWeightName + Form("* (%sSVDdEdx>0) * (%sMomentum>%f && %sMomentum<%f)", SignalVarName.Data(), SignalVarName.Data(), MomLow,
192 SignalVarName.Data(), MomHigh), "goff");
193 TH1D* hSignalElectronLLDistributionGood = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hSignalElectronLLDistributionGood"));
194 SignalTree->Draw(Form("%sPionLLSVDonly>>hSignalPionLLDistributionGood(100,-17.,3.)", SignalVarName.Data()),
195 SignalWeightName + Form("* (%sSVDdEdx>0) * (%sMomentum>%f && %sMomentum<%f)", SignalVarName.Data(), SignalVarName.Data(), MomLow,
196 SignalVarName.Data(), MomHigh), "goff");
197 TH1D* hSignalPionLLDistributionGood = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hSignalPionLLDistributionGood"));
198 SignalTree->Draw(Form("%sKaonLLSVDonly>>hSignalKaonLLDistributionGood(100,-17.,3.)", SignalVarName.Data()),
199 SignalWeightName + Form("* (%sSVDdEdx>0) * (%sMomentum>%f && %sMomentum<%f)", SignalVarName.Data(), SignalVarName.Data(), MomLow,
200 SignalVarName.Data(), MomHigh), "goff");
201 TH1D* hSignalKaonLLDistributionGood = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hSignalKaonLLDistributionGood"));
202 SignalTree->Draw(Form("%sProtonLLSVDonly>>hSignalProtonLLDistributionGood(100,-17.,3.)", SignalVarName.Data()),
203 SignalWeightName + Form("* (%sSVDdEdx>0) * (%sMomentum>%f && %sMomentum<%f)", SignalVarName.Data(), SignalVarName.Data(), MomLow,
204 SignalVarName.Data(), MomHigh), "goff");
205 TH1D* hSignalProtonLLDistributionGood = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hSignalProtonLLDistributionGood"));
206
207
208 hSignalElectronLLDistribution->Scale(1. / hSignalElectronLLDistribution->Integral());
209 hSignalPionLLDistribution->Scale(1. / hSignalPionLLDistribution->Integral());
210 hSignalKaonLLDistribution->Scale(1. / hSignalKaonLLDistribution->Integral());
211 hSignalProtonLLDistribution->Scale(1. / hSignalProtonLLDistribution->Integral());
212
213 hSignalElectronLLDistributionGood->Scale(1. / hSignalElectronLLDistributionGood->Integral());
214 hSignalPionLLDistributionGood->Scale(1. / hSignalPionLLDistributionGood->Integral());
215 hSignalKaonLLDistributionGood->Scale(1. / hSignalKaonLLDistributionGood->Integral());
216 hSignalProtonLLDistributionGood->Scale(1. / hSignalProtonLLDistributionGood->Integral());
217
218 hSignalElectronLLDistribution->GetXaxis()->SetTitle("ElectronLL (SVD only) for " + SignalVarNameFull);
219 hSignalElectronLLDistribution->GetYaxis()->SetTitle("Candidates, normalised");
220 hSignalElectronLLDistribution->SetMaximum(1.35 * hSignalElectronLLDistribution->GetMaximum());
221
222 hSignalPionLLDistribution->GetXaxis()->SetTitle("PionLL (SVD only) for " + SignalVarNameFull);
223 hSignalPionLLDistribution->GetYaxis()->SetTitle("Candidates, normalised");
224 hSignalPionLLDistribution->SetMaximum(1.35 * hSignalPionLLDistribution->GetMaximum());
225
226 hSignalKaonLLDistribution->GetXaxis()->SetTitle("KaonLL (SVD only) for " + SignalVarNameFull);
227 hSignalKaonLLDistribution->GetYaxis()->SetTitle("Candidates, normalised");
228 hSignalKaonLLDistribution->SetMaximum(1.35 * hSignalKaonLLDistribution->GetMaximum());
229
230 hSignalProtonLLDistribution->GetXaxis()->SetTitle("ProtonLL (SVD only) for " + SignalVarNameFull);
231 hSignalProtonLLDistribution->GetYaxis()->SetTitle("Candidates, normalised");
232 hSignalProtonLLDistribution->SetMaximum(1.35 * hSignalProtonLLDistribution->GetMaximum());
233
234 hSignalElectronLLDistributionGood->GetXaxis()->SetTitle("ElectronLL (SVD only) for " + SignalVarNameFull);
235 hSignalElectronLLDistributionGood->GetYaxis()->SetTitle("Candidates, normalised");
236 hSignalElectronLLDistributionGood->SetMaximum(1.35 * hSignalElectronLLDistributionGood->GetMaximum());
237
238 hSignalPionLLDistributionGood->GetXaxis()->SetTitle("PionLL (SVD only) for " + SignalVarNameFull);
239 hSignalPionLLDistributionGood->GetYaxis()->SetTitle("Candidates, normalised");
240 hSignalPionLLDistributionGood->SetMaximum(1.35 * hSignalPionLLDistributionGood->GetMaximum());
241
242 hSignalKaonLLDistributionGood->GetXaxis()->SetTitle("KaonLL (SVD only) for " + SignalVarNameFull);
243 hSignalKaonLLDistributionGood->GetYaxis()->SetTitle("Candidates, normalised");
244 hSignalKaonLLDistributionGood->SetMaximum(1.35 * hSignalKaonLLDistributionGood->GetMaximum());
245
246 hSignalProtonLLDistributionGood->GetXaxis()->SetTitle("ProtonLL (SVD only) for " + SignalVarNameFull);
247 hSignalProtonLLDistributionGood->GetYaxis()->SetTitle("Candidates, normalised");
248 hSignalProtonLLDistributionGood->SetMaximum(1.35 * hSignalProtonLLDistributionGood->GetMaximum());
249
250 TCanvas* DistribCanvas = new TCanvas("DistribCanvas", "", 600, 600);
251 gPad(TVirtualPad::Pad())->SetTopMargin(0.05);
252 gPad(TVirtualPad::Pad())->SetRightMargin(0.05);
253 gPad(TVirtualPad::Pad())->SetLeftMargin(0.13);
254 gPad(TVirtualPad::Pad())->SetBottomMargin(0.12);
255
256 hSignalPIDDistribution->SetLineWidth(2);
257 hSignalPIDDistribution->SetLineColor(TColor::GetColor("#2166ac"));
258 hSignalPIDDistribution->Draw("hist");
259
260 DistribCanvas->Print("SVDdEdxValidation_Distribution_" + SignalVarNameFull + PIDVarName + PIDDetectorsName +
261 "_MomRange_" +
262 std::to_string(
263 MomLow)
264 .substr(0, 3) +
265 "_" + std::to_string(MomHigh).substr(0, 3) + ".pdf");
266
267 hSignalElectronLLDistribution->SetLineWidth(2);
268 hSignalPionLLDistribution->SetLineWidth(2);
269 hSignalKaonLLDistribution->SetLineWidth(2);
270 hSignalProtonLLDistribution->SetLineWidth(2);
271
272 hSignalElectronLLDistributionGood->SetLineWidth(2);
273 hSignalPionLLDistributionGood->SetLineWidth(2);
274 hSignalKaonLLDistributionGood->SetLineWidth(2);
275 hSignalProtonLLDistributionGood->SetLineWidth(2);
276
277 hSignalElectronLLDistributionGood->SetLineColor(kBlack);
278 hSignalPionLLDistributionGood->SetLineColor(kBlack);
279 hSignalKaonLLDistributionGood->SetLineColor(kBlack);
280 hSignalProtonLLDistributionGood->SetLineColor(kBlack);
281
282 hSignalElectronLLDistribution->SetTitle("ElectronLL (SVD), all tracks");
283 hSignalPionLLDistribution->SetTitle("PionLL (SVD), all tracks");
284 hSignalKaonLLDistribution->SetTitle("KaonLL (SVD), all tracks");
285 hSignalProtonLLDistribution->SetTitle("ProtonLL (SVD), all tracks");
286
287 hSignalElectronLLDistributionGood->SetTitle("ElectronLL (SVD), tracks with dEdx info");
288 hSignalPionLLDistributionGood->SetTitle("PionLL (SVD), tracks with dEdx info");
289 hSignalKaonLLDistributionGood->SetTitle("KaonLL (SVD), tracks with dEdx info");
290 hSignalProtonLLDistributionGood->SetTitle("ProtonLL (SVD), tracks with dEdx info");
291
292 hSignalElectronLLDistribution->GetXaxis()->SetTitleSize(0.04);
293 hSignalElectronLLDistribution->GetYaxis()->SetTitleSize(0.04);
294 hSignalElectronLLDistribution->GetXaxis()->SetTitleOffset(1.0);
295 hSignalElectronLLDistribution->GetYaxis()->SetTitleOffset(1.3);
296 hSignalElectronLLDistribution->GetYaxis()->SetLabelSize(0.04);
297 hSignalElectronLLDistribution->GetXaxis()->SetLabelSize(0.04);
298
299 hSignalPionLLDistribution->GetXaxis()->SetTitleSize(0.04);
300 hSignalPionLLDistribution->GetYaxis()->SetTitleSize(0.04);
301 hSignalPionLLDistribution->GetXaxis()->SetTitleOffset(1.0);
302 hSignalPionLLDistribution->GetYaxis()->SetTitleOffset(1.3);
303 hSignalPionLLDistribution->GetYaxis()->SetLabelSize(0.04);
304 hSignalPionLLDistribution->GetXaxis()->SetLabelSize(0.04);
305
306 hSignalKaonLLDistribution->GetXaxis()->SetTitleSize(0.04);
307 hSignalKaonLLDistribution->GetYaxis()->SetTitleSize(0.04);
308 hSignalKaonLLDistribution->GetXaxis()->SetTitleOffset(1.0);
309 hSignalKaonLLDistribution->GetYaxis()->SetTitleOffset(1.3);
310 hSignalKaonLLDistribution->GetYaxis()->SetLabelSize(0.04);
311 hSignalKaonLLDistribution->GetXaxis()->SetLabelSize(0.04);
312
313 hSignalProtonLLDistribution->GetXaxis()->SetTitleSize(0.04);
314 hSignalProtonLLDistribution->GetYaxis()->SetTitleSize(0.04);
315 hSignalProtonLLDistribution->GetXaxis()->SetTitleOffset(1.0);
316 hSignalProtonLLDistribution->GetYaxis()->SetTitleOffset(1.3);
317 hSignalProtonLLDistribution->GetYaxis()->SetLabelSize(0.04);
318 hSignalProtonLLDistribution->GetXaxis()->SetLabelSize(0.04);
319
320 TCanvas* LLCanvas = new TCanvas("LLCanvas", "", 900, 700);
321
322 gPad(TVirtualPad::Pad())->SetTopMargin(0.05);
323 gPad(TVirtualPad::Pad())->SetRightMargin(0.05);
324 gPad(TVirtualPad::Pad())->SetLeftMargin(0.13);
325 gPad(TVirtualPad::Pad())->SetBottomMargin(0.12);
326
327 LLCanvas->Divide(2, 2, 0.01, 0.01);
328 LLCanvas->cd(1);
329 hSignalElectronLLDistribution->Draw("hist");
330 LLCanvas->cd(2);
331 hSignalPionLLDistribution->Draw("hist");
332 LLCanvas->cd(3);
333 hSignalKaonLLDistribution->Draw("hist");
334 LLCanvas->cd(4);
335 hSignalProtonLLDistribution->Draw("hist");
336
337 TCanvas* LLCanvasGood = new TCanvas("LLCanvasGood", "", 900, 700);
338
339 gPad(TVirtualPad::Pad())->SetTopMargin(0.05);
340 gPad(TVirtualPad::Pad())->SetRightMargin(0.05);
341 gPad(TVirtualPad::Pad())->SetLeftMargin(0.13);
342 gPad(TVirtualPad::Pad())->SetBottomMargin(0.12);
343
344 LLCanvasGood->Divide(2, 2, 0.01, 0.01);
345 LLCanvasGood->cd(1);
346 hSignalElectronLLDistributionGood->Draw("hist");
347 LLCanvasGood->cd(2);
348 hSignalPionLLDistributionGood->Draw("hist");
349 LLCanvasGood->cd(3);
350 hSignalKaonLLDistributionGood->Draw("hist");
351 LLCanvasGood->cd(4);
352 hSignalProtonLLDistributionGood->Draw("hist");
353
354 LLCanvas->Print("SVDdEdxValidation_LLDistributions_" + SignalVarNameFull +
355 "_SVDonly_MomRange_" +
356 std::to_string(
357 MomLow)
358 .substr(0, 3) +
359 "_" + std::to_string(MomHigh).substr(0, 3) + ".pdf");
360
361 LLCanvasGood->Print("SVDdEdxValidation_LLDistributions_GoodSVDTracks_" + SignalVarNameFull +
362 "_SVDonly_MomRange_" +
363 std::to_string(
364 MomLow)
365 .substr(0, 3) +
366 "_" + std::to_string(MomHigh).substr(0, 3) + ".pdf");
367
368 TFile DistribFile("SVDdEdxValidation_Distribution_" + SignalVarNameFull + PIDVarName + PIDDetectorsName +
369 "_MomRange_" +
370 std::to_string(
371 MomLow)
372 .substr(0, 3) +
373 "_" + std::to_string(MomHigh).substr(0, 3) + ".root",
374 "RECREATE");
375 hSignalPIDDistribution->SetLineColor(kBlack);
376 hSignalPIDDistribution->Write();
377 DistribFile.Close();
378 delete DistribCanvas;
379
380 TFile LLDistribFile(TString("SVDdEdxValidation_LLDistributions_" + SignalVarNameFull + "_SVDonly_MomRange_" +
381 std::to_string(
382 MomLow)
383 .substr(0, 3) +
384 "_" + std::to_string(MomHigh).substr(0, 3) + ".root"),
385 "RECREATE");
386 hSignalElectronLLDistribution->Write();
387 hSignalPionLLDistribution->Write();
388 hSignalKaonLLDistribution->Write();
389 hSignalProtonLLDistribution->Write();
390 LLDistribFile.Close();
391 delete LLCanvas;
392 delete LLCanvasGood;
393 }
394
395 // ---------- Momentum distributions (for efficiency determination) ----------
396
397 SignalTree->Draw(Form("%sMomentum>>hAllSignal(%i,%f,%f)", SignalVarName.Data(), nbins, MomLow, MomHigh),
398 SignalWeightName + " * (" + SignalFiducialCut + ")", "goff");
399 SignalTree->Draw(Form("%sMomentum>>hSelectedSignal(%i,%f,%f)", SignalVarName.Data(), nbins, MomLow, MomHigh),
400 SignalWeightName + " * (" + SignalVarName + PIDVarName + PIDDetectorsName + ">" + PIDCut + "&&" + SignalFiducialCut +
401 ")",
402 "goff");
403
404 FakeTree->Draw(Form("%sMomentum>>hAllFakes(%i,%f,%f)", FakeVarName.Data(), nbins, MomLow, MomHigh),
405 FakeWeightName + " * (" + FakesFiducialCut + ")", "goff");
406 FakeTree->Draw(Form("%sMomentum>>hSelectedFakes(%i,%f,%f)", FakeVarName.Data(), nbins, MomLow, MomHigh),
407 FakeWeightName + " * (" + FakeVarName + PIDVarName + PIDDetectorsName + ">" + PIDCut + "&&" + FakesFiducialCut + ")",
408 "goff");
409
410 TH1D* hAllSignal = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hAllSignal"));
411 TH1D* hSelectedSignal = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hSelectedSignal"));
412 TH1D* hAllFakes = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hAllFakes"));
413 TH1D* hSelectedFakes = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hSelectedFakes"));
414
415 // ---------- Add slow pions to the pion dataset ----------
416 if (strncmp(SignalVarName.Data(), "PionD", 5) == 0) {
417 SignalTree->Draw(Form("SlowPionMomentum>>hAllSignalSlow(%i,%f,%f)", nbins, MomLow, MomHigh),
418 SignalWeightName + " * (" + SignalFiducialCut + ")", "goff");
419 SignalTree->Draw(Form("SlowPionMomentum>>hSelectedSignalSlow(%i,%f,%f)", nbins, MomLow, MomHigh),
420 SignalWeightName + " * (SlowPion" + PIDVarName + PIDDetectorsName + ">" + PIDCut + "&&" + SignalFiducialCut + ")", "goff");
421 TH1D* hAllSignalSlow = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hAllSignalSlow"));
422 TH1D* hSelectedSignalSlow = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hSelectedSignalSlow"));
423 hAllSignal->Add(hAllSignalSlow);
424 hSelectedSignal->Add(hSelectedSignalSlow);
425 }
426
427 if (strncmp(FakeVarName.Data(), "PionD", 5) == 0) {
428 FakeTree->Draw(Form("SlowPionMomentum>>hAllFakesSlow(%i,%f,%f)", nbins, MomLow, MomHigh),
429 FakeWeightName + " * (" + FakesFiducialCut + ")",
430 "goff");
431 FakeTree->Draw(Form("SlowPionMomentum>>hSelectedFakesSlow(%i,%f,%f)", nbins, MomLow, MomHigh),
432 FakeWeightName + " * (SlowPion" + PIDVarName + PIDDetectorsName + ">" + PIDCut + "&&" + FakesFiducialCut + ")", "goff");
433 TH1D* hAllFakesSlow = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hAllFakesSlow"));
434 TH1D* hSelectedFakesSlow = static_cast<TH1D*>(gDirectory(::ROOT::Internal::TDirectoryAtomicAdapter{})->Get("hSelectedFakesSlow"));
435 hAllFakes->Add(hAllFakesSlow);
436 hSelectedFakes->Add(hSelectedFakesSlow);
437 }
438
439 TH1D* EffHistoSig = static_cast<TH1D*>(hAllSignal->Clone("EffHistoSig")); // signal efficiency
440 TH1D* EffHistoFake = static_cast<TH1D*>(hAllFakes->Clone("EffHistoFake")); // fakes efficiency
441
442 EffHistoSig->Divide(hSelectedSignal, hAllSignal);//, 1, 1, "B");
443 EffHistoFake->Divide(hSelectedFakes, hAllFakes);//, 1, 1, "B");
444
445 // PID plots
446 TH1D* hBase = new TH1D("hBase", "", 100, 0.0, MomHigh);
447 hBase->SetTitle(";Momentum [GeV];Efficiency");
448 hBase->SetMaximum(1.20);
449 hBase->SetMinimum(0.0);
450
451 TLegend* tleg1 = new TLegend(0.63, 0.82, 0.93, 0.94);
452 tleg1->AddEntry(EffHistoSig, SignalVarNameFull + " efficiency", "pl");
453 tleg1->AddEntry(EffHistoFake, FakeVarNameFull + " fake rate", "pl");
454
455 TCanvas* ResultCanvas = new TCanvas("ResultCanvas", "", 600, 600);
456 gPad(TVirtualPad::Pad())->SetTopMargin(0.05);
457 gPad(TVirtualPad::Pad())->SetRightMargin(0.05);
458 gPad(TVirtualPad::Pad())->SetLeftMargin(0.13);
459 gPad(TVirtualPad::Pad())->SetBottomMargin(0.12);
460
461 ResultCanvas->SetGrid();
462 hBase->Draw();
463 EffHistoSig->SetMarkerSize(1.5);
464 EffHistoSig->SetMarkerStyle(22);
465 EffHistoSig->SetMarkerColor(TColor::GetColor("#2166ac"));
466 EffHistoSig->SetLineColor(TColor::GetColor("#2166ac"));
467 EffHistoSig->Draw("P,same");
468
469 EffHistoFake->SetMarkerSize(1.5);
470 EffHistoFake->SetMarkerStyle(23);
471 EffHistoFake->SetMarkerColor(TColor::GetColor("#ef8a62"));
472 EffHistoFake->SetLineColor(TColor::GetColor("#ef8a62"));
473 EffHistoFake->Draw("P,same");
474
475 tleg1->Draw("same");
476
477 hBase->SetStats(0);
478 hBase->GetXaxis()->SetTitleSize(0.04);
479 hBase->GetYaxis()->SetTitleSize(0.04);
480 hBase->GetXaxis()->SetTitleOffset(1.0);
481 hBase->GetYaxis()->SetTitleOffset(1.3);
482 hBase->GetYaxis()->SetLabelSize(0.04);
483 hBase->GetXaxis()->SetLabelSize(0.04);
484
485 // std::setprecision(2);
486 ResultCanvas->Print("SVDdEdxValidation_Efficiency_" + SignalVarNameFull + "_vs_" + FakeVarNameFull + PIDVarName + "_" +
487 PIDDetectorsName +
488 "_Cut" +
489 PIDCut + "_MomRange_" + std::to_string(MomLow).substr(0, 3) + "_" + std::to_string(MomHigh).substr(0, 3) + ".pdf");
490 TFile ResultFile("SVDdEdxValidation_Efficiency_" + SignalVarNameFull + "_vs_" + FakeVarNameFull + PIDVarName + "_" +
491 PIDDetectorsName +
492 "_Cut" +
493 PIDCut + "_MomRange_" + std::to_string(MomLow).substr(0, 3) + "_" + std::to_string(MomHigh).substr(0, 3) + ".root",
494 "RECREATE");
495 EffHistoSig->SetLineColor(kBlack);
496 EffHistoSig->SetMarkerColor(kBlack);
497 EffHistoFake->SetLineColor(kBlack);
498 EffHistoFake->SetMarkerColor(kBlack);
499 EffHistoSig->Write();
500 EffHistoFake->Write();
501 ResultFile.Close();
502 delete ResultCanvas;
503 delete hBase;
504}
505
506void SVDdEdxValidationAlgorithm::PlotROCCurve(TTree* SignalTree, TString SignalWeightName, TString SignalVarName,
507 TString SignalVarNameFull, TTree* FakeTree, TString FakeWeightName, TString FakeVarName, TString FakeVarNameFull,
508 TString PIDVarName)
509{
510
511 if ((SignalTree == nullptr) || (FakeTree == nullptr)) {
1
Assuming the condition is false
2
Assuming the condition is false
512 B2FATAL("Invalid dataset, stopping here")do { { LogVariableStream varStream; varStream << "Invalid dataset, stopping here"
; Belle2::LogSystem::Instance().sendMessage(Belle2::LogMessage
(Belle2::LogConfig::c_Fatal, std::move(varStream), "svd", __PRETTY_FUNCTION__
, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc", 512, 0
)); }; exit(1); } while(false)
;
513 }
514
515 if ((SignalTree->GetEntries() == 0) || (FakeTree->GetEntries() == 0)) {
3
Assuming the condition is false
4
Assuming the condition is false
516 B2FATAL("The dataset is empty, stopping here")do { { LogVariableStream varStream; varStream << "The dataset is empty, stopping here"
; Belle2::LogSystem::Instance().sendMessage(Belle2::LogMessage
(Belle2::LogConfig::c_Fatal, std::move(varStream), "svd", __PRETTY_FUNCTION__
, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc", 516, 0
)); }; exit(1); } while(false)
;
517 }
518
519 if ((SignalTree->GetBranch(Form("%sMomentum", SignalVarName.Data())) == nullptr)
5
Assuming the condition is false
7
Taking false branch
520 || (FakeTree->GetBranch(Form("%sMomentum", FakeVarName.Data())) == nullptr)) {
6
Assuming the condition is false
521 B2FATAL("Check the provided branch name, stopping here")do { { LogVariableStream varStream; varStream << "Check the provided branch name, stopping here"
; Belle2::LogSystem::Instance().sendMessage(Belle2::LogMessage
(Belle2::LogConfig::c_Fatal, std::move(varStream), "svd", __PRETTY_FUNCTION__
, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc", 521, 0
)); }; exit(1); } while(false)
;
522 }
523
524 std::vector<TString> PIDDetectors;
525 PIDDetectors.clear();
526 PIDDetectors.push_back("ALL");
527 PIDDetectors.push_back("noSVD");
528
529 std::vector<double> SignalEfficiencyALL, FakeEfficiencyALL;
530 SignalEfficiencyALL.reserve(m_NumROCpoints);
531 FakeEfficiencyALL.reserve(m_NumROCpoints);
532 std::vector<double> SignalEfficiencynoSVD, FakeEfficiencynoSVD;
533 SignalEfficiencynoSVD.reserve(m_NumROCpoints);
534 FakeEfficiencynoSVD.reserve(m_NumROCpoints);
535
536 TString SignalFiducialCut = SignalVarName + PIDVarName + "noSVD>=0"; // sanity cuts to reject events with NaN
537 TString FakesFiducialCut = FakeVarName + PIDVarName + "noSVD>=0";
538 TString SignalFiducialCutSlow = "SlowPion" + PIDVarName + "noSVD>=0";
539 TString FakesFiducialCutSlow = "SlowPion" + PIDVarName + "noSVD>=0";
540
541 // calculate efficiencies
542
543 TCut AllSignalCut = SignalFiducialCut * Form("%sMomentum>%f && %sMomentum<%f", SignalVarName.Data(), m_MomLowROC,
544 SignalVarName.Data(), m_MomHighROC);
545
546 double AllSignalIntegral, SelectedSignalIntegral;
547
548 auto DataFrameSignalAll = RDataFrame(*SignalTree).Filter(AllSignalCut.GetTitle());
8
Calling 'RInterface::Filter'
549
550 if (SignalWeightName == "1") {
551 AllSignalIntegral = DataFrameSignalAll.Count().GetValue();
552 } else {
553 AllSignalIntegral = DataFrameSignalAll.Sum(SignalWeightName).GetValue();
554 }
555
556 std::unique_ptr<ROOT::RDF::RNode> DataFrameSlowSignalAll;
557
558 if (strncmp(SignalVarName.Data(), "PionD", 5) == 0) {
559 TString SignalVarNameSlow = "SlowPion";
560 TCut AllSignalCutSlow = TCut(SignalFiducialCut) * TCut(SignalFiducialCutSlow) * Form("(%sMomentum>%f && %sMomentum<%f)",
561 SignalVarNameSlow.Data(), m_MomLowROC, SignalVarNameSlow.Data(), m_MomHighROC);
562 DataFrameSlowSignalAll = std::make_unique<ROOT::RDF::RNode>(RDataFrame(*SignalTree).Filter(AllSignalCutSlow.GetTitle()));
563
564 if (SignalWeightName == "1") {
565 AllSignalIntegral += DataFrameSlowSignalAll->Count().GetValue();
566 } else {
567 AllSignalIntegral += DataFrameSlowSignalAll->Sum(SignalWeightName).GetValue();
568 }
569 }
570
571 for (unsigned int i = 0; i < PIDDetectors.size(); i++) {
572 for (unsigned int j = 0; j < m_NumROCpoints; ++j) {
573 delete gROOT(ROOT::GetROOT())->FindObject("PIDCut");
574
575 // scan cut values from 0 to 1, with a denser scan closer to 0 or 1, to get a nicer ROC curve
576 double x = 1. / m_NumROCpoints * j;
577 TString PIDCut = TString::Format("%f", 1. / (1 + TMath::Power(x / (1 - x), -3)));
578
579 TCut SelectedSignalCut = Form("(%s%s%s > %s)", SignalVarName.Data(), PIDVarName.Data(), PIDDetectors[i].Data(), PIDCut.Data());
580
581 if (SignalWeightName == "1") {
582 SelectedSignalIntegral = DataFrameSignalAll.Filter(SelectedSignalCut.GetTitle()).Count().GetValue();
583 } else {
584 SelectedSignalIntegral = DataFrameSignalAll.Filter(SelectedSignalCut.GetTitle()).Sum(SignalWeightName).GetValue();
585 }
586
587 // special treatment for pions: add also the slow pions from Dstar to gain low-momentum coverage
588 if (strncmp(SignalVarName.Data(), "PionD", 5) == 0) {
589 TString SignalVarNameSlow = "SlowPion";
590 TCut SelectedSignalCutSlow = Form("(%s%s%s > %s)", SignalVarNameSlow.Data(), PIDVarName.Data(), PIDDetectors[i].Data(),
591 PIDCut.Data());
592
593 if (SignalWeightName == "1") {
594 SelectedSignalIntegral += DataFrameSlowSignalAll->Filter(SelectedSignalCutSlow.GetTitle()).Count().GetValue();
595 } else {
596 SelectedSignalIntegral += DataFrameSlowSignalAll->Filter(SelectedSignalCutSlow.GetTitle()).Sum(SignalWeightName).GetValue();
597 }
598 }
599
600 if (PIDDetectors[i] == "ALL") {
601 SignalEfficiencyALL.push_back(SelectedSignalIntegral / AllSignalIntegral);
602 }
603
604 if (PIDDetectors[i] == "noSVD") {
605 SignalEfficiencynoSVD.push_back(SelectedSignalIntegral / AllSignalIntegral);
606 }
607 }
608 }
609
610 // calculate fake rates
611
612 TCut AllFakeCut = FakesFiducialCut * Form("%sMomentum>%f && %sMomentum<%f", FakeVarName.Data(), m_MomLowROC, FakeVarName.Data(),
613 m_MomHighROC);
614
615 double AllFakeIntegral, SelectedFakeIntegral;
616 auto DataFrameFakeAll = RDataFrame(*FakeTree).Filter(AllFakeCut.GetTitle());
617
618 if (FakeWeightName == "1") {
619 AllFakeIntegral = DataFrameFakeAll.Count().GetValue();
620 } else {
621 AllFakeIntegral = DataFrameFakeAll.Sum(FakeWeightName).GetValue();
622 }
623
624 std::unique_ptr<ROOT::RDF::RNode> DataFrameSlowFakeAll;
625
626 // special treatment for pions: add also the slow pions from Dstar to gain low-momentum coverage
627 if (strncmp(FakeVarName.Data(), "PionD", 5) == 0) {
628
629 TString FakeVarNameSlow = "SlowPion";
630 TCut AllFakeCutSlow = TCut(FakesFiducialCut) * TCut(FakesFiducialCutSlow) * Form("(%sMomentum>%f && %sMomentum<%f)",
631 FakeVarNameSlow.Data(), m_MomLowROC, FakeVarNameSlow.Data(), m_MomHighROC);
632 DataFrameSlowFakeAll = std::make_unique<ROOT::RDF::RNode>(RDataFrame(*FakeTree).Filter(AllFakeCutSlow.GetTitle()));
633
634 if (FakeWeightName == "1") {
635 AllFakeIntegral += DataFrameSlowFakeAll->Count().GetValue();
636 } else {
637 AllFakeIntegral += DataFrameSlowFakeAll->Sum(FakeWeightName).GetValue();
638 }
639 }
640
641 for (unsigned int i = 0; i < PIDDetectors.size(); i++) {
642 for (unsigned int j = 0; j < m_NumROCpoints; ++j) {
643 delete gROOT(ROOT::GetROOT())->FindObject("PIDCut");
644 delete gROOT(ROOT::GetROOT())->FindObject("hAllFakes");
645 delete gROOT(ROOT::GetROOT())->FindObject("hSelectedFakes");
646
647 // scan cut values from 0 to 1, with a denser scan closer to 0 or 1, to get a nicer ROC curve
648 double x = 1. / m_NumROCpoints * j;
649 TString PIDCut = TString::Format("%f", 1. / (1 + TMath::Power(x / (1 - x), -3)));
650
651 TCut SelectedFakeCut = Form("(%s%s%s > %s)", FakeVarName.Data(), PIDVarName.Data(), PIDDetectors[i].Data(), PIDCut.Data());
652
653 if (FakeWeightName == "1") {
654 SelectedFakeIntegral = DataFrameFakeAll.Filter(SelectedFakeCut.GetTitle()).Count().GetValue();
655 } else {
656 SelectedFakeIntegral = DataFrameFakeAll.Filter(SelectedFakeCut.GetTitle()).Sum(FakeWeightName).GetValue();
657 }
658
659 if (strncmp(FakeVarName.Data(), "PionD", 5) == 0) {
660 TString FakeVarNameSlow = "SlowPion";
661
662 TCut SelectedFakeCutSlow = Form("(%s%s%s > %s)", FakeVarNameSlow.Data(), PIDVarName.Data(), PIDDetectors[i].Data(), PIDCut.Data());
663
664 if (FakeWeightName == "1") {
665 SelectedFakeIntegral += DataFrameSlowFakeAll->Filter(SelectedFakeCutSlow.GetTitle()).Count().GetValue();
666 } else {
667 SelectedFakeIntegral += DataFrameSlowFakeAll->Filter(SelectedFakeCutSlow.GetTitle()).Sum(FakeWeightName).GetValue();
668 }
669 }
670
671 if (PIDDetectors[i] == "ALL") {
672 FakeEfficiencyALL.push_back(SelectedFakeIntegral / AllFakeIntegral);
673 }
674
675 if (PIDDetectors[i] == "noSVD") {
676 FakeEfficiencynoSVD.push_back(SelectedFakeIntegral / AllFakeIntegral);
677 }
678 }
679 }
680
681 auto ResultCanvas = new TCanvas("ResultCanvas", "", 600, 400);
682 TMultiGraph* hmgraph = new TMultiGraph();
683
684 // efficiency vs fake rate graph
685 TGraph* hgraphALL = new TGraph(m_NumROCpoints, FakeEfficiencyALL.data(), SignalEfficiencyALL.data());
686 hgraphALL->SetMarkerColor(TColor::GetColor("#2166ac"));
687 hgraphALL->SetMarkerStyle(20);
688 hgraphALL->SetLineColor(TColor::GetColor("#2166ac"));
689 hgraphALL->SetLineWidth(3);
690 hgraphALL->SetDrawOption("AP*");
691 hgraphALL->SetTitle("with SVD");
692
693 TGraph* hgraphnoSVD = new TGraph(m_NumROCpoints, FakeEfficiencynoSVD.data(), SignalEfficiencynoSVD.data());
694 hgraphnoSVD->SetMarkerColor(TColor::GetColor("#ef8a62"));
695 hgraphnoSVD->SetLineColor(TColor::GetColor("#ef8a62"));
696 hgraphnoSVD->SetLineWidth(3);
697 hgraphnoSVD->SetMarkerStyle(22);
698 hgraphnoSVD->SetDrawOption("P*");
699 hgraphnoSVD->SetTitle("without SVD");
700
701 hmgraph->Add(hgraphALL);
702 hmgraph->Add(hgraphnoSVD);
703 hmgraph->Draw("A");
704 hmgraph->GetHistogram()->GetXaxis()->SetTitle(FakeVarNameFull + " fake rate");
705 hmgraph->GetHistogram()->GetYaxis()->SetTitle(SignalVarNameFull + " signal efficiency");
706
707 ResultCanvas->BuildLegend(0.6, 0.25, 0.9, 0.5);
708 ResultCanvas->SetGrid();
709
710 ResultCanvas->Print("SVDdEdxValidation_ROC_curve_" + SignalVarNameFull + "_vs_" + FakeVarNameFull + PIDVarName + "_MomRange" +
711 std::to_string(m_MomLowROC).substr(0, 3) + "_" + std::to_string(m_MomHighROC).substr(0, 3) + ".pdf");
712
713 TFile ResultFile("SVDdEdxValidation_ROC_curve_" + SignalVarNameFull + "_vs_" + FakeVarNameFull + PIDVarName + "_MomRange" +
714 std::to_string(m_MomLowROC).substr(0, 3) + "_" + std::to_string(m_MomHighROC).substr(0, 3) + ".root",
715 "RECREATE");
716 hmgraph->Write();
717 ResultFile.Close();
718
719 delete ResultCanvas;
720}
721
722TTree* SVDdEdxValidationAlgorithm::LambdaMassFit(std::shared_ptr<TTree> preselTree)
723{
724 B2INFO("Configuring the Lambda fit...")do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2::
LogConfig::c_Info, 0, "svd")) { { LogVariableStream varStream
; varStream << "Configuring the Lambda fit..."; Belle2::
LogSystem::Instance().sendMessage(Belle2::LogMessage(Belle2::
LogConfig::c_Info, std::move(varStream), "svd", __PRETTY_FUNCTION__
, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc", 724, 0
)); }; } } while(false)
;
725 gROOT(ROOT::GetROOT())->SetBatch(true);
726 RooMsgService::instance().setGlobalKillBelow(RooFit::WARNING);
727
728 RooRealVar InvM("InvM", "m(p^{+}#pi^{-})", 1.1, 1.13, "GeV/c^{2}");
729
730 RooRealVar ProtonMomentum("ProtonMomentum", "momentum for p", -1.e8, 1.e8);
731 RooRealVar ProtonSVDdEdx("ProtonSVDdEdx", "", -1.e8, 1.e8);
732
733 RooRealVar exp("exp", "experiment number", 0, 1.e5);
734 RooRealVar run("run", "run number", 0, 1.e7);
735
736 RooRealVar ProtonProtonIDALL("ProtonProtonIDALL", "", -1.e8, 1.e8);
737 RooRealVar ProtonProtonIDSVDonly("ProtonProtonIDSVDonly", "", -1.e8, 1.e8);
738 RooRealVar ProtonProtonIDnoSVD("ProtonProtonIDnoSVD", "", -1.e8, 1.e8);
739
740 RooRealVar ProtonElectronLLSVDonly("ProtonElectronLLSVDonly", "", -1.e8, 1.e8);
741 RooRealVar ProtonPionLLSVDonly("ProtonPionLLSVDonly", "", -1.e8, 1.e8);
742 RooRealVar ProtonKaonLLSVDonly("ProtonKaonLLSVDonly", "", -1.e8, 1.e8);
743 RooRealVar ProtonProtonLLSVDonly("ProtonProtonLLSVDonly", "", -1.e8, 1.e8);
744
745 RooRealVar ProtonBinaryProtonPionIDALL("ProtonBinaryProtonPionIDALL", "", -1.e8, 1.e8);
746 RooRealVar ProtonBinaryProtonPionIDSVDonly("ProtonBinaryProtonPionIDSVDonly", "", -1.e8, 1.e8);
747 RooRealVar ProtonBinaryProtonPionIDnoSVD("ProtonBinaryProtonPionIDnoSVD", "", -1.e8, 1.e8);
748
749 RooRealVar ProtonBinaryProtonKaonIDALL("ProtonBinaryProtonKaonIDALL", "", -1.e8, 1.e8);
750 RooRealVar ProtonBinaryProtonKaonIDSVDonly("ProtonBinaryProtonKaonIDSVDonly", "", -1.e8, 1.e8);
751 RooRealVar ProtonBinaryProtonKaonIDnoSVD("ProtonBinaryProtonKaonIDnoSVD", "", -1.e8, 1.e8);
752
753 RooRealVar ProtonBinaryProtonElectronIDALL("ProtonBinaryProtonElectronIDALL", "", -1.e8, 1.e8);
754 RooRealVar ProtonBinaryProtonElectronIDSVDonly("ProtonBinaryProtonElectronIDSVDonly", "", -1.e8, 1.e8);
755 RooRealVar ProtonBinaryProtonElectronIDnoSVD("ProtonBinaryProtonElectronIDnoSVD", "", -1.e8, 1.e8);
756
757 RooRealVar ProtonBinaryPionProtonIDALL("ProtonBinaryPionProtonIDALL", "", -1.e8, 1.e8);
758 RooRealVar ProtonBinaryPionProtonIDSVDonly("ProtonBinaryPionProtonIDSVDonly", "", -1.e8, 1.e8);
759 RooRealVar ProtonBinaryPionProtonIDnoSVD("ProtonBinaryPionProtonIDnoSVD", "", -1.e8, 1.e8);
760
761 RooRealVar ProtonBinaryKaonProtonIDALL("ProtonBinaryKaonProtonIDALL", "", -1.e8, 1.e8);
762 RooRealVar ProtonBinaryKaonProtonIDSVDonly("ProtonBinaryKaonProtonIDSVDonly", "", -1.e8, 1.e8);
763 RooRealVar ProtonBinaryKaonProtonIDnoSVD("ProtonBinaryKaonProtonIDnoSVD", "", -1.e8, 1.e8);
764
765 RooRealVar ProtonBinaryElectronProtonIDALL("ProtonBinaryElectronProtonIDALL", "", -1.e8, 1.e8);
766 RooRealVar ProtonBinaryElectronProtonIDSVDonly("ProtonBinaryElectronProtonIDSVDonly", "", -1.e8, 1.e8);
767 RooRealVar ProtonBinaryElectronProtonIDnoSVD("ProtonBinaryElectronProtonIDnoSVD", "", -1.e8, 1.e8);
768
769 auto variables = new RooArgSet();
770
771 variables->add(InvM);
772
773 variables->add(ProtonMomentum);
774 variables->add(ProtonSVDdEdx);
775 variables->add(exp);
776 variables->add(run);
777
778 variables->add(ProtonProtonIDALL);
779 variables->add(ProtonProtonIDSVDonly);
780 variables->add(ProtonProtonIDnoSVD);
781 variables->add(ProtonElectronLLSVDonly);
782 variables->add(ProtonPionLLSVDonly);
783 variables->add(ProtonKaonLLSVDonly);
784 variables->add(ProtonProtonLLSVDonly);
785 variables->add(ProtonBinaryProtonPionIDALL);
786 variables->add(ProtonBinaryProtonPionIDSVDonly);
787 variables->add(ProtonBinaryProtonPionIDnoSVD);
788 variables->add(ProtonBinaryProtonKaonIDALL);
789 variables->add(ProtonBinaryProtonKaonIDSVDonly);
790 variables->add(ProtonBinaryProtonKaonIDnoSVD);
791 variables->add(ProtonBinaryProtonElectronIDALL);
792 variables->add(ProtonBinaryProtonElectronIDSVDonly);
793 variables->add(ProtonBinaryProtonElectronIDnoSVD);
794 variables->add(ProtonBinaryPionProtonIDALL);
795 variables->add(ProtonBinaryPionProtonIDSVDonly);
796 variables->add(ProtonBinaryPionProtonIDnoSVD);
797 variables->add(ProtonBinaryKaonProtonIDALL);
798 variables->add(ProtonBinaryKaonProtonIDSVDonly);
799 variables->add(ProtonBinaryKaonProtonIDnoSVD);
800 variables->add(ProtonBinaryElectronProtonIDALL);
801 variables->add(ProtonBinaryElectronProtonIDSVDonly);
802 variables->add(ProtonBinaryElectronProtonIDnoSVD);
803
804 RooDataSet* LambdaDataset = new RooDataSet("LambdaDataset", "LambdaDataset", *variables, Import(*preselTree));
805
806 if (LambdaDataset->sumEntries() == 0) {
807 B2FATAL("The Lambda dataset is empty, stopping here")do { { LogVariableStream varStream; varStream << "The Lambda dataset is empty, stopping here"
; Belle2::LogSystem::Instance().sendMessage(Belle2::LogMessage
(Belle2::LogConfig::c_Fatal, std::move(varStream), "svd", __PRETTY_FUNCTION__
, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc", 807, 0
)); }; exit(1); } while(false)
;
808 }
809
810 // the signal PDF; might be revisited at a later point
811
812 RooRealVar GaussMean("GaussMean", " GaussMean", 1.116, 1.111, 1.12);
813 RooRealVar GaussSigma("GaussSigma", "#sigma_{1}", 3.e-3, 3.e-5, 10.e-3);
814 RooGaussian LambdaGauss("LambdaGauss", "LambdaGauss", InvM, GaussMean, GaussSigma);
815
816 /* temporary RooRealVar sigmaBifurGaussL1 and sigmaBifurGaussR1 to replace
817 * RooRealVar resolutionParamL("resolutionParamL", "resolutionParamL", 0.4, 5.e-4, 1.0);
818 * RooRealVar resolutionParamR("resolutionParamR", "resolutionParamR", 0.4, 5.e-4, 1.0);
819 * RooFormulaVar sigmaBifurGaussL1("sigmaBifurGaussL1", "resolutionParamL*GaussSigma", RooArgSet(resolutionParamL, GaussSigma));
820 * RooFormulaVar sigmaBifurGaussR1("sigmaBifurGaussR1", "resolutionParamR*GaussSigma", RooArgSet(resolutionParamR, GaussSigma));
821 */
822 RooRealVar sigmaBifurGaussL1("sigmaBifurGaussL1", "sigma left", 0.4 * 3.e-3, 3.e-5, 10.e-3);
823 RooRealVar sigmaBifurGaussR1("sigmaBifurGaussR1", "sigma right", 0.4 * 3.e-3, 3.e-5, 10.e-3);
824 RooBifurGauss LambdaBifurGauss("LambdaBifurGauss", "LambdaBifurGauss", InvM, GaussMean, sigmaBifurGaussL1, sigmaBifurGaussR1);
825
826 /* temporary RooRealVar sigmaBifurGaussL2 to replace
827 * RooRealVar resolutionParam2("resolutionParam2", "resolutionParam2", 0.2, 5.e-4, 1.0);
828 * sigmaBifurGaussL2("sigmaBifurGaussL2", "resolutionParam2*GaussSigma", RooArgSet(resolutionParam2, GaussSigma));
829 */
830 RooRealVar sigmaBifurGaussL2("sigmaBifurGaussL2", "sigmaBifurGaussL2", 0.2 * 3.e-3, 3.e-5, 10.e-3);
831 RooGaussian LambdaBifurGauss2("LambdaBifurGauss2", "LambdaBifurGauss2", InvM, GaussMean, sigmaBifurGaussL2);
832
833 RooRealVar fracBifurGaussYield("fracBifurGaussYield", "fracBifurGaussYield", 0.3, 5.e-4, 1.0);
834 RooRealVar fracGaussYield("fracGaussYield", "fracGaussYield", 0.8, 5.e-4, 1.0);
835
836 RooAddPdf LambdaCombinedBifurGauss("LambdaCombinedBifurGauss", "LambdaBifurGauss + LambdaBifurGauss2 ", RooArgList(LambdaBifurGauss,
837 LambdaBifurGauss2), RooArgList(fracBifurGaussYield));
838
839 RooAddPdf LambdaSignalPDF("LambdaSignalPDF", "LambdaCombinedBifurGauss + LambdaGauss", RooArgList(LambdaCombinedBifurGauss,
840 LambdaGauss), RooArgList(fracGaussYield));
841
842 // Background PDF
843 RooRealVar BkgPolyCoef0("BkgPolyCoef0", "BkgPolyCoef0", 0.1, 0., 1.5);
844 RooRealVar BkgPolyCoef1("BkgPolyCoef1", "BkgPolyCoef1", -0.5, -1.5, -1.e-3);
845 RooChebychev BkgPolyPDF("BkgPolyPDF", "BkgPolyPDF", InvM, RooArgList(BkgPolyCoef0, BkgPolyCoef1));
846
847 RooRealVar nSignalLambda("nSignalLambda", "nSignalLambda", 0.6 * preselTree->GetEntries(), 0., 0.99 * preselTree->GetEntries());
848 RooRealVar nBkgLambda("nBkgLambda", "nBkgLambda", 0.4 * preselTree->GetEntries(), 0., 0.99 * preselTree->GetEntries());
849 RooAddPdf totalPDFLambda("totalPDFLambda", "totalPDFLambda pdf", RooArgList(LambdaSignalPDF, BkgPolyPDF),
850 RooArgList(nSignalLambda, nBkgLambda));
851
852 B2INFO("Lambda: Start fitting...")do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2::
LogConfig::c_Info, 0, "svd")) { { LogVariableStream varStream
; varStream << "Lambda: Start fitting..."; Belle2::LogSystem
::Instance().sendMessage(Belle2::LogMessage(Belle2::LogConfig
::c_Info, std::move(varStream), "svd", __PRETTY_FUNCTION__, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc"
, 852, 0)); }; } } while(false)
;
853 RooFitResult* LambdaFitResult = totalPDFLambda.fitTo(*LambdaDataset, Save(kTRUE), PrintLevel(-1));
854
855 int status = LambdaFitResult->status();
856 int covqual = LambdaFitResult->covQual();
857 double diff = nSignalLambda.getValV() + nBkgLambda.getValV() - LambdaDataset->sumEntries();
858
859 B2INFO("Lambda: Fit status: " << status << "; covariance quality: " << covqual)do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2::
LogConfig::c_Info, 0, "svd")) { { LogVariableStream varStream
; varStream << "Lambda: Fit status: " << status <<
"; covariance quality: " << covqual; Belle2::LogSystem
::Instance().sendMessage(Belle2::LogMessage(Belle2::LogConfig
::c_Info, std::move(varStream), "svd", __PRETTY_FUNCTION__, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc"
, 859, 0)); }; } } while(false)
;
860 // if the fit is not healthy, try again once before giving up, with a slightly different setup:
861 if ((status > 0) || (TMath::Abs(diff) > 1.) || (nSignalLambda.getError() < sqrt(nSignalLambda.getValV()))
862 || (nSignalLambda.getError() > (nSignalLambda.getValV()))) {
863
864 LambdaFitResult = totalPDFLambda.fitTo(*LambdaDataset, Save(), Strategy(2), Offset(1));
865 status = LambdaFitResult->status();
866 covqual = LambdaFitResult->covQual();
867 diff = nSignalLambda.getValV() + nBkgLambda.getValV() - LambdaDataset->sumEntries();
868 }
869
870 if ((status > 0) || (TMath::Abs(diff) > 1.) || (nSignalLambda.getError() < sqrt(nSignalLambda.getValV()))
871 || (nSignalLambda.getError() > (nSignalLambda.getValV()))) {
872 B2WARNING("Lambda: Fit problem: fit status " << status << "; sum of component yields minus the dataset yield is " << diff <<do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2::
LogConfig::c_Warning, 0, "svd")) { { LogVariableStream varStream
; varStream << "Lambda: Fit problem: fit status " <<
status << "; sum of component yields minus the dataset yield is "
<< diff << "; signal yield is " << nSignalLambda
.getValV() << ", while its uncertainty is " << nSignalLambda
.getError(); Belle2::LogSystem::Instance().sendMessage(Belle2
::LogMessage(Belle2::LogConfig::c_Warning, std::move(varStream
), "svd", __PRETTY_FUNCTION__, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc"
, 873, 0)); }; } } while(false)
873 "; signal yield is " << nSignalLambda.getValV() << ", while its uncertainty is " << nSignalLambda.getError())do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2::
LogConfig::c_Warning, 0, "svd")) { { LogVariableStream varStream
; varStream << "Lambda: Fit problem: fit status " <<
status << "; sum of component yields minus the dataset yield is "
<< diff << "; signal yield is " << nSignalLambda
.getValV() << ", while its uncertainty is " << nSignalLambda
.getError(); Belle2::LogSystem::Instance().sendMessage(Belle2
::LogMessage(Belle2::LogConfig::c_Warning, std::move(varStream
), "svd", __PRETTY_FUNCTION__, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc"
, 873, 0)); }; } } while(false)
;
874 }
875 if (covqual < 2) {
876 B2INFO("Lambda: Fit warning: covariance quality " << covqual)do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2::
LogConfig::c_Info, 0, "svd")) { { LogVariableStream varStream
; varStream << "Lambda: Fit warning: covariance quality "
<< covqual; Belle2::LogSystem::Instance().sendMessage(
Belle2::LogMessage(Belle2::LogConfig::c_Info, std::move(varStream
), "svd", __PRETTY_FUNCTION__, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc"
, 876, 0)); }; } } while(false)
;
877 }
878
879 TCanvas* canvLambda = new TCanvas("canvLambda", "canvLambda");
880 RooPlot* LambdaFitFrame = LambdaDataset->plotOn(InvM.frame(130));
881 totalPDFLambda.plotOn(LambdaFitFrame, LineColor(TColor::GetColor("#4575b4")));
882
883 double chisquare = LambdaFitFrame->chiSquare();
884 B2INFO("Lambda: Fit chi2 = " << chisquare)do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2::
LogConfig::c_Info, 0, "svd")) { { LogVariableStream varStream
; varStream << "Lambda: Fit chi2 = " << chisquare
; Belle2::LogSystem::Instance().sendMessage(Belle2::LogMessage
(Belle2::LogConfig::c_Info, std::move(varStream), "svd", __PRETTY_FUNCTION__
, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc", 884, 0
)); }; } } while(false)
;
885 totalPDFLambda.paramOn(LambdaFitFrame, Layout(0.6, 0.96, 0.93), Format("NEU", AutoPrecision(2)));
886 LambdaFitFrame->getAttText()->SetTextSize(0.03);
887
888 totalPDFLambda.plotOn(LambdaFitFrame, Components("LambdaSignalPDF"), LineColor(TColor::GetColor("#d73027")));
889 totalPDFLambda.plotOn(LambdaFitFrame, Components("BkgPolyPDF"), LineColor(TColor::GetColor("#fc8d59")));
890 totalPDFLambda.plotOn(LambdaFitFrame, LineColor(TColor::GetColor("#4575b4")));
891
892 LambdaFitFrame->GetXaxis()->SetTitle("m(p#pi^{-}) (GeV/c^{2})");
893
894 LambdaFitFrame->Draw();
895
896 if (m_isMakePlots) {
897 canvLambda->Print("SVDdEdxValidationFitLambda.pdf");
898 TFile LambdaFitPlotFile("SVDdEdxValidationLambdaFitPlotFile.root", "RECREATE");
899 canvLambda->Write();
900 LambdaFitPlotFile.Close();
901 }
902 RooStats::SPlot* sPlotDatasetLambda = new RooStats::SPlot("sData", "An SPlot", *LambdaDataset, &totalPDFLambda,
903 RooArgList(nSignalLambda, nBkgLambda));
904
905 for (int iEvt = 0; iEvt < 5; iEvt++) {
906 if (TMath::Abs(sPlotDatasetLambda->GetSWeight(iEvt, "nSignalLambda") + sPlotDatasetLambda->GetSWeight(iEvt,
907 "nBkgLambda") - 1) > 5.e-3)
908 B2FATAL("Lambda: sPlot error: sum of weights not equal to 1")do { { LogVariableStream varStream; varStream << "Lambda: sPlot error: sum of weights not equal to 1"
; Belle2::LogSystem::Instance().sendMessage(Belle2::LogMessage
(Belle2::LogConfig::c_Fatal, std::move(varStream), "svd", __PRETTY_FUNCTION__
, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc", 908, 0
)); }; exit(1); } while(false)
;
909 }
910
911 TTree* treeLambda_sw = LambdaDataset->GetClonedTree();
912 treeLambda_sw->SetName("treeLambda_sw");
913
914 B2INFO("Lambda: sPlot done. ")do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2::
LogConfig::c_Info, 0, "svd")) { { LogVariableStream varStream
; varStream << "Lambda: sPlot done. "; Belle2::LogSystem
::Instance().sendMessage(Belle2::LogMessage(Belle2::LogConfig
::c_Info, std::move(varStream), "svd", __PRETTY_FUNCTION__, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc"
, 914, 0)); }; } } while(false)
;
915
916 return treeLambda_sw;
917}
918
919TTree* SVDdEdxValidationAlgorithm::DstarMassFit(std::shared_ptr<TTree> preselTree)
920{
921 B2INFO("Configuring the Dstar fit...")do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2::
LogConfig::c_Info, 0, "svd")) { { LogVariableStream varStream
; varStream << "Configuring the Dstar fit..."; Belle2::
LogSystem::Instance().sendMessage(Belle2::LogMessage(Belle2::
LogConfig::c_Info, std::move(varStream), "svd", __PRETTY_FUNCTION__
, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc", 921, 0
)); }; } } while(false)
;
922 gROOT(ROOT::GetROOT())->SetBatch(true);
923 RooMsgService::instance().setGlobalKillBelow(RooFit::WARNING);
924
925 RooRealVar deltaM("deltaM", "m(D*)-m(D^{0})", 0.139545, 0.151, "GeV/c^{2}");
926
927 RooRealVar KaonMomentum("KaonMomentum", "momentum for Kaon(GeV)", -1.e8, 1.e8);
928 RooRealVar KaonSVDdEdx("KaonSVDdEdx", "", -1.e8, 1.e8);
929 RooRealVar PionDMomentum("PionDMomentum", "momentum for pion(GeV)", -1.e8, 1.e8);
930 RooRealVar PionDSVDdEdx("PionDSVDdEdx", "", -1.e8, 1.e8);
931 RooRealVar SlowPionMomentum("SlowPionMomentum", "momentum for slow pion(GeV)", -1.e8, 1.e8);
932 RooRealVar SlowPionSVDdEdx("SlowPionSVDdEdx", "", -1.e8, 1.e8);
933
934 RooRealVar exp("exp", "experiment number", 0, 1.e5);
935 RooRealVar run("run", "run number", 0, 1.e8);
936
937 RooRealVar KaonKaonIDALL("KaonKaonIDALL", "", -1.e8, 1.e8);
938 RooRealVar KaonKaonIDSVDonly("KaonKaonIDSVDonly", "", -1.e8, 1.e8);
939 RooRealVar KaonKaonIDnoSVD("KaonKaonIDnoSVD", "", -1.e8, 1.e8);
940
941 RooRealVar KaonPionIDALL("KaonPionIDALL", "", -1.e8, 1.e8);
942 RooRealVar KaonPionIDSVDonly("KaonPionIDSVDonly", "", -1.e8, 1.e8);
943 RooRealVar KaonPionIDnoSVD("KaonPionIDnoSVD", "", -1.e8, 1.e8);
944
945 RooRealVar KaonProtonIDALL("KaonProtonIDALL", "", -1.e8, 1.e8);
946 RooRealVar KaonProtonIDSVDonly("KaonProtonIDSVDonly", "", -1.e8, 1.e8);
947 RooRealVar KaonProtonIDnoSVD("KaonProtonIDnoSVD", "", -1.e8, 1.e8);
948
949 RooRealVar KaonElectronIDALL("KaonElectronIDALL", "", -1.e8, 1.e8);
950 RooRealVar KaonElectronIDSVDonly("KaonElectronIDSVDonly", "", -1.e8, 1.e8);
951 RooRealVar KaonElectronIDnoSVD("KaonElectronIDnoSVD", "", -1.e8, 1.e8);
952
953 RooRealVar KaonElectronLLSVDonly("KaonElectronLLSVDonly", "", -1.e8, 1.e8);
954 RooRealVar KaonPionLLSVDonly("KaonPionLLSVDonly", "", -1.e8, 1.e8);
955 RooRealVar KaonKaonLLSVDonly("KaonKaonLLSVDonly", "", -1.e8, 1.e8);
956 RooRealVar KaonProtonLLSVDonly("KaonProtonLLSVDonly", "", -1.e8, 1.e8);
957
958 RooRealVar KaonBinaryKaonPionIDALL("KaonBinaryKaonPionIDALL", "", -1.e8, 1.e8);
959 RooRealVar KaonBinaryKaonPionIDSVDonly("KaonBinaryKaonPionIDSVDonly", "", -1.e8, 1.e8);
960 RooRealVar KaonBinaryKaonPionIDnoSVD("KaonBinaryKaonPionIDnoSVD", "", -1.e8, 1.e8);
961
962 RooRealVar KaonBinaryPionKaonIDALL("KaonBinaryPionKaonIDALL", "", -1.e8, 1.e8);
963 RooRealVar KaonBinaryPionKaonIDSVDonly("KaonBinaryPionKaonIDSVDonly", "", -1.e8, 1.e8);
964 RooRealVar KaonBinaryPionKaonIDnoSVD("KaonBinaryPionKaonIDnoSVD", "", -1.e8, 1.e8);
965
966 RooRealVar KaonBinaryProtonKaonIDALL("KaonBinaryProtonKaonIDALL", "", -1.e8, 1.e8);
967 RooRealVar KaonBinaryProtonKaonIDSVDonly("KaonBinaryProtonKaonIDSVDonly", "", -1.e8, 1.e8);
968 RooRealVar KaonBinaryProtonKaonIDnoSVD("KaonBinaryProtonKaonIDnoSVD", "", -1.e8, 1.e8);
969
970 RooRealVar KaonBinaryElectronKaonIDALL("KaonBinaryElectronKaonIDALL", "", -1.e8, 1.e8);
971 RooRealVar KaonBinaryElectronKaonIDSVDonly("KaonBinaryElectronKaonIDSVDonly", "", -1.e8, 1.e8);
972 RooRealVar KaonBinaryElectronKaonIDnoSVD("KaonBinaryElectronKaonIDnoSVD", "", -1.e8, 1.e8);
973
974 RooRealVar PionDKaonIDALL("PionDKaonIDALL", "", -1.e8, 1.e8);
975 RooRealVar PionDKaonIDSVDonly("PionDKaonIDSVDonly", "", -1.e8, 1.e8);
976 RooRealVar PionDKaonIDnoSVD("PionDKaonIDnoSVD", "", -1.e8, 1.e8);
977
978 RooRealVar PionDPionIDALL("PionDPionIDALL", "", -1.e8, 1.e8);
979 RooRealVar PionDPionIDSVDonly("PionDPionIDSVDonly", "", -1.e8, 1.e8);
980 RooRealVar PionDPionIDnoSVD("PionDPionIDnoSVD", "", -1.e8, 1.e8);
981
982 RooRealVar PionDElectronIDALL("PionDElectronIDALL", "", -1.e8, 1.e8);
983 RooRealVar PionDElectronIDSVDonly("PionDElectronIDSVDonly", "", -1.e8, 1.e8);
984 RooRealVar PionDElectronIDnoSVD("PionDElectronIDnoSVD", "", -1.e8, 1.e8);
985
986 RooRealVar PionDProtonIDALL("PionDProtonIDALL", "", -1.e8, 1.e8);
987 RooRealVar PionDProtonIDSVDonly("PionDProtonIDSVDonly", "", -1.e8, 1.e8);
988 RooRealVar PionDProtonIDnoSVD("PionDProtonIDnoSVD", "", -1.e8, 1.e8);
989
990 RooRealVar PionDElectronLLSVDonly("PionDElectronLLSVDonly", "", -1.e8, 1.e8);
991 RooRealVar PionDPionLLSVDonly("PionDPionLLSVDonly", "", -1.e8, 1.e8);
992 RooRealVar PionDKaonLLSVDonly("PionDKaonLLSVDonly", "", -1.e8, 1.e8);
993 RooRealVar PionDProtonLLSVDonly("PionDProtonLLSVDonly", "", -1.e8, 1.e8);
994
995 RooRealVar PionDBinaryPionKaonIDALL("PionDBinaryPionKaonIDALL", "", -1.e8, 1.e8);
996 RooRealVar PionDBinaryPionKaonIDSVDonly("PionDBinaryPionKaonIDSVDonly", "", -1.e8, 1.e8);
997 RooRealVar PionDBinaryPionKaonIDnoSVD("PionDBinaryPionKaonIDnoSVD", "", -1.e8, 1.e8);
998
999 RooRealVar PionDBinaryKaonPionIDALL("PionDBinaryKaonPionIDALL", "", -1.e8, 1.e8);
1000 RooRealVar PionDBinaryKaonPionIDSVDonly("PionDBinaryKaonPionIDSVDonly", "", -1.e8, 1.e8);
1001 RooRealVar PionDBinaryKaonPionIDnoSVD("PionDBinaryKaonPionIDnoSVD", "", -1.e8, 1.e8);
1002
1003 RooRealVar PionDBinaryProtonPionIDALL("PionDBinaryProtonPionIDALL", "", -1.e8, 1.e8);
1004 RooRealVar PionDBinaryProtonPionIDSVDonly("PionDBinaryProtonPionIDSVDonly", "", -1.e8, 1.e8);
1005 RooRealVar PionDBinaryProtonPionIDnoSVD("PionDBinaryProtonPionIDnoSVD", "", -1.e8, 1.e8);
1006
1007 RooRealVar PionDBinaryElectronPionIDALL("PionDBinaryElectronPionIDALL", "", -1.e8, 1.e8);
1008 RooRealVar PionDBinaryElectronPionIDSVDonly("PionDBinaryElectronPionIDSVDonly", "", -1.e8, 1.e8);
1009 RooRealVar PionDBinaryElectronPionIDnoSVD("PionDBinaryElectronPionIDnoSVD", "", -1.e8, 1.e8);
1010
1011 RooRealVar SlowPionKaonIDALL("SlowPionKaonIDALL", "", -1.e8, 1.e8);
1012 RooRealVar SlowPionKaonIDSVDonly("SlowPionKaonIDSVDonly", "", -1.e8, 1.e8);
1013 RooRealVar SlowPionKaonIDnoSVD("SlowPionKaonIDnoSVD", "", -1.e8, 1.e8);
1014
1015 RooRealVar SlowPionPionIDALL("SlowPionPionIDALL", "", -1.e8, 1.e8);
1016 RooRealVar SlowPionPionIDSVDonly("SlowPionPionIDSVDonly", "", -1.e8, 1.e8);
1017 RooRealVar SlowPionPionIDnoSVD("SlowPionPionIDnoSVD", "", -1.e8, 1.e8);
1018
1019 RooRealVar SlowPionElectronIDALL("SlowPionElectronIDALL", "", -1.e8, 1.e8);
1020 RooRealVar SlowPionElectronIDSVDonly("SlowPionElectronIDSVDonly", "", -1.e8, 1.e8);
1021 RooRealVar SlowPionElectronIDnoSVD("SlowPionElectronIDnoSVD", "", -1.e8, 1.e8);
1022
1023 RooRealVar SlowPionProtonIDALL("SlowPionProtonIDALL", "", -1.e8, 1.e8);
1024 RooRealVar SlowPionProtonIDSVDonly("SlowPionProtonIDSVDonly", "", -1.e8, 1.e8);
1025 RooRealVar SlowPionProtonIDnoSVD("SlowPionProtonIDnoSVD", "", -1.e8, 1.e8);
1026
1027 RooRealVar SlowPionElectronLLSVDonly("SlowPionElectronLLSVDonly", "", -1.e8, 1.e8);
1028 RooRealVar SlowPionPionLLSVDonly("SlowPionPionLLSVDonly", "", -1.e8, 1.e8);
1029 RooRealVar SlowPionKaonLLSVDonly("SlowPionKaonLLSVDonly", "", -1.e8, 1.e8);
1030 RooRealVar SlowPionProtonLLSVDonly("SlowPionProtonLLSVDonly", "", -1.e8, 1.e8);
1031
1032 RooRealVar SlowPionBinaryPionKaonIDALL("SlowPionBinaryPionKaonIDALL", "", -1.e8, 1.e8);
1033 RooRealVar SlowPionBinaryPionKaonIDSVDonly("SlowPionBinaryPionKaonIDSVDonly", "", -1.e8, 1.e8);
1034 RooRealVar SlowPionBinaryPionKaonIDnoSVD("SlowPionBinaryPionKaonIDnoSVD", "", -1.e8, 1.e8);
1035
1036 RooRealVar SlowPionBinaryKaonPionIDALL("SlowPionBinaryKaonPionIDALL", "", -1.e8, 1.e8);
1037 RooRealVar SlowPionBinaryKaonPionIDSVDonly("SlowPionBinaryKaonPionIDSVDonly", "", -1.e8, 1.e8);
1038 RooRealVar SlowPionBinaryKaonPionIDnoSVD("SlowPionBinaryKaonPionIDnoSVD", "", -1.e8, 1.e8);
1039
1040 RooRealVar SlowPionBinaryProtonPionIDALL("SlowPionBinaryProtonPionIDALL", "", -1.e8, 1.e8);
1041 RooRealVar SlowPionBinaryProtonPionIDSVDonly("SlowPionBinaryProtonPionIDSVDonly", "", -1.e8, 1.e8);
1042 RooRealVar SlowPionBinaryProtonPionIDnoSVD("SlowPionBinaryProtonPionIDnoSVD", "", -1.e8, 1.e8);
1043
1044 RooRealVar SlowPionBinaryElectronPionIDALL("SlowPionBinaryElectronPionIDALL", "", -1.e8, 1.e8);
1045 RooRealVar SlowPionBinaryElectronPionIDSVDonly("SlowPionBinaryElectronPionIDSVDonly", "", -1.e8, 1.e8);
1046 RooRealVar SlowPionBinaryElectronPionIDnoSVD("SlowPionBinaryElectronPionIDnoSVD", "", -1.e8, 1.e8);
1047
1048 auto variables = new RooArgSet();
1049 variables->add(deltaM);
1050 variables->add(KaonMomentum);
1051 variables->add(KaonSVDdEdx);
1052 variables->add(PionDMomentum);
1053 variables->add(PionDSVDdEdx);
1054 variables->add(SlowPionMomentum);
1055 variables->add(SlowPionSVDdEdx);
1056 variables->add(exp);
1057 variables->add(run);
1058
1059 variables->add(KaonKaonIDALL);
1060 variables->add(KaonKaonIDSVDonly);
1061 variables->add(KaonKaonIDnoSVD);
1062 variables->add(KaonPionIDALL);
1063 variables->add(KaonPionIDSVDonly);
1064 variables->add(KaonPionIDnoSVD);
1065 variables->add(KaonProtonIDALL);
1066 variables->add(KaonProtonIDSVDonly);
1067 variables->add(KaonProtonIDnoSVD);
1068 variables->add(KaonElectronIDALL);
1069 variables->add(KaonElectronIDSVDonly);
1070 variables->add(KaonElectronIDnoSVD);
1071
1072 variables->add(KaonElectronLLSVDonly);
1073 variables->add(KaonPionLLSVDonly);
1074 variables->add(KaonKaonLLSVDonly);
1075 variables->add(KaonProtonLLSVDonly);
1076
1077 variables->add(KaonBinaryKaonPionIDALL);
1078 variables->add(KaonBinaryKaonPionIDSVDonly);
1079 variables->add(KaonBinaryKaonPionIDnoSVD);
1080 variables->add(KaonBinaryPionKaonIDALL);
1081 variables->add(KaonBinaryPionKaonIDSVDonly);
1082 variables->add(KaonBinaryPionKaonIDnoSVD);
1083 variables->add(KaonBinaryProtonKaonIDALL);
1084 variables->add(KaonBinaryProtonKaonIDSVDonly);
1085 variables->add(KaonBinaryProtonKaonIDnoSVD);
1086 variables->add(KaonBinaryElectronKaonIDALL);
1087 variables->add(KaonBinaryElectronKaonIDSVDonly);
1088 variables->add(KaonBinaryElectronKaonIDnoSVD);
1089
1090 variables->add(PionDPionIDALL);
1091 variables->add(PionDPionIDSVDonly);
1092 variables->add(PionDPionIDnoSVD);
1093 variables->add(PionDKaonIDALL);
1094 variables->add(PionDKaonIDSVDonly);
1095 variables->add(PionDKaonIDnoSVD);
1096 variables->add(PionDElectronIDALL);
1097 variables->add(PionDElectronIDSVDonly);
1098 variables->add(PionDElectronIDnoSVD);
1099 variables->add(PionDProtonIDALL);
1100 variables->add(PionDProtonIDSVDonly);
1101 variables->add(PionDProtonIDnoSVD);
1102
1103 variables->add(PionDElectronLLSVDonly);
1104 variables->add(PionDPionLLSVDonly);
1105 variables->add(PionDKaonLLSVDonly);
1106 variables->add(PionDProtonLLSVDonly);
1107
1108 variables->add(PionDBinaryPionKaonIDALL);
1109 variables->add(PionDBinaryPionKaonIDSVDonly);
1110 variables->add(PionDBinaryPionKaonIDnoSVD);
1111 variables->add(PionDBinaryKaonPionIDALL);
1112 variables->add(PionDBinaryKaonPionIDSVDonly);
1113 variables->add(PionDBinaryKaonPionIDnoSVD);
1114 variables->add(PionDBinaryProtonPionIDALL);
1115 variables->add(PionDBinaryProtonPionIDSVDonly);
1116 variables->add(PionDBinaryProtonPionIDnoSVD);
1117 variables->add(PionDBinaryElectronPionIDALL);
1118 variables->add(PionDBinaryElectronPionIDSVDonly);
1119 variables->add(PionDBinaryElectronPionIDnoSVD);
1120
1121 variables->add(SlowPionPionIDALL);
1122 variables->add(SlowPionPionIDSVDonly);
1123 variables->add(SlowPionPionIDnoSVD);
1124 variables->add(SlowPionKaonIDALL);
1125 variables->add(SlowPionKaonIDSVDonly);
1126 variables->add(SlowPionKaonIDnoSVD);
1127 variables->add(SlowPionElectronIDALL);
1128 variables->add(SlowPionElectronIDSVDonly);
1129 variables->add(SlowPionElectronIDnoSVD);
1130 variables->add(SlowPionProtonIDALL);
1131 variables->add(SlowPionProtonIDSVDonly);
1132 variables->add(SlowPionProtonIDnoSVD);
1133
1134 variables->add(SlowPionElectronLLSVDonly);
1135 variables->add(SlowPionPionLLSVDonly);
1136 variables->add(SlowPionKaonLLSVDonly);
1137 variables->add(SlowPionProtonLLSVDonly);
1138
1139 variables->add(SlowPionBinaryPionKaonIDALL);
1140 variables->add(SlowPionBinaryPionKaonIDSVDonly);
1141 variables->add(SlowPionBinaryPionKaonIDnoSVD);
1142 variables->add(SlowPionBinaryKaonPionIDALL);
1143 variables->add(SlowPionBinaryKaonPionIDSVDonly);
1144 variables->add(SlowPionBinaryKaonPionIDnoSVD);
1145 variables->add(SlowPionBinaryProtonPionIDALL);
1146 variables->add(SlowPionBinaryProtonPionIDSVDonly);
1147 variables->add(SlowPionBinaryProtonPionIDnoSVD);
1148 variables->add(SlowPionBinaryElectronPionIDALL);
1149 variables->add(SlowPionBinaryElectronPionIDSVDonly);
1150 variables->add(SlowPionBinaryElectronPionIDnoSVD);
1151
1152 RooDataSet* DstarDataset = new RooDataSet("DstarDataset", "DstarDataset", *variables, Import(*preselTree));
1153
1154 if (DstarDataset->sumEntries() == 0) {
1155 B2FATAL("The Dstar dataset is empty, stopping here")do { { LogVariableStream varStream; varStream << "The Dstar dataset is empty, stopping here"
; Belle2::LogSystem::Instance().sendMessage(Belle2::LogMessage
(Belle2::LogConfig::c_Fatal, std::move(varStream), "svd", __PRETTY_FUNCTION__
, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc", 1155, 0
)); }; exit(1); } while(false)
;
1156 }
1157
1158 RooPlot* DstarFitFrame = DstarDataset->plotOn(deltaM.frame());
1159
1160 RooRealVar GaussMean("GaussMean", "GaussMean", 0.145, 0.140, 0.150);
1161 RooRealVar GaussSigma1("GaussSigma1", "GaussSigma1", 0.01, 1.e-4, 1.0);
1162 RooGaussian DstarGauss1("DstarGauss1", "DstarGauss1", deltaM, GaussMean, GaussSigma1);
1163 RooRealVar GaussSigma2("GaussSigma2", "GaussSigma2", 0.001, 1.e-4, 1.0);
1164 RooGaussian DstarGauss2("DstarGauss2", "DstarGauss2", deltaM, GaussMean, GaussSigma2);
1165 RooRealVar fracGaussYield("fracGaussYield", "Fraction of two Gaussians", 0.75, 0.0, 1.0);
1166 RooAddPdf DstarSignalPDF("DstarSignalPDF", "DstarGauss1+DstarGauss2", RooArgList(DstarGauss1, DstarGauss2), fracGaussYield);
1167
1168 RooRealVar dm0Bkg("dm0Bkg", "dm0", 0.13957018, 0.130, 0.140);
1169 RooRealVar aBkg("aBkg", "a", -0.0784, -0.08, 3.0);
1170 RooRealVar bBkg("bBkg", "b", -0.444713, -0.5, 0.4);
1171 RooRealVar cBkg("cBkg", "c", 0.3);
1172 RooDstD0BG DstarBkgPDF("DstarBkgPDF", "DstarBkgPDF", deltaM, dm0Bkg, cBkg, aBkg, bBkg);
1173 RooRealVar nSignalDstar("nSignalDstar", "signal yield", 0.5 * preselTree->GetEntries(), 0, preselTree->GetEntries());
1174 RooRealVar nBkgDstar("nBkgDstar", "background yield", 0.5 * preselTree->GetEntries(), 0, preselTree->GetEntries());
1175 RooAddPdf totalPDFDstar("totalPDFDstar", "totalPDFDstar pdf", RooArgList(DstarSignalPDF, DstarBkgPDF),
1176 RooArgList(nSignalDstar, nBkgDstar));
1177
1178 B2INFO("Dstar: Start fitting...")do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2::
LogConfig::c_Info, 0, "svd")) { { LogVariableStream varStream
; varStream << "Dstar: Start fitting..."; Belle2::LogSystem
::Instance().sendMessage(Belle2::LogMessage(Belle2::LogConfig
::c_Info, std::move(varStream), "svd", __PRETTY_FUNCTION__, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc"
, 1178, 0)); }; } } while(false)
;
1179 RooFitResult* DstarFitResult = totalPDFDstar.fitTo(*DstarDataset, Save(kTRUE), PrintLevel(-1));
1180
1181 int status = DstarFitResult->status();
1182 int covqual = DstarFitResult->covQual();
1183 double diff = nSignalDstar.getValV() + nBkgDstar.getValV() - DstarDataset->sumEntries();
1184
1185 B2INFO("Dstar: Fit status: " << status << "; covariance quality: " << covqual)do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2::
LogConfig::c_Info, 0, "svd")) { { LogVariableStream varStream
; varStream << "Dstar: Fit status: " << status <<
"; covariance quality: " << covqual; Belle2::LogSystem
::Instance().sendMessage(Belle2::LogMessage(Belle2::LogConfig
::c_Info, std::move(varStream), "svd", __PRETTY_FUNCTION__, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc"
, 1185, 0)); }; } } while(false)
;
1186 // if the fit is not healthy, try again once before giving up, with a slightly different setup:
1187 if ((status > 0) || (TMath::Abs(diff) > 1.) || (nSignalDstar.getError() < sqrt(nSignalDstar.getValV()))
1188 || (nSignalDstar.getError() > (nSignalDstar.getValV()))) {
1189
1190 DstarFitResult = totalPDFDstar.fitTo(*DstarDataset, Save(), Strategy(2), Offset(1));
1191 status = DstarFitResult->status();
1192 covqual = DstarFitResult->covQual();
1193 diff = nSignalDstar.getValV() + nBkgDstar.getValV() - DstarDataset->sumEntries();
1194 }
1195
1196 if ((status > 0) || (TMath::Abs(diff) > 1.) || (nSignalDstar.getError() < sqrt(nSignalDstar.getValV()))
1197 || (nSignalDstar.getError() > (nSignalDstar.getValV()))) {
1198 B2WARNING("Dstar: Fit problem: fit status " << status << "; sum of component yields minus the dataset yield is " << diff <<do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2::
LogConfig::c_Warning, 0, "svd")) { { LogVariableStream varStream
; varStream << "Dstar: Fit problem: fit status " <<
status << "; sum of component yields minus the dataset yield is "
<< diff << "; signal yield is " << nSignalDstar
.getValV() << ", while its uncertainty is " << nSignalDstar
.getError(); Belle2::LogSystem::Instance().sendMessage(Belle2
::LogMessage(Belle2::LogConfig::c_Warning, std::move(varStream
), "svd", __PRETTY_FUNCTION__, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc"
, 1199, 0)); }; } } while(false)
1199 "; signal yield is " << nSignalDstar.getValV() << ", while its uncertainty is " << nSignalDstar.getError())do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2::
LogConfig::c_Warning, 0, "svd")) { { LogVariableStream varStream
; varStream << "Dstar: Fit problem: fit status " <<
status << "; sum of component yields minus the dataset yield is "
<< diff << "; signal yield is " << nSignalDstar
.getValV() << ", while its uncertainty is " << nSignalDstar
.getError(); Belle2::LogSystem::Instance().sendMessage(Belle2
::LogMessage(Belle2::LogConfig::c_Warning, std::move(varStream
), "svd", __PRETTY_FUNCTION__, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc"
, 1199, 0)); }; } } while(false)
;
1200 }
1201 if (covqual < 2) {
1202 B2INFO("Dstar: Fit warning: covariance quality " << covqual)do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2::
LogConfig::c_Info, 0, "svd")) { { LogVariableStream varStream
; varStream << "Dstar: Fit warning: covariance quality "
<< covqual; Belle2::LogSystem::Instance().sendMessage(
Belle2::LogMessage(Belle2::LogConfig::c_Info, std::move(varStream
), "svd", __PRETTY_FUNCTION__, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc"
, 1202, 0)); }; } } while(false)
;
1203 }
1204
1205 totalPDFDstar.plotOn(DstarFitFrame, LineColor(TColor::GetColor("#4575b4")));
1206
1207 double chisquare = DstarFitFrame->chiSquare();
1208 B2INFO("Dstar: Fit chi2 = " << chisquare)do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2::
LogConfig::c_Info, 0, "svd")) { { LogVariableStream varStream
; varStream << "Dstar: Fit chi2 = " << chisquare;
Belle2::LogSystem::Instance().sendMessage(Belle2::LogMessage
(Belle2::LogConfig::c_Info, std::move(varStream), "svd", __PRETTY_FUNCTION__
, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc", 1208, 0
)); }; } } while(false)
;
1209 totalPDFDstar.paramOn(DstarFitFrame, Layout(0.63, 0.96, 0.93), Format("NEU", AutoPrecision(2)));
1210 DstarFitFrame->getAttText()->SetTextSize(0.03);
1211
1212 totalPDFDstar.plotOn(DstarFitFrame, Components("DstarSignalPDF"), LineColor(TColor::GetColor("#d73027")));
1213 totalPDFDstar.plotOn(DstarFitFrame, Components("DstarBkgPDF"), LineColor(TColor::GetColor("#fc8d59")));
1214 totalPDFDstar.plotOn(DstarFitFrame, LineColor(TColor::GetColor("#4575b4")));
1215
1216 DstarFitFrame->GetXaxis()->SetTitle("#Deltam [GeV/c^{2}]");
1217 TCanvas* canvDstar = new TCanvas("canvDstar", "canvDstar");
1218 canvDstar->cd();
1219
1220 DstarFitFrame->Draw();
1221
1222 if (m_isMakePlots) {
1223 canvDstar->Print("SVDdEdxValidationFitDstar.pdf");
1224 TFile DstarFitPlotFile("SVDdEdxValidationDstarFitPlotFile.root", "RECREATE");
1225 canvDstar->Write();
1226 DstarFitPlotFile.Close();
1227 }
1228
1229 /////////////////// SPlot ///////////////////////////////////////////////////////////
1230
1231 RooStats::SPlot* sPlotDatasetDstar = new RooStats::SPlot("sData", "An SPlot", *DstarDataset, &totalPDFDstar,
1232 RooArgList(nSignalDstar, nBkgDstar));
1233
1234 for (int iEvt = 0; iEvt < 5; iEvt++) {
1235 if (TMath::Abs(sPlotDatasetDstar->GetSWeight(iEvt, "nSignalDstar") + sPlotDatasetDstar->GetSWeight(iEvt, "nBkgDstar") - 1) > 5.e-3)
1236 B2FATAL("Dstar: sPlot error: sum of weights not equal to 1")do { { LogVariableStream varStream; varStream << "Dstar: sPlot error: sum of weights not equal to 1"
; Belle2::LogSystem::Instance().sendMessage(Belle2::LogMessage
(Belle2::LogConfig::c_Fatal, std::move(varStream), "svd", __PRETTY_FUNCTION__
, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc", 1236, 0
)); }; exit(1); } while(false)
;
1237 }
1238
1239 TTree* treeDstar_sw = DstarDataset->GetClonedTree();
1240 treeDstar_sw->SetName("treeDstar_sw");
1241
1242 B2INFO("Dstar: sPlot done. ")do { if (Belle2::LogSystem::Instance().isLevelEnabled(Belle2::
LogConfig::c_Info, 0, "svd")) { { LogVariableStream varStream
; varStream << "Dstar: sPlot done. "; Belle2::LogSystem
::Instance().sendMessage(Belle2::LogMessage(Belle2::LogConfig
::c_Info, std::move(varStream), "svd", __PRETTY_FUNCTION__, "svd/calibration/src/SVDdEdxValidationAlgorithm.cc"
, 1242, 0)); }; } } while(false)
;
1243
1244 return treeDstar_sw;
1245}

/cvmfs/belle.cern.ch/el9/externals/v02-04-00/include/root/ROOT/RDF/RInterface.hxx

1// Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2
3/*************************************************************************
4 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RDF_TINTERFACE
12#define ROOT_RDF_TINTERFACE
13
14#include "ROOT/RDataSource.hxx"
15#include "ROOT/RDF/ActionHelpers.hxx"
16#include "ROOT/RDF/HistoModels.hxx"
17#include "ROOT/RDF/InterfaceUtils.hxx"
18#include "ROOT/RDF/RColumnRegister.hxx"
19#include "ROOT/RDF/RDefaultValueFor.hxx"
20#include "ROOT/RDF/RDefine.hxx"
21#include "ROOT/RDF/RDefinePerSample.hxx"
22#include "ROOT/RDF/RFilter.hxx"
23#include "ROOT/RDF/RInterfaceBase.hxx"
24#include "ROOT/RDF/RVariation.hxx"
25#include "ROOT/RDF/RLazyDSImpl.hxx"
26#include "ROOT/RDF/RLoopManager.hxx"
27#include "ROOT/RDF/RRange.hxx"
28#include "ROOT/RDF/RFilterWithMissingValues.hxx"
29#include "ROOT/RDF/Utils.hxx"
30#include "ROOT/RDF/RDFDescription.hxx"
31#include "ROOT/RDF/RVariationsDescription.hxx"
32#include "ROOT/RResultPtr.hxx"
33#include "ROOT/RSnapshotOptions.hxx"
34#include <string_view>
35#include "ROOT/RVec.hxx"
36#include "ROOT/TypeTraits.hxx"
37#include "RtypesCore.h" // for ULong64_t
38#include "TDirectory.h"
39#include "TH1.h" // For Histo actions
40#include "TH2.h" // For Histo actions
41#include "TH3.h" // For Histo actions
42#include "THn.h"
43#include "TProfile.h"
44#include "TProfile2D.h"
45#include "TStatistic.h"
46
47#include <algorithm>
48#include <cstddef>
49#include <initializer_list>
50#include <iterator> // std::back_inserter
51#include <limits>
52#include <memory>
53#include <set>
54#include <sstream>
55#include <stdexcept>
56#include <string>
57#include <type_traits> // is_same, enable_if
58#include <typeinfo>
59#include <unordered_set>
60#include <utility> // std::index_sequence
61#include <vector>
62#include <any>
63
64class TGraph;
// Forward declaration only: no TGraph header appears in the include list above.
65
66// Windows requires a forward decl of printValue to accept it as a valid friend function in RInterface
67namespace ROOT {
// Declarations (no definitions here) of the implicit multi-threading switches and of
// RDataFrame, so that the names can be used further down in this header.
68void DisableImplicitMT();
69bool IsImplicitMTEnabled();
70void EnableImplicitMT(UInt_t numthreads);
71class RDataFrame;
72} // namespace ROOT
73namespace cling {
// Declared here so that RInterface can name it in a friend declaration (see the
// friend line inside the class body below).
74std::string printValue(ROOT::RDataFrame *tdf);
75}
76
77namespace ROOT {
78namespace RDF {
// Short namespace aliases used throughout the rest of this header.
79namespace RDFDetail = ROOT::Detail::RDF;
80namespace RDFInternal = ROOT::Internal::RDF;
81namespace TTraits = ROOT::TypeTraits;
82
// Forward declaration of the class template defined below; needed so that RNode
// can be named before the full definition is seen.
83template <typename Proxied, typename DataSource>
84class RInterface;
85
// RNode is the RInterface instantiation over the RNodeBase type with no data source
// type (void). It is the target type of the conversion operator of RInterface.
86using RNode = RInterface<::ROOT::Detail::RDF::RNodeBase, void>;
87} // namespace RDF
88
89namespace Internal {
90namespace RDF {
// Declarations of internal helpers. The free functions below are befriended by
// RInterface further down; their definitions are not part of this header excerpt.
// NOTE(review): GraphCreatorHelper is declared here directly in ROOT::Internal::RDF,
// while the friend declaration in RInterface names
// RDFInternal::GraphDrawing::GraphCreatorHelper - confirm which one is intended.
91class GraphCreatorHelper;
92void ChangeEmptyEntryRange(const ROOT::RDF::RNode &node, std::pair<ULong64_t, ULong64_t> &&newRange);
93void ChangeBeginAndEndEntries(const RNode &node, Long64_t begin, Long64_t end);
94void ChangeSpec(const ROOT::RDF::RNode &node, ROOT::RDF::Experimental::RDatasetSpec &&spec);
// TriggerRun is the only helper here that takes the node by value rather than by reference.
95void TriggerRun(ROOT::RDF::RNode node);
96std::string GetDataSourceLabel(const ROOT::RDF::RNode &node);
97void SetTTreeLifeline(ROOT::RDF::RNode &node, std::any lifeline);
98} // namespace RDF
99} // namespace Internal
100
101namespace RDF {
102
103// clang-format off
104/**
105 * \class ROOT::RDF::RInterface
106 * \ingroup dataframe
107 * \brief The public interface to the RDataFrame federation of classes.
108 * \tparam Proxied One of the "node" base types (e.g. RLoopManager, RFilterBase). The user never specifies this type manually.
109 * \tparam DataSource The type of the RDataSource which is providing the data to the data frame. There is no source by default.
110 *
111 * The documentation of each method features a one liner illustrating how to use the method, for example showing how
112 * the majority of the template parameters are automatically deduced requiring no or very little effort by the user.
113 */
114// clang-format on
115template <typename Proxied, typename DataSource = void>
116class RInterface : public RInterfaceBase {
117 using DS_t = DataSource;
    // Local shorthands for the DataSource template parameter and for node types
    // that live in the RDFDetail namespace.
118 using RFilterBase = RDFDetail::RFilterBase;
119 using RRangeBase = RDFDetail::RRangeBase;
120 using RLoopManager = RDFDetail::RLoopManager;
121 friend std::string cling::printValue(::ROOT::RDataFrame *tdf); // For a nice printing at the prompt
122 friend class RDFInternal::GraphDrawing::GraphCreatorHelper;
123
    // Every instantiation of RInterface is a friend of every other one, so one
    // instantiation can reach the non-public members of another.
124 template <typename T, typename W>
125 friend class RInterface;
126
    // Internal free functions that are granted access to the non-public members.
127 friend void RDFInternal::TriggerRun(RNode node);
128 friend void RDFInternal::ChangeEmptyEntryRange(const RNode &node, std::pair<ULong64_t, ULong64_t> &&newRange);
129 friend void RDFInternal::ChangeBeginAndEndEntries(const RNode &node, Long64_t start, Long64_t end);
130 friend void RDFInternal::ChangeSpec(const RNode &node, ROOT::RDF::Experimental::RDatasetSpec &&spec);
131 friend std::string ROOT::Internal::RDF::GetDataSourceLabel(const RNode &node);
132 friend void ROOT::Internal::RDF::SetTTreeLifeline(ROOT::RDF::RNode &node, std::any lifeline);
133 std::shared_ptr<Proxied> fProxiedPtr; ///< Smart pointer to the graph node encapsulated by this RInterface.
134
135public:
136 ////////////////////////////////////////////////////////////////////////////
137 /// \brief Copy-assignment operator for RInterface.
138 RInterface &operator=(const RInterface &) = default;
    // All four copy/move operations are compiler-generated: they act member-wise on
    // fProxiedPtr (a std::shared_ptr) and on the RInterfaceBase base sub-object.
    // Copying therefore copies the shared_ptr handle, not the node it points to.
139
140 ////////////////////////////////////////////////////////////////////////////
141 /// \brief Copy-ctor for RInterface.
142 RInterface(const RInterface &) = default;
143
144 ////////////////////////////////////////////////////////////////////////////
145 /// \brief Move-ctor for RInterface.
146 RInterface(RInterface &&) = default;
147
148 ////////////////////////////////////////////////////////////////////////////
149 /// \brief Move-assignment operator for RInterface.
150 RInterface &operator=(RInterface &&) = default;
151
152 ////////////////////////////////////////////////////////////////////////////
153 /// \brief Build a RInterface from a RLoopManager.
154 /// This constructor is only available for RInterface<RLoopManager>.
155 template <typename T = Proxied, typename = std::enable_if_t<std::is_same<T, RLoopManager>::value, int>>
156 RInterface(const std::shared_ptr<RLoopManager> &proxied) : RInterfaceBase(proxied), fProxiedPtr(proxied)
157 {
    // The defaulted template parameter T makes the enable_if condition dependent, so
    // this constructor only takes part in overload resolution when Proxied is
    // RLoopManager. The same shared_ptr is handed to the RInterfaceBase constructor
    // and stored as the proxied node; the body itself has nothing left to do.
    // Note that the constructor is not marked explicit.
158 }
159
160 ////////////////////////////////////////////////////////////////////////////
161 /// \brief Cast any RDataFrame node to a common type ROOT::RDF::RNode.
162 /// Different RDataFrame methods return different C++ types. All nodes, however,
163 /// can be cast to this common type at the cost of a small performance penalty.
164 /// This allows, for example, storing RDataFrame nodes in a vector, or passing them
165 /// around via (non-template, C++11) helper functions.
166 /// Example usage:
167 /// ~~~{.cpp}
168 /// // a function that conditionally adds a Range to a RDataFrame node.
169 /// RNode MaybeAddRange(RNode df, bool mustAddRange)
170 /// {
171 /// return mustAddRange ? df.Range(1) : df;
172 /// }
173 /// // use as :
174 /// ROOT::RDataFrame df(10);
175 /// auto maybeRanged = MaybeAddRange(df, true);
176 /// ~~~
177 /// Note that it is not a problem to pass RNode's by value.
178 operator RNode() const
179 {
    // Builds a new RNode around the same node object: the shared_ptr is converted to
    // a pointer to RNodeBase with static_pointer_cast, so both interfaces share
    // ownership of it. fLoopManager and fColRegister are not declared in this class
    // body - presumably they are members of RInterfaceBase (confirm).
180 return RNode(std::static_pointer_cast<::ROOT::Detail::RDF::RNodeBase>(fProxiedPtr), *fLoopManager, fColRegister);
181 }
182
183 ////////////////////////////////////////////////////////////////////////////
184 /// \brief Append a filter to the call graph.
185 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
186 /// signalling whether the event has passed the selection (true) or not (false).
187 /// \param[in] columns Names of the columns/branches in input to the filter function.
188 /// \param[in] name Optional name of this filter. See `Report`.
189 /// \return the filter node of the computation graph.
190 ///
191 /// Append a filter node at the point of the call graph corresponding to the
192 /// object this method is called on.
193 /// The callable `f` should not have side-effects (e.g. modification of an
194 /// external or static variable) to ensure correct results when implicit
195 /// multi-threading is active.
196 ///
197 /// RDataFrame only evaluates filters when necessary: if multiple filters
198 /// are chained one after another, they are executed in order and the first
199 /// one returning false causes the event to be discarded.
200 /// Even if multiple actions or transformations depend on the same filter,
201 /// it is executed once per entry. If its result is requested more than
202 /// once, the cached result is served.
203 ///
204 /// ### Example usage:
205 /// ~~~{.cpp}
206 /// // C++ callable (function, functor class, lambda...) that takes two parameters of the types of "x" and "y"
207 /// auto filtered = df.Filter(myCut, {"x", "y"});
208 ///
209 /// // String: it must contain valid C++ except that column names can be used instead of variable names
210 /// auto filtered = df.Filter("x*y > 0");
211 /// ~~~
212 ///
213 /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested
214 /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work:
215 /// ~~~{.cpp}
216 /// df.Filter("Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
217 /// ~~~
218 /// but instead this will:
219 /// ~~~{.cpp}
220 /// df.Filter("return Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
221 /// ~~~
222 template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0>
223 RInterface<RDFDetail::RFilter<F, Proxied>, DS_t>
224 Filter(F f, const ColumnNames_t &columns = {}, std::string_view name = "")
225 {
    // The enable_if on the template header removes this overload for callables that
    // convert to std::string, leaving string expressions to the non-template overload.
    // NOTE(review): CheckFilter is defined elsewhere; presumably it rejects callables
    // that do not return bool - confirm.
226 RDFInternal::CheckFilter(f);
    // The argument types of the callable determine how many input columns are needed.
227 using ColTypes_t = typename TTraits::CallableTraits<F>::arg_types;
228 constexpr auto nColumns = ColTypes_t::list_size;
    // Resolve the nColumns column names to use from the names passed by the caller,
    // then run the data-source column check for those names and argument types
    // (both helpers are defined outside this excerpt).
229 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
230 CheckAndFillDSColumns(validColumnNames, ColTypes_t());
231
232 using F_t = RDFDetail::RFilter<F, Proxied>;
233
    // The new filter node takes the callable by move and is given the node proxied
    // by this interface (fProxiedPtr) as constructor argument.
234 auto filterPtr = std::make_shared<F_t>(std::move(f), validColumnNames, fProxiedPtr, fColRegister, name);
    // Hand back an interface wrapping the freshly created node.
235 return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, fColRegister);
236 }
237
238 ////////////////////////////////////////////////////////////////////////////
239 /// \brief Append a filter to the call graph.
240 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
241 /// signalling whether the event has passed the selection (true) or not (false).
242 /// \param[in] name Optional name of this filter. See `Report`.
243 /// \return the filter node of the computation graph.
244 ///
245 /// Refer to the first overload of this method for the full documentation.
246 template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0>
247 RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> Filter(F f, std::string_view name)
248 {
249 // The sfinae is there in order to pick up the overloaded method which accepts two strings
250 // rather than this template method.
    // Forwards to the three-argument overload with an empty column list. f is passed
    // as an lvalue to a by-value parameter, so the callable is copied once more here.
251 return Filter(f, {}, name);
252 }
253
254 ////////////////////////////////////////////////////////////////////////////
255 /// \brief Append a filter to the call graph.
256 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
257 /// signalling whether the event has passed the selection (true) or not (false).
258 /// \param[in] columns Names of the columns/branches in input to the filter function.
259 /// \return the filter node of the computation graph.
260 ///
261 /// Refer to the first overload of this method for the full documentation.
262 template <typename F>
263 RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> Filter(F f, const std::initializer_list<std::string> &columns)
264 {
    // Convenience overload for a braced list of column names: the list is converted
    // to a ColumnNames_t and the call is forwarded to the main callable overload
    // with the default (empty) filter name. Unlike the other callable overloads,
    // this one carries no enable_if constraint on F.
265 return Filter(f, ColumnNames_t{columns});
266 }
267
268 ////////////////////////////////////////////////////////////////////////////
269 /// \brief Append a filter to the call graph.
270 /// \param[in] expression The filter expression in C++
271 /// \param[in] name Optional name of this filter. See `Report`.
272 /// \return the filter node of the computation graph.
273 ///
274 /// The expression is just-in-time compiled and used to filter entries. It must
275 /// be valid C++ syntax in which variable names are substituted with the names
276 /// of branches/columns.
277 ///
278 /// ### Example usage:
279 /// ~~~{.cpp}
280 /// auto filtered_df = df.Filter("myCollection.size() > 3");
281 /// auto filtered_name_df = df.Filter("myCollection.size() > 3", "Minimum collection size");
282 /// ~~~
283 ///
284 /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested
285 /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work:
286 /// ~~~{.cpp}
287 /// df.Filter("Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
288 /// ~~~
289 /// but instead this will:
290 /// ~~~{.cpp}
291 /// df.Filter("return Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
292 /// ~~~
293 RInterface<RDFDetail::RJittedFilter, DS_t> Filter(std::string_view expression, std::string_view name = "")
294 {
    // String-expression overload: the filter is booked through BookFilterJit instead
    // of being instantiated from a C++ callable.
    // upcastNodeOnHeap is a raw pointer (it is dereferenced below and remove_pointer
    // is applied to its type) to a smart-pointer-like handle (it has an element_type).
    // It is not released anywhere in this function; per the original comment below,
    // the release is done later by the jitted JitFilterHelper call. This is the
    // allocation the static analyzer reports as a potential leak.
    // NOTE(review): if BookFilterJit throws, nothing visible here frees the pointer - confirm.
295 // deleted by the jitted call to JitFilterHelper
296 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
9
Calling 'MakeSharedOnHeap<ROOT::Detail::RDF::RNodeBase>'
11
Returned allocated memory
297 using BaseNodeType_t = typename std::remove_pointer_t<decltype(upcastNodeOnHeap)>::element_type;
    // NOTE(review): upcastInterface is constructed but not referenced again in this
    // function - confirm whether it is still needed.
298 RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fColRegister);
299 const auto jittedFilter =
12
Potential leak of memory pointed to by 'upcastNodeOnHeap'
300 RDFInternal::BookFilterJit(upcastNodeOnHeap, name, expression, fLoopManager->GetBranchNames(), fColRegister,
301 fLoopManager->GetTree(), GetDataSource());
302
    // jittedFilter is const, so std::move yields a const rvalue here and cannot
    // select a move constructor.
303 return RInterface<RDFDetail::RJittedFilter, DS_t>(std::move(jittedFilter), *fLoopManager, fColRegister);
304 }
305
306 ////////////////////////////////////////////////////////////////////////////
307 /// \brief Discard entries with missing values
308 /// \param[in] column Column name whose entries with missing values should be discarded
309 /// \return The filter node of the computation graph
310 ///
311 /// This operation is useful in case an entry of the dataset is incomplete,
312 /// i.e. if one or more of the columns do not have valid values. If the value
313 /// of the input column is missing for an entry, the entire entry will be
314 /// discarded from the rest of this branch of the computation graph.
315 ///
316 /// Use cases include:
317 /// * When processing multiple files, one or more of them is missing a column
318 /// * In horizontal joining with entry matching, a certain dataset has no
319 /// match for the current entry.
320 ///
321 /// ### Example usage:
322 ///
323 /// \code{.py}
324 /// # Assume a dataset with columns [idx, x] matching another dataset with
325 /// # columns [idx, y]. For idx == 42, the right-hand dataset has no match
326 /// df = ROOT.RDataFrame(dataset)
327 /// df_nomissing = df.FilterAvailable("idx").Define("z", "x + y")
328 /// colz = df_nomissing.Take[int]("z")
329 /// \endcode
330 ///
331 /// \code{.cpp}
332 /// // Assume a dataset with columns [idx, x] matching another dataset with
333 /// // columns [idx, y]. For idx == 42, the right-hand dataset has no match
334 /// ROOT::RDataFrame df{dataset};
335 /// auto df_nomissing = df.FilterAvailable("idx")
336 /// .Define("z", [](int x, int y) { return x + y; }, {"x", "y"});
337 /// auto colz = df_nomissing.Take<int>("z");
338 /// \endcode
339 ///
340 /// \note See FilterMissing() if you want to keep only the entries with
341 /// missing values instead.
342 RInterface<RDFDetail::RFilterWithMissingValues<Proxied>, DS_t> FilterAvailable(std::string_view column)
343 {
    // Build the one-element column list from the exact extent of the view.
    // string_view::data() is not guaranteed to be NUL-terminated, so constructing the
    // name from the raw pointer could read past the end of the view or pick up
    // trailing characters of a larger buffer.
344 const auto columns = ColumnNames_t{std::string(column)};
345 // For now disable this functionality in case of an empty data source and
346 // the column name was not defined previously.
    // Throws std::runtime_error when the data source label of this node is EmptyDS.
347 if (ROOT::Internal::RDF::GetDataSourceLabel(*this) == "EmptyDS")
348 throw std::runtime_error("Unknown column: \"" + std::string(column) + "\"");
349 using F_t = RDFDetail::RFilterWithMissingValues<Proxied>;
    // discardEntry is true: entries with a missing value in the column are dropped.
    // The node is given fProxiedPtr, the node proxied by this interface.
350 auto filterPtr = std::make_shared<F_t>(/*discardEntry*/ true, fProxiedPtr, fColRegister, columns);
    // The column type is not known here, hence the TypeList of void.
    // NOTE(review): presumably this registers the column with the data source - confirm.
351 CheckAndFillDSColumns(columns, TTraits::TypeList<void>{});
352 return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, fColRegister);
353 }
354
355 ////////////////////////////////////////////////////////////////////////////
356 /// \brief Keep only the entries that have missing values.
357 /// \param[in] column Column name whose entries with missing values should be kept
358 /// \return The filter node of the computation graph
359 ///
360 /// This operation is useful in case an entry of the dataset is incomplete,
361 /// i.e. if one or more of the columns do not have valid values. It only
362 /// keeps the entries for which the value of the input column is missing.
363 ///
364 /// Use cases include:
365 /// * When processing multiple files, one or more of them is missing a column
366 /// * In horizontal joining with entry matching, a certain dataset has no
367 /// match for the current entry.
368 ///
369 /// ### Example usage:
370 ///
371 /// \code{.py}
372 /// # Assume a dataset made of two files vertically chained together, one has
373 /// # column "x" and the other has column "y"
374 /// df = ROOT.RDataFrame(dataset)
375 /// df_valid_col_x = df.FilterMissing("y")
376 /// df_valid_col_y = df.FilterMissing("x")
377 /// display_x = df_valid_col_x.Display(("x",))
378 /// display_y = df_valid_col_y.Display(("y",))
379 /// \endcode
380 ///
381 /// \code{.cpp}
382 /// // Assume a dataset made of two files vertically chained together, one has
383 /// // column "x" and the other has column "y"
384 /// ROOT::RDataFrame df{dataset};
385 /// auto df_valid_col_x = df.FilterMissing("y");
386 /// auto df_valid_col_y = df.FilterMissing("x");
387 /// auto display_x = df_valid_col_x.Display<int>({"x"});
388 /// auto display_y = df_valid_col_y.Display<int>({"y"});
389 /// \endcode
390 ///
391 /// \note See FilterAvailable() if you want to discard the entries in case
392 /// there is a missing value instead.
393 RInterface<RDFDetail::RFilterWithMissingValues<Proxied>, DS_t> FilterMissing(std::string_view column)
394 {
395 const auto columns = ColumnNames_t{column.data()};
396 // For now disable this functionality in case of an empty data source and
397 // the column name was not defined previously.
398 if (ROOT::Internal::RDF::GetDataSourceLabel(*this) == "EmptyDS")
399 throw std::runtime_error("Unknown column: \"" + std::string(column) + "\"");
400 using F_t = RDFDetail::RFilterWithMissingValues<Proxied>;
401 auto filterPtr = std::make_shared<F_t>(/*discardEntry*/ false, fProxiedPtr, fColRegister, columns);
402 CheckAndFillDSColumns(columns, TTraits::TypeList<void>{});
403 return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, fColRegister);
404 }
405
406 // clang-format off
407 ////////////////////////////////////////////////////////////////////////////
408 /// \brief Define a new column.
409 /// \param[in] name The name of the defined column.
410 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
411 /// \param[in] columns Names of the columns/branches in input to the producer function.
412 /// \return the first node of the computation graph for which the new quantity is defined.
413 ///
414 /// Define a column that will be visible from all subsequent nodes
415 /// of the functional chain. The `expression` is only evaluated for entries that pass
416 /// all the preceding filters.
417 /// A new variable is created called `name`, accessible as if it was contained
418 /// in the dataset from subsequent transformations/actions.
419 ///
420 /// Use cases include:
421 /// * caching the results of complex calculations for easy and efficient multiple access
422 /// * extraction of quantities of interest from complex objects
423 ///
424 /// An exception is thrown if the name of the new column is already in use in this branch of the computation graph.
425 ///
426 /// ### Example usage:
427 /// ~~~{.cpp}
428 /// // assuming a function with signature:
429 /// double myComplexCalculation(const RVec<float> &muon_pts);
430 /// // we can pass it directly to Define
431 /// auto df_with_define = df.Define("newColumn", myComplexCalculation, {"muon_pts"});
432 /// // alternatively, we can pass the body of the function as a string, as in Filter:
433 /// auto df_with_define = df.Define("newColumn", "x*x + y*y");
434 /// ~~~
435 ///
436 /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested
437 /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work:
438 /// ~~~{.cpp}
439 /// df.Define("x2", "Map(v, [](float e) { return e*e; })")
440 /// ~~~
441 /// but instead this will:
442 /// ~~~{.cpp}
443 /// df.Define("x2", "return Map(v, [](float e) { return e*e; })")
444 /// ~~~
445 template <typename F, typename std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0>
446 RInterface<Proxied, DS_t> Define(std::string_view name, F expression, const ColumnNames_t &columns = {})
447 {
448 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::None>(name, std::move(expression), columns, "Define");
449 }
450 // clang-format on
451
452 // clang-format off
453 ////////////////////////////////////////////////////////////////////////////
454 /// \brief Define a new column with a value dependent on the processing slot.
455 /// \param[in] name The name of the defined column.
456 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
457 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding the slot number).
458 /// \return the first node of the computation graph for which the new quantity is defined.
459 ///
460 /// This alternative implementation of `Define` is meant as a helper to evaluate new column values in a thread-safe manner.
461 /// The expression must be a callable of signature R(unsigned int, T1, T2, ...) where `T1, T2...` are the types
462 /// of the columns that the expression takes as input. The first parameter is reserved for an unsigned integer
463 /// representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
464 /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1.
465 /// Note that there is no guarantee as to how often each slot will be reached during the event loop.
466 ///
467 /// The following two calls are equivalent, although `DefineSlot` is slightly more performant:
468 /// ~~~{.cpp}
469 /// int function(unsigned int, double, double);
470 /// df.Define("x", function, {"rdfslot_", "column1", "column2"})
471 /// df.DefineSlot("x", function, {"column1", "column2"})
472 /// ~~~
473 ///
474 /// See Define() for more information.
475 template <typename F>
476 RInterface<Proxied, DS_t> DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns = {})
477 {
478 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::Slot>(name, std::move(expression), columns, "DefineSlot");
479 }
480 // clang-format on
481
482 // clang-format off
483 ////////////////////////////////////////////////////////////////////////////
484 /// \brief Define a new column with a value dependent on the processing slot and the current entry.
485 /// \param[in] name The name of the defined column.
486 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
487 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
488 /// \return the first node of the computation graph for which the new quantity is defined.
489 ///
490 /// This alternative implementation of `Define` is meant as a helper in writing entry-specific, thread-safe custom
491 /// columns. The expression must be a callable of signature R(unsigned int, ULong64_t, T1, T2, ...) where `T1, T2...`
492 /// are the types of the columns that the expression takes as input. The first parameter is reserved for an unsigned
493 /// integer representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
494 /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1.
495 /// Note that there is no guarantee as to how often each slot will be reached during the event loop.
496 /// The second parameter is reserved for a `ULong64_t` representing the current entry being processed by the current thread.
497 ///
498 /// The following two `Define`s are equivalent, although `DefineSlotEntry` is slightly more performant:
499 /// ~~~{.cpp}
500 /// int function(unsigned int, ULong64_t, double, double);
501 /// Define("x", function, {"rdfslot_", "rdfentry_", "column1", "column2"})
502 /// DefineSlotEntry("x", function, {"column1", "column2"})
503 /// ~~~
504 ///
505 /// See Define() for more information.
506 template <typename F>
507 RInterface<Proxied, DS_t> DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns = {})
508 {
509 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::SlotAndEntry>(name, std::move(expression), columns,
510 "DefineSlotEntry");
511 }
512 // clang-format on
513
514 ////////////////////////////////////////////////////////////////////////////
515 /// \brief Define a new column.
516 /// \param[in] name The name of the defined column.
517 /// \param[in] expression An expression in C++ which represents the defined value
518 /// \return the first node of the computation graph for which the new quantity is defined.
519 ///
520 /// The expression is just-in-time compiled and used to produce the column entries.
521 /// It must be valid C++ syntax in which variable names are substituted with the names
522 /// of branches/columns.
523 ///
524 /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested
525 /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work:
526 /// ~~~{.cpp}
527 /// df.Define("x2", "Map(v, [](float e) { return e*e; })")
528 /// ~~~
529 /// but instead this will:
530 /// ~~~{.cpp}
531 /// df.Define("x2", "return Map(v, [](float e) { return e*e; })")
532 /// ~~~
533 ///
534 /// Refer to the first overload of this method for the full documentation.
535 RInterface<Proxied, DS_t> Define(std::string_view name, std::string_view expression)
536 {
537 constexpr auto where = "Define";
538 RDFInternal::CheckValidCppVarName(name, where);
539 // these checks must be done before jitting lest we throw exceptions in jitted code
540 RDFInternal::CheckForRedefinition(where, name, fColRegister, fLoopManager->GetBranchNames(),
541 GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{});
542
543 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
544 auto jittedDefine = RDFInternal::BookDefineJit(name, expression, *fLoopManager, GetDataSource(), fColRegister,
545 fLoopManager->GetBranchNames(), upcastNodeOnHeap);
546
547 RDFInternal::RColumnRegister newCols(fColRegister);
548 newCols.AddDefine(std::move(jittedDefine));
549
550 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
551
552 return newInterface;
553 }
554
555 ////////////////////////////////////////////////////////////////////////////
556 /// \brief Overwrite the value and/or type of an existing column.
557 /// \param[in] name The name of the column to redefine.
558 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
559 /// \param[in] columns Names of the columns/branches in input to the expression.
560 /// \return the first node of the computation graph for which the quantity is redefined.
561 ///
562 /// The old value of the column can be used as an input for the expression.
563 ///
564 /// An exception is thrown in case the column to redefine does not already exist.
565 /// See Define() for more information.
566 template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0>
567 RInterface<Proxied, DS_t> Redefine(std::string_view name, F expression, const ColumnNames_t &columns = {})
568 {
569 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::None>(name, std::move(expression), columns, "Redefine");
570 }
571
572 // clang-format off
573 ////////////////////////////////////////////////////////////////////////////
574 /// \brief Overwrite the value and/or type of an existing column.
575 /// \param[in] name The name of the column to redefine.
576 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
577 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot).
578 /// \return the first node of the computation graph for which the new quantity is defined.
579 ///
580 /// The old value of the column can be used as an input for the expression.
581 /// An exception is thrown in case the column to redefine does not already exist.
582 ///
583 /// See DefineSlot() for more information.
584 // clang-format on
585 template <typename F>
586 RInterface<Proxied, DS_t> RedefineSlot(std::string_view name, F expression, const ColumnNames_t &columns = {})
587 {
588 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::Slot>(name, std::move(expression), columns, "RedefineSlot");
589 }
590
591 // clang-format off
592 ////////////////////////////////////////////////////////////////////////////
593 /// \brief Overwrite the value and/or type of an existing column.
594 /// \param[in] name The name of the column to redefine.
595 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
596 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
597 /// \return the first node of the computation graph for which the new quantity is defined.
598 ///
599 /// The old value of the column can be used as an input for the expression.
600 /// An exception is thrown in case the column to re-define does not already exist.
601 ///
602 /// See DefineSlotEntry() for more information.
603 // clang-format on
604 template <typename F>
605 RInterface<Proxied, DS_t> RedefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns = {})
606 {
607 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::SlotAndEntry>(name, std::move(expression), columns,
608 "RedefineSlotEntry");
609 }
610
611 ////////////////////////////////////////////////////////////////////////////
612 /// \brief Overwrite the value and/or type of an existing column.
613 /// \param[in] name The name of the column to redefine.
614 /// \param[in] expression An expression in C++ which represents the defined value
615 /// \return the first node of the computation graph for which the new quantity is defined.
616 ///
617 /// The expression is just-in-time compiled and used to produce the column entries.
618 /// It must be valid C++ syntax in which variable names are substituted with the names
619 /// of branches/columns.
620 ///
621 /// The old value of the column can be used as an input for the expression.
622 /// An exception is thrown in case the column to re-define does not already exist.
623 ///
624 /// Aliases cannot be overridden. See the corresponding Define() overload for more information.
625 RInterface<Proxied, DS_t> Redefine(std::string_view name, std::string_view expression)
626 {
627 constexpr auto where = "Redefine";
628 RDFInternal::CheckValidCppVarName(name, where);
629 RDFInternal::CheckForDefinition(where, name, fColRegister, fLoopManager->GetBranchNames(),
630 GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{});
631 RDFInternal::CheckForNoVariations(where, name, fColRegister);
632
633 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
634 auto jittedDefine = RDFInternal::BookDefineJit(name, expression, *fLoopManager, GetDataSource(), fColRegister,
635 fLoopManager->GetBranchNames(), upcastNodeOnHeap);
636
637 RDFInternal::RColumnRegister newCols(fColRegister);
638 newCols.AddDefine(std::move(jittedDefine));
639
640 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
641
642 return newInterface;
643 }
644
645 ////////////////////////////////////////////////////////////////////////////
646 /// \brief In case the value in the given column is missing, provide a default value
647 /// \tparam T The type of the column
648 /// \param[in] column Column name where missing values should be replaced by the given default value
649 /// \param[in] defaultValue Value to provide instead of a missing value
650 /// \return The node of the graph that will provide a default value
651 ///
652 /// This operation is useful in case an entry of the dataset is incomplete,
653 /// i.e. if one or more of the columns do not have valid values. It does not
654 /// modify the values of the column, but in case any entry is missing, it
655 /// will provide the default value to downstream nodes instead.
656 ///
657 /// Use cases include:
658 /// * When processing multiple files, one or more of them is missing a column
659 /// * In horizontal joining with entry matching, a certain dataset has no
660 /// match for the current entry.
661 ///
662 /// ### Example usage:
663 ///
664 /// \code{.cpp}
665 /// // Assume a dataset with columns [idx, x] matching another dataset with
666 /// // columns [idx, y]. For idx == 42, the right-hand dataset has no match
667 /// ROOT::RDataFrame df{dataset};
668 /// auto df_default = df.DefaultValueFor("y", 33)
669 /// .Define("z", [](int x, int y) { return x + y; }, {"x", "y"});
670 /// auto colz = df_default.Take<int>("z");
671 /// \endcode
672 ///
673 /// \code{.py}
674 /// df = ROOT.RDataFrame(dataset)
675 /// df_default = df.DefaultValueFor("y", 33).Define("z", "x + y")
676 /// colz = df_default.Take[int]("z")
677 /// \endcode
678 template <typename T>
679 RInterface<Proxied, DS_t> DefaultValueFor(std::string_view column, const T &defaultValue)
680 {
681 constexpr auto where{"DefaultValueFor"};
682 RDFInternal::CheckForNoVariations(where, column, fColRegister);
683 // For now disable this functionality in case of an empty data source and
684 // the column name was not defined previously.
685 if (ROOT::Internal::RDF::GetDataSourceLabel(*this) == "EmptyDS")
686 RDFInternal::CheckForDefinition(where, column, fColRegister, fLoopManager->GetBranchNames(),
687 GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{});
688
689 // Declare return type to the interpreter, for future use by jitted actions
690 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(T));
691 if (retTypeName.empty()) {
692 // The type is not known to the interpreter.
693 // We must not error out here, but if/when this column is used in jitted code
694 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(T));
695 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
696 }
697
698 const auto validColumnNames = ColumnNames_t{column.data()};
699 auto newColumn = std::make_shared<ROOT::Internal::RDF::RDefaultValueFor<T>>(
700 column, retTypeName, defaultValue, validColumnNames, fColRegister, *fLoopManager);
701 CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>{});
702
703 RDFInternal::RColumnRegister newCols(fColRegister);
704 newCols.AddDefine(std::move(newColumn));
705
706 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
707
708 return newInterface;
709 }
710
711 // clang-format off
712 ////////////////////////////////////////////////////////////////////////////
713 /// \brief Define a new column that is updated when the input sample changes.
714 /// \param[in] name The name of the defined column.
715 /// \param[in] expression A C++ callable that computes the new value of the defined column.
716 /// \return the first node of the computation graph for which the new quantity is defined.
717 ///
718 /// The signature of the callable passed as second argument should be `T(unsigned int slot, const ROOT::RDF::RSampleInfo &id)`
719 /// where:
720 /// - `T` is the type of the defined column
721 /// - `slot` is a number in the range [0, nThreads) that is different for each processing thread. This can simplify
722 /// the definition of thread-safe callables if you are interested in using parallel capabilities of RDataFrame.
723 /// - `id` is an instance of a ROOT::RDF::RSampleInfo object which contains information about the sample which is
724 /// being processed (see the class docs for more information).
725 ///
726 /// DefinePerSample() is useful to e.g. define a quantity that depends on which TTree in which TFile is being
727 /// processed or to inject a callback into the event loop that is only called when the processing of a new sample
728 /// starts rather than at every entry.
729 ///
730 /// The callable will be invoked once per input TTree or once per multi-thread task, whichever is more often.
731 ///
732 /// ### Example usage:
733 /// ~~~{.cpp}
734 /// ROOT::RDataFrame df{"mytree", {"sample1.root","sample2.root"}};
735 /// df.DefinePerSample("weightbysample",
736 /// [](unsigned int slot, const ROOT::RDF::RSampleInfo &id)
737 /// { return id.Contains("sample1") ? 1.0f : 2.0f; });
738 /// ~~~
739 // clang-format on
740 // TODO we could SFINAE on F's signature to provide friendlier compilation errors in case of signature mismatch
741 template <typename F, typename RetType_t = typename TTraits::CallableTraits<F>::ret_type>
742 RInterface<Proxied, DS_t> DefinePerSample(std::string_view name, F expression)
743 {
744 RDFInternal::CheckValidCppVarName(name, "DefinePerSample");
745 RDFInternal::CheckForRedefinition("DefinePerSample", name, fColRegister, fLoopManager->GetBranchNames(),
746 GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{});
747
748 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType_t));
749 if (retTypeName.empty()) {
750 // The type is not known to the interpreter.
751 // We must not error out here, but if/when this column is used in jitted code
752 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType_t));
753 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
754 }
755
756 auto newColumn =
757 std::make_shared<RDFDetail::RDefinePerSample<F>>(name, retTypeName, std::move(expression), *fLoopManager);
758
759 RDFInternal::RColumnRegister newCols(fColRegister);
760 newCols.AddDefine(std::move(newColumn));
761 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
762 return newInterface;
763 }
764
765 // clang-format off
766 ////////////////////////////////////////////////////////////////////////////
767 /// \brief Define a new column that is updated when the input sample changes.
768 /// \param[in] name The name of the defined column.
769 /// \param[in] expression A valid C++ expression as a string, which will be used to compute the defined value.
770 /// \return the first node of the computation graph for which the new quantity is defined.
771 ///
772 /// The expression is just-in-time compiled and used to produce the column entries.
773 /// It must be valid C++ syntax and the usage of the special variable names `rdfslot_` and `rdfsampleinfo_` is
774 /// permitted, where these variables will take the same values as the `slot` and `id` parameters described at the
775 /// DefinePerSample(std::string_view name, F expression) overload. See the documentation of that overload for more information.
776 ///
777 /// ### Example usage:
778 /// ~~~{.py}
779 /// df = ROOT.RDataFrame('mytree', ['sample1.root','sample2.root'])
780 /// df.DefinePerSample('weightbysample', 'rdfsampleinfo_.Contains("sample1") ? 1.0f : 2.0f')
781 /// ~~~
782 ///
783 /// \note
784 /// If you have declared some C++ function to the interpreter, the correct syntax to call that function with this
785 /// overload of DefinePerSample is by calling it explicitly with the special names `rdfslot_` and `rdfsampleinfo_` as
786 /// input parameters. This is for example the correct way to call this overload when working in PyROOT:
787 /// ~~~{.py}
788 /// ROOT.gInterpreter.Declare(
789 /// """
790 /// float weights(unsigned int slot, const ROOT::RDF::RSampleInfo &id){
791 /// return id.Contains("sample1") ? 1.0f : 2.0f;
792 /// }
793 /// """)
794 /// df = ROOT.RDataFrame("mytree", ["sample1.root","sample2.root"])
795 /// df.DefinePerSample("weightsbysample", "weights(rdfslot_, rdfsampleinfo_)")
796 /// ~~~
797 ///
798 /// \note
799 /// Differently from what happens in Define(), the string expression passed to DefinePerSample cannot contain
800 /// column names other than those mentioned above: the expression is evaluated once before the processing of the
801 /// sample even starts, so column values are not accessible.
802 // clang-format on
803 RInterface<Proxied, DS_t> DefinePerSample(std::string_view name, std::string_view expression)
804 {
805 RDFInternal::CheckValidCppVarName(name, "DefinePerSample");
806 // these checks must be done before jitting lest we throw exceptions in jitted code
807 RDFInternal::CheckForRedefinition("DefinePerSample", name, fColRegister, fLoopManager->GetBranchNames(),
808 GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{});
809
810 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
811 auto jittedDefine =
812 RDFInternal::BookDefinePerSampleJit(name, expression, *fLoopManager, fColRegister, upcastNodeOnHeap);
813
814 RDFInternal::RColumnRegister newCols(fColRegister);
815 newCols.AddDefine(std::move(jittedDefine));
816
817 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
818
819 return newInterface;
820 }
821
822 /// \brief Register systematic variations for a single existing column using custom variation tags.
823 /// \param[in] colName name of the column for which varied values are provided.
824 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
825 /// take any column values as input, similarly to what happens during Filter and Define calls. It must
826 /// return an RVec of varied values, one for each variation tag, in the same order as the tags.
827 /// \param[in] inputColumns the names of the columns to be passed to the callable.
828 /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`.
829 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
830 ///
831 /// Vary provides a natural and flexible syntax to define systematic variations that automatically propagate to
832 /// Filters, Defines and results. RDataFrame usage of columns with attached variations does not change, but for
833 /// results that depend on any varied quantity, a map/dictionary of varied results can be produced with
834 /// ROOT::RDF::Experimental::VariationsFor (see the example below).
835 ///
836 /// The dictionary will contain a "nominal" value (accessed with the "nominal" key) for the unchanged result, and
837 /// values for each of the systematic variations that affected the result (via upstream Filters or via direct or
838 /// indirect dependencies of the column values on some registered variations). The keys will be a composition of
839 /// variation names and tags, e.g. "pt:up" and "pt:down" for the example below.
840 ///
841 /// In the following example we add up/down variations of pt and fill a histogram with a quantity that depends on pt.
842 /// We automatically obtain three histograms in output ("nominal", "pt:up" and "pt:down"):
843 /// ~~~{.cpp}
844 /// auto nominal_hx =
845 /// df.Vary("pt", [] (double pt) { return RVecD{pt*0.9, pt*1.1}; }, {"down", "up"})
846 /// .Filter("pt > k")
847 /// .Define("x", someFunc, {"pt"})
848 /// .Histo1D("x");
849 ///
850 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
851 /// hx["nominal"].Draw();
852 /// hx["pt:down"].Draw("SAME");
853 /// hx["pt:up"].Draw("SAME");
854 /// ~~~
855 /// RDataFrame computes all variations as part of a single loop over the data.
856 /// In particular, this means that I/O and computation of values shared
857 /// among variations only happen once for all variations. Thus, the event loop
858 /// run-time typically scales much better than linearly with the number of
859 /// variations.
860 ///
861 /// RDataFrame lazily computes the varied values required to produce the
862 /// outputs of \ref ROOT::RDF::Experimental::VariationsFor "VariationsFor()". If \ref
863 /// ROOT::RDF::Experimental::VariationsFor "VariationsFor()" was not called for a result, the computations are only
864 /// run for the nominal case.
865 ///
866 /// See other overloads for examples when variations are added for multiple existing columns,
867 /// or when the tags are auto-generated instead of being directly defined.
868 template <typename F>
869 RInterface<Proxied, DS_t> Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns,
870 const std::vector<std::string> &variationTags, std::string_view variationName = "")
871 {
872 std::vector<std::string> colNames{{std::string(colName)}};
873 const std::string theVariationName{variationName.empty() ? colName : variationName};
874
875 return VaryImpl<true>(std::move(colNames), std::forward<F>(expression), inputColumns, variationTags,
876 theVariationName);
877 }
878
879 /// \brief Register systematic variations for a single existing column using auto-generated variation tags.
880 /// \param[in] colName name of the column for which varied values are provided.
881 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
882 /// take any column values as input, similarly to what happens during Filter and Define calls. It must
883 /// return an RVec of varied values, one for each variation tag, in the same order as the tags.
884 /// \param[in] inputColumns the names of the columns to be passed to the callable.
885 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
886 /// `"1"`, etc.
887 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
888 /// colName is used if none is provided.
889 ///
890 /// This overload of Vary takes an nVariations parameter instead of a list of tag names.
891 /// The varied results will be accessible via the keys of the dictionary with the form `variationName:N` where `N`
892 /// is the corresponding sequential tag starting at 0 and going up to `nVariations - 1`.
893 ///
894 /// Example usage:
895 /// ~~~{.cpp}
896 /// auto nominal_hx =
897 /// df.Vary("pt", [] (double pt) { return RVecD{pt*0.9, pt*1.1}; }, 2)
898 /// .Histo1D("x");
899 ///
900 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
901 /// hx["nominal"].Draw();
902 /// hx["pt:0"].Draw("SAME");
903 /// hx["pt:1"].Draw("SAME");
904 /// ~~~
905 ///
906 /// \note See also This Vary() overload for more information.
907 template <typename F>
908 RInterface<Proxied, DS_t> Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns,
909 std::size_t nVariations, std::string_view variationName = "")
910 {
911 R__ASSERT(nVariations > 0 && "Must have at least one variation.")do { if (__builtin_expect(!!(!(nVariations > 0 && "Must have at least one variation."
)), 0)) ::Fatal("", kAssertMsg, "nVariations > 0 && \"Must have at least one variation.\""
, 911, "/cvmfs/belle.cern.ch/el9/externals/v02-04-00/include/root/ROOT/RDF/RInterface.hxx"
); } while (false)
;
912
913 std::vector<std::string> variationTags;
914 variationTags.reserve(nVariations);
915 for (std::size_t i = 0u; i < nVariations; ++i)
916 variationTags.emplace_back(std::to_string(i));
917
918 const std::string theVariationName{variationName.empty() ? colName : variationName};
919
920 return Vary(colName, std::forward<F>(expression), inputColumns, std::move(variationTags), theVariationName);
921 }
922
923 /// \brief Register systematic variations for multiple existing columns using custom variation tags.
924 /// \param[in] colNames set of names of the columns for which varied values are provided.
925 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
926 /// take any column values as input, similarly to what happens during Filter and Define calls. It must
927 /// return an RVec of varied values, one for each variation tag, in the same order as the tags.
928 /// \param[in] inputColumns the names of the columns to be passed to the callable.
929 /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`.
930 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`
931 ///
932 /// This overload of Vary takes a list of column names as first argument and
933 /// requires that the expression returns an RVec of RVecs of values: one inner RVec for the variations of each
934 /// affected column. The `variationTags` are defined as `{"down", "up"}`.
935 ///
936 /// Example usage:
937 /// ~~~{.cpp}
938 /// // produce variations "ptAndEta:down" and "ptAndEta:up"
939 /// auto nominal_hx =
940 /// df.Vary({"pt", "eta"}, // the columns that will vary simultaneously
941 /// [](double pt, double eta) { return RVec<RVecF>{{pt*0.9, pt*1.1}, {eta*0.9, eta*1.1}}; },
942 /// {"pt", "eta"}, // inputs to the Vary expression, independent of what columns are varied
943 /// {"down", "up"}, // variation tags
944 /// "ptAndEta") // variation name
945 /// .Histo1D("pt", "eta");
946 ///
947 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
948 /// hx["nominal"].Draw();
949 /// hx["ptAndEta:down"].Draw("SAME");
950 /// hx["ptAndEta:up"].Draw("SAME");
951 /// ~~~
952 ///
953 /// \note See also This Vary() overload for more information.
954
955 template <typename F>
956 RInterface<Proxied, DS_t>
957 Vary(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns,
958 const std::vector<std::string> &variationTags, std::string_view variationName)
959 {
// Multi-column entry point: all arguments are forwarded unchanged to VaryImpl<false>.
// NOTE(review): the boolean template argument presumably selects single- vs multi-column handling - confirm
// against the VaryImpl definition.
960 return VaryImpl<false>(colNames, std::forward<F>(expression), inputColumns, variationTags, variationName);
961 }
962
963 /// \brief Register systematic variations for multiple existing columns using custom variation tags.
964 /// \param[in] colNames set of names of the columns for which varied values are provided.
965 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
966 /// take any column values as input, similarly to what happens during Filter and Define calls. It must
967 /// return an RVec of varied values, one for each variation tag, in the same order as the tags.
968 /// \param[in] inputColumns the names of the columns to be passed to the callable.
969 /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`.
970 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
971 /// colName is used if none is provided.
972 ///
973 /// \note This overload ensures that the ambiguity between C++20 string, vector<string> construction from init list
974 /// is avoided.
975 ///
976 /// \note See also This Vary() overload for more information.
977 template <typename F>
978 RInterface<Proxied, DS_t>
979 Vary(std::initializer_list<std::string> colNames, F &&expression, const ColumnNames_t &inputColumns,
980 const std::vector<std::string> &variationTags, std::string_view variationName)
981 {
// Materialize the initializer list as a std::vector<std::string> and reuse the vector-based overload.
982 return Vary(std::vector<std::string>(colNames), std::forward<F>(expression), inputColumns, variationTags, variationName);
983 }
984
985 /// \brief Register systematic variations for multiple existing columns using auto-generated tags.
986 /// \param[in] colNames set of names of the columns for which varied values are provided.
987 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
988 /// take any column values as input, similarly to what happens during Filter and Define calls. It must
989 /// return an RVec of varied values, one for each variation tag, in the same order as the tags.
990 /// \param[in] inputColumns the names of the columns to be passed to the callable.
991 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
992 /// `"1"`, etc.
993 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
994 /// colName is used if none is provided.
995 ///
996 /// This overload of Vary takes a list of column names as first argument.
997 /// It takes an `nVariations` parameter instead of a list of tag names (`variationTags`). Tag names
998 /// will be auto-generated as the sequence 0...``nVariations-1``.
999 ///
1000 /// Example usage:
1001 /// ~~~{.cpp}
1002 /// auto nominal_hx =
1003 /// df.Vary({"pt", "eta"}, // the columns that will vary simultaneously
1004 /// [](double pt, double eta) { return RVec<RVecF>{{pt*0.9, pt*1.1}, {eta*0.9, eta*1.1}}; },
1005 /// {"pt", "eta"}, // inputs to the Vary expression, independent of what columns are varied
1006 /// 2, // auto-generated variation tags
1007 /// "ptAndEta") // variation name
1008 /// .Histo1D("pt", "eta");
1009 ///
1010 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
1011 /// hx["nominal"].Draw();
1012 /// hx["ptAndEta:0"].Draw("SAME");
1013 /// hx["ptAndEta:1"].Draw("SAME");
1014 /// ~~~
1015 ///
1016 /// \note See also This Vary() overload for more information.
1017 template <typename F>
1018 RInterface<Proxied, DS_t>
1019 Vary(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns,
1020 std::size_t nVariations, std::string_view variationName)
1021 {
// Guard against nVariations == 0. The text following R__ASSERT(...) appears to be the macro's expansion as
// rendered by the analyzer listing: it calls ::Fatal when the condition does not hold.
1022 R__ASSERT(nVariations > 0 && "Must have at least one variation.")do { if (__builtin_expect(!!(!(nVariations > 0 && "Must have at least one variation."
)), 0)) ::Fatal("", kAssertMsg, "nVariations > 0 && \"Must have at least one variation.\""
, 1022, "/cvmfs/belle.cern.ch/el9/externals/v02-04-00/include/root/ROOT/RDF/RInterface.hxx"
); } while (false)
;
1023
// Auto-generate the variation tags "0", "1", ..., "nVariations - 1".
1024 std::vector<std::string> variationTags;
1025 variationTags.reserve(nVariations);
1026 for (std::size_t i = 0u; i < nVariations; ++i)
1027 variationTags.emplace_back(std::to_string(i));
1028
// Delegate to the overload taking explicit tags; variationName is passed through as given (no fallback here).
1029 return Vary(colNames, std::forward<F>(expression), inputColumns, std::move(variationTags), variationName);
1030 }
1031
1032 /// \brief Register systematic variations for multiple existing columns using custom variation tags.
1033 /// \param[in] colNames set of names of the columns for which varied values are provided.
1034 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
1035 /// take any column values as input, similarly to what happens during Filter and Define calls. It must
1036 /// return an RVec of varied values, one for each variation tag, in the same order as the tags.
1037 /// \param[in] inputColumns the names of the columns to be passed to the callable.
1038 /// \param[in] inputColumns the names of the columns to be passed to the callable.
1039 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
1040 /// `"1"`, etc.
1041 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
1042 /// colName is used if none is provided.
1043 ///
1044 /// \note This overload ensures that the ambiguity between C++20 string, vector<string> construction from init list
1045 /// is avoided.
1046 ///
1047 /// \note See also This Vary() overload for more information.
1048 template <typename F>
1049 RInterface<Proxied, DS_t>
1050 Vary(std::initializer_list<std::string> colNames, F &&expression, const ColumnNames_t &inputColumns,
1051 std::size_t nVariations, std::string_view variationName)
1052 {
// Materialize the initializer list as a std::vector<std::string> and reuse the vector-based nVariations overload.
1053 return Vary(std::vector<std::string>(colNames), std::forward<F>(expression), inputColumns, nVariations, variationName);
1054 }
1055
1056 /// \brief Register systematic variations for a single existing column using custom variation tags.
1057 /// \param[in] colName name of the column for which varied values are provided.
1058 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied
1059 /// values for the specified column.
1060 /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`.
1061 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
1062 /// colName is used if none is provided.
1063 ///
1064 /// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time
1065 /// compiled. The example below shows how Vary() is used while dealing with a single column. The variation tags are
1066 /// defined as `{"down", "up"}`.
1067 /// ~~~{.cpp}
1068 /// auto nominal_hx =
1069 /// df.Vary("pt", "ROOT::RVecD{pt*0.9, pt*1.1}", {"down", "up"})
1070 /// .Filter("pt > k")
1071 /// .Define("x", someFunc, {"pt"})
1072 /// .Histo1D("x");
1073 ///
1074 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
1075 /// hx["nominal"].Draw();
1076 /// hx["pt:down"].Draw("SAME");
1077 /// hx["pt:up"].Draw("SAME");
1078 /// ~~~
1079 ///
1080 /// \note See also This Vary() overload for more information.
1081 RInterface<Proxied, DS_t> Vary(std::string_view colName, std::string_view expression,
1082 const std::vector<std::string> &variationTags, std::string_view variationName = "")
1083 {
// Wrap the single column name in a one-element vector, the form JittedVaryImpl receives below.
1084 std::vector<std::string> colNames{{std::string(colName)}};
// An empty variationName falls back to the name of the varied column.
1085 const std::string theVariationName{variationName.empty() ? colName : variationName};
1086
// Hand the string expression to JittedVaryImpl, flagged as the single-column case.
1087 return JittedVaryImpl(colNames, expression, variationTags, theVariationName, /*isSingleColumn=*/true);
1088 }
1089
1090 /// \brief Register systematic variations for a single existing column using auto-generated variation tags.
1091 /// \param[in] colName name of the column for which varied values are provided.
1092 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied
1093 /// values for the specified column.
1094 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
1095 /// `"1"`, etc.
1096 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
1097 /// colName is used if none is provided.
1098 ///
1099 /// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time
1100 /// compiled. The example below shows how Vary() is used while dealing with a single column. The variation tags are
1101 /// auto-generated.
1102 /// ~~~{.cpp}
1103 /// auto nominal_hx =
1104 /// df.Vary("pt", "ROOT::RVecD{pt*0.9, pt*1.1}", 2)
1105 /// .Histo1D("pt");
1106 ///
1107 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
1108 /// hx["nominal"].Draw();
1109 /// hx["pt:0"].Draw("SAME");
1110 /// hx["pt:1"].Draw("SAME");
1111 /// ~~~
1112 ///
1113 /// \note See also This Vary() overload for more information.
1114 RInterface<Proxied, DS_t> Vary(std::string_view colName, std::string_view expression, std::size_t nVariations,
1115 std::string_view variationName = "")
1116 {
// Auto-generate the variation tags "0", "1", ..., "nVariations - 1".
// NOTE(review): unlike the callable-based nVariations overloads in this file, no nVariations > 0 check is
// performed here; with nVariations == 0 an empty tag list is passed on - confirm this is handled downstream.
1117 std::vector<std::string> variationTags;
1118 variationTags.reserve(nVariations);
1119 for (std::size_t i = 0u; i < nVariations; ++i)
1120 variationTags.emplace_back(std::to_string(i));
1121
// Delegate to the string-expression overload taking explicit tags.
1122 return Vary(colName, expression, std::move(variationTags), variationName);
1123 }
1124
1125 /// \brief Register systematic variations for multiple existing columns using auto-generated variation tags.
1126 /// \param[in] colNames set of names of the columns for which varied values are provided.
1127 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec of RVecs containing the varied
1128 /// values for the specified columns.
1129 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
1130 /// `"1"`, etc.
1131 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
1132 ///
1133 /// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time
1134 /// compiled. It takes an nVariations parameter instead of a list of tag names.
1135 /// The varied results will be accessible via the keys of the dictionary with the form `variationName:N` where `N`
1136 /// is the corresponding sequential tag starting at 0 and going up to `nVariations - 1`.
1137 /// The example below shows how Vary() is used while dealing with multiple columns.
1138 ///
1139 /// ~~~{.cpp}
1140 /// auto nominal_hx =
1141 /// df.Vary({"x", "y"}, "ROOT::RVec<ROOT::RVecD>{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", 2, "xy")
1142 /// .Histo1D("x", "y");
1143 ///
1144 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
1145 /// hx["nominal"].Draw();
1146 /// hx["xy:0"].Draw("SAME");
1147 /// hx["xy:1"].Draw("SAME");
1148 /// ~~~
1149 ///
1150 /// \note See also This Vary() overload for more information.
1151 RInterface<Proxied, DS_t> Vary(const std::vector<std::string> &colNames, std::string_view expression,
1152 std::size_t nVariations, std::string_view variationName)
1153 {
// Auto-generate the variation tags "0", "1", ..., "nVariations - 1".
// NOTE(review): unlike the callable-based nVariations overloads in this file, no nVariations > 0 check is
// performed here; with nVariations == 0 an empty tag list is passed on - confirm this is handled downstream.
1154 std::vector<std::string> variationTags;
1155 variationTags.reserve(nVariations);
1156 for (std::size_t i = 0u; i < nVariations; ++i)
1157 variationTags.emplace_back(std::to_string(i));
1158
// Delegate to the multi-column string-expression overload taking explicit tags.
1159 return Vary(colNames, expression, std::move(variationTags), variationName);
1160 }
1161
1162 /// \brief Register systematic variations for multiple existing columns using auto-generated variation tags.
1163 /// \param[in] colNames set of names of the columns for which varied values are provided.
1164 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec of RVecs containing the varied
1165 /// values for the specified columns.
1166 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
1167 /// `"1"`, etc.
1168 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
1169 /// colName is used if none is provided.
1170 ///
1171 /// \note This overload ensures that the ambiguity between C++20 string, vector<string> construction from init list
1172 /// is avoided.
1173 ///
1174 /// \note See also This Vary() overload for more information.
1175 RInterface<Proxied, DS_t> Vary(std::initializer_list<std::string> colNames, std::string_view expression,
1176 std::size_t nVariations, std::string_view variationName)
1177 {
// Materialize the initializer list as a std::vector<std::string> and reuse the vector-based nVariations overload.
1178 return Vary(std::vector<std::string>(colNames), expression, nVariations, variationName);
1179 }
1180
1181 /// \brief Register systematic variations for multiple existing columns using custom variation tags.
1182 /// \param[in] colNames set of names of the columns for which varied values are provided.
1183 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec of RVecs containing the varied
1184 /// values for the specified columns.
1185 /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`.
1186 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
1187 ///
1188 /// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time
1189 /// compiled. The example below shows how Vary() is used while dealing with multiple columns. The tags are defined as
1190 /// `{"down", "up"}`.
1191 /// ~~~{.cpp}
1192 /// auto nominal_hx =
1193 /// df.Vary({"x", "y"}, "ROOT::RVec<ROOT::RVecD>{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", {"down", "up"}, "xy")
1194 /// .Histo1D("x", "y");
1195 ///
1196 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
1197 /// hx["nominal"].Draw();
1198 /// hx["xy:down"].Draw("SAME");
1199 /// hx["xy:up"].Draw("SAME");
1200 /// ~~~
1201 ///
1202 /// \note See also This Vary() overload for more information.
1203 RInterface<Proxied, DS_t> Vary(const std::vector<std::string> &colNames, std::string_view expression,
1204 const std::vector<std::string> &variationTags, std::string_view variationName)
1205 {
// Hand the string expression to JittedVaryImpl, flagged as the multi-column case.
1206 return JittedVaryImpl(colNames, expression, variationTags, variationName, /*isSingleColumn=*/false);
1207 }
1208
1209 ////////////////////////////////////////////////////////////////////////////
1210 /// \brief Allow to refer to a column with a different name.
1211 /// \param[in] alias name of the column alias
1212 /// \param[in] columnName of the column to be aliased
1213 /// \return the first node of the computation graph for which the alias is available.
1214 ///
1215 /// Aliasing an alias is supported.
1216 ///
1217 /// ### Example usage:
1218 /// ~~~{.cpp}
1219 /// auto df_with_alias = df.Alias("simple_name", "very_long&complex_name!!!");
1220 /// ~~~
1221 RInterface<Proxied, DS_t> Alias(std::string_view alias, std::string_view columnName)
1222 {
1223 // The symmetry with Define is clear. We want to:
1224 // - Create globally the alias and return this very node, unchanged
1225 // - Make aliases accessible based on chains and not globally
1226
1227 // Column names of the data source, or an empty list when there is no data source; used by the redefinition check
1228 auto &dsColumnNames = GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{};
1229
// `where` is the caller label handed to the two validation helpers below.
1230 constexpr auto where = "Alias";
1231 RDFInternal::CheckValidCppVarName(alias, where);
1232 // If the alias name is a column name, there is a problem
1233 RDFInternal::CheckForRedefinition(where, alias, fColRegister, fLoopManager->GetBranchNames(), dsColumnNames);
1234
// Validate the aliased column name; exactly one name goes in, so the first element of the result is taken.
1235 const auto validColumnName = GetValidatedColumnNames(1, {std::string(columnName)})[0];
1236
// Register the alias on a copy of the column register; fColRegister of this interface is left untouched.
1237 RDFInternal::RColumnRegister newCols(fColRegister);
1238 newCols.AddAlias(alias, validColumnName);
1239
// The returned interface reuses fProxiedPtr and the loop manager, and carries the extended register.
1240 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
1241
1242 return newInterface;
1243 }
1244
1245 ////////////////////////////////////////////////////////////////////////////
1246 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
1247 /// \tparam ColumnTypes variadic list of branch/column types.
1248 /// \param[in] treename The name of the output TTree.
1249 /// \param[in] filename The name of the output TFile.
1250 /// \param[in] columnList The list of names of the columns/branches to be written.
1251 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
1252 /// \return a `RDataFrame` that wraps the snapshotted dataset.
1253 ///
1254 /// Support for writing of nested branches is limited (although RDataFrame is able to read them) and dot ('.')
1255 /// characters in input column names will be replaced by underscores ('_') in the branches produced by Snapshot.
1256 /// When writing a variable size array through Snapshot, it is required that the column indicating its size is also
1257 /// written out and it appears before the array in the columnList.
1258 ///
1259 /// By default, in case of TTree or TChain inputs, Snapshot will try to write out all top-level branches. For other
1260 /// types of inputs, all columns returned by GetColumnNames() will be written out. If friend trees or chains are
1261 /// present, by default all friend top-level branches that have names that do not collide with
1262 /// names of branches in the main TTree/TChain will be written out. Since v6.24, Snapshot will also write out
1263 /// friend branches with the same names of branches in the main TTree/TChain with names of the form
1264 /// `<friendname>_<branchname>` in order to differentiate them from the branches in the main tree/chain.
1265 ///
1266 /// ### Writing to a sub-directory
1267 ///
1268 /// Snapshot supports writing the TTree in a sub-directory inside the TFile. It is sufficient to specify the path to
1269 /// the TTree as part of the TTree name, e.g. `df.Snapshot("subdir/t", "f.root")` writes TTree `t` in the
1270 /// sub-directory `subdir` of file `f.root` (creating file and sub-directory as needed).
1271 ///
1272 /// \attention In multi-thread runs (i.e. when EnableImplicitMT() has been called) threads will loop over clusters of
1273 /// entries in an undefined order, so Snapshot will produce outputs in which (clusters of) entries will be shuffled
1274 /// with respect to the input TTree. Using such "shuffled" TTrees as friends of the original trees would result in
1275 /// wrong associations between entries in the main TTree and entries in the "shuffled" friend. Since v6.22, ROOT will
1276 /// error out if such a "shuffled" TTree is used in a friendship.
1277 ///
1278 /// \note In case no events are written out (e.g. because no event passes all filters), Snapshot will still write the
1279 /// requested output TTree to the file, with all the branches requested to preserve the dataset schema.
1280 ///
1281 /// \note Snapshot will refuse to process columns with names of the form `#columnname`. These are special columns
1282 /// made available by some data sources (e.g. RNTupleDS) that represent the size of column `columnname`, and are
1283 /// not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an
1284 /// Alias(): `df.Alias("nbar", "#bar").Snapshot(..., {"nbar"})`.
1285 ///
1286 /// ### Example invocations:
1287 ///
1288 /// ~~~{.cpp}
1289 /// // without specifying template parameters (column types automatically deduced)
1290 /// df.Snapshot("outputTree", "outputFile.root", {"x", "y"});
1291 ///
1292 /// // specifying template parameters ("x" is `int`, "y" is `float`)
1293 /// df.Snapshot<int, float>("outputTree", "outputFile.root", {"x", "y"});
1294 /// ~~~
1295 ///
1296 /// To book a Snapshot without triggering the event loop, one needs to set the appropriate flag in
1297 /// `RSnapshotOptions`:
1298 /// ~~~{.cpp}
1299 /// RSnapshotOptions opts;
1300 /// opts.fLazy = true;
1301 /// df.Snapshot("outputTree", "outputFile.root", {"x"}, opts);
1302 /// ~~~
1303 template <typename... ColumnTypes>
1304 RResultPtr<RInterface<RLoopManager>>
1305 Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList,
1306 const RSnapshotOptions &options = RSnapshotOptions())
1307 {
// Explicitly typed variant: all arguments and the ColumnTypes pack are forwarded unchanged to SnapshotImpl.
1308 return SnapshotImpl<ColumnTypes...>(treename, filename, columnList, options);
1309 }
1310
1311 ////////////////////////////////////////////////////////////////////////////
1312 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
1313 /// \param[in] treename The name of the output TTree.
1314 /// \param[in] filename The name of the output TFile.
1315 /// \param[in] columnList The list of names of the columns/branches to be written.
1316 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
1317 /// \return a `RDataFrame` that wraps the snapshotted dataset.
1318 ///
1319 /// This function returns a `RDataFrame` built with the output tree as a source.
1320 /// The types of the columns are automatically inferred and do not need to be specified.
1321 ///
1322 /// See above for a more complete description and example usages.
1323 RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
1324 const ColumnNames_t &columnList,
1325 const RSnapshotOptions &options = RSnapshotOptions())
1326 {
1327 // like columnList but with `#var` columns removed
1328 auto colListNoPoundSizes = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
1329 // like colListNoPoundSizes but with aliases resolved
1330 auto colListNoAliases = GetValidatedColumnNames(colListNoPoundSizes.size(), colListNoPoundSizes);
1331 RDFInternal::CheckForDuplicateSnapshotColumns(colListNoAliases);
1332 // like colListNoAliases but with missing size branches required by array branches added in the right positions
// colListNoPoundSizes is handed over as a copy rather than through std::move: the RNTuple branch below still
// reads it to build the output RLoopManager, so it must not be left in a moved-from state.
1333 const auto pairOfColumnLists = RDFInternal::AddSizeBranches(
1334 fLoopManager->GetBranchNames(), GetDataSource(), std::move(colListNoAliases), decltype(colListNoPoundSizes)(colListNoPoundSizes));
1335 const auto &colListNoAliasesWithSizeBranches = pairOfColumnLists.first;
1336 const auto &colListWithAliasesAndSizeBranches = pairOfColumnLists.second;
1337
// Split the user-provided name into the directory part and the bare tree name; from here on `treename`
// refers to the bare name only.
1338 const auto fullTreeName = treename;
1339 const auto parsedTreePath = RDFInternal::ParseTreePath(fullTreeName);
1340 treename = parsedTreePath.fTreeName;
1341 const auto &dirname = parsedTreePath.fDirName;
1342
// NOTE(review): presumably a scope guard that restores the current TDirectory on exit - confirm.
1343 ::TDirectory::TContext ctxt;
1344
1345 RResultPtr<RInterface<RLoopManager>> resPtr;
1346
1347 if (options.fOutputFormat == ESnapshotOutputFormat::kRNTuple) {
1348 if (RDFInternal::GetDataSourceLabel(*this) == "TTreeDS") {
1349 throw std::runtime_error("Snapshotting from TTree to RNTuple is not yet supported. The current recommended "
1350 "way to convert TTrees to RNTuple is through the RNTupleImporter.");
1351 }
1352
1353 // The data source of the RNTuple resulting from the Snapshot action does not exist yet here, so we create one
1354 // without a data source for now, and set it once the actual data source can be created (i.e., after
1355 // writing the RNTuple).
1356 auto newRDF = std::make_shared<RInterface<RLoopManager>>(std::make_shared<RLoopManager>(colListNoPoundSizes));
1357
1358 auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{
1359 std::string(filename), std::string(dirname), std::string(treename), colListWithAliasesAndSizeBranches,
1360 options, newRDF->GetLoopManager(), GetLoopManager(), true /* fToNTuple */});
1361
1362 // The Snapshot helper will use colListNoAliasesWithSizeBranches (with aliases resolved) as input columns, and
1363 // colListWithAliasesAndSizeBranches (still with aliases in it, passed through snapHelperArgs) as output column
1364 // names.
1365 resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, RDFDetail::RInferredType>(
1366 colListNoAliasesWithSizeBranches, newRDF, snapHelperArgs, fProxiedPtr,
1367 colListNoAliasesWithSizeBranches.size());
1368 } else {
1369 if (RDFInternal::GetDataSourceLabel(*this) == "RNTupleDS" &&
1370 options.fOutputFormat == ESnapshotOutputFormat::kDefault) {
1371 Warning("Snapshot",
1372 "The default Snapshot output data format is TTree, but the input data format is RNTuple. If you "
1373 "want to Snapshot to RNTuple or suppress this warning, set the appropriate fOutputFormat option in "
1374 "RSnapshotOptions. Note that this current default behaviour might change in the future.");
1375 }
1376
1377 // We create an RLoopManager without a data source. This needs to be initialised when the output TTree dataset
1378 // has actually been created and written to TFile, i.e. at the end of the Snapshot execution.
1379 auto newRDF = std::make_shared<RInterface<RLoopManager>>(
1380 std::make_shared<RLoopManager>(colListNoAliasesWithSizeBranches));
1381
1382 auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{
1383 std::string(filename), std::string(dirname), std::string(treename), colListWithAliasesAndSizeBranches,
1384 options, newRDF->GetLoopManager(), GetLoopManager(), false /* fToRNTuple */});
1385
1386 resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, RDFDetail::RInferredType>(
1387 colListNoAliasesWithSizeBranches, newRDF, snapHelperArgs, fProxiedPtr,
1388 colListNoAliasesWithSizeBranches.size(), options.fVector2RVec);
1389 }
1390
// Unless a lazy Snapshot was requested, dereference the result pointer right away so that the Snapshot is
// executed before returning (see the fLazy example in the documentation of the templated overload).
1391 if (!options.fLazy)
1392 *resPtr;
1393 return resPtr;
1394 }
1395
1396 // clang-format off
1397 ////////////////////////////////////////////////////////////////////////////
1398 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
1399 /// \param[in] treename The name of the output TTree.
1400 /// \param[in] filename The name of the output TFile.
1401 /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. A '^' at the beginning and a '$' at the end of the string are implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
1402 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree
1403 /// \return a `RDataFrame` that wraps the snapshotted dataset.
1404 ///
1405 /// This function returns a `RDataFrame` built with the output tree as a source.
1406 /// The types of the columns are automatically inferred and do not need to be specified.
1407 ///
1408 /// See above for a more complete description and example usages.
1409 RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
1410 std::string_view columnNameRegexp = "",
1411 const RSnapshotOptions &options = RSnapshotOptions())
1412 {
// Gather the candidate column names from three places: the column register, the input tree (if any) and the
// data source (if any).
1413 const auto definedColumns = fColRegister.GenerateColumnNames();
1414 auto *tree = fLoopManager->GetTree();
1415
// Either lookup yields an empty list when the corresponding input (tree / data source) is absent.
1416 const auto treeBranchNames = tree != nullptr ? ROOT::Internal::TreeUtils::GetTopLevelBranchNames(*tree) : ColumnNames_t{};
1417 const auto dsColumns = GetDataSource() ? ROOT::Internal::RDF::GetTopLevelFieldNames(*GetDataSource()) : ColumnNames_t{};
1418 // Ignore R_rdf_sizeof_* columns coming from datasources: we don't want to Snapshot those
1419 ColumnNames_t dsColumnsWithoutSizeColumns;
// 13 is the length of the "R_rdf_sizeof_" prefix: names shorter than that, or not starting with it, are kept.
1420 std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns),
1421 [](const std::string &name) { return name.size() < 13 || name.substr(0, 13) != "R_rdf_sizeof_"; });
// Concatenation order: defined columns, then tree branches, then data source columns.
1422 ColumnNames_t columnNames;
1423 columnNames.reserve(definedColumns.size() + treeBranchNames.size() + dsColumnsWithoutSizeColumns.size());
1424 columnNames.insert(columnNames.end(), definedColumns.begin(), definedColumns.end());
1425 columnNames.insert(columnNames.end(), treeBranchNames.begin(), treeBranchNames.end());
1426 columnNames.insert(columnNames.end(), dsColumnsWithoutSizeColumns.begin(), dsColumnsWithoutSizeColumns.end());
1427
1428 // The only way we can get duplicate entries is if a column coming from a tree or data-source is Redefine'd.
1429 // RemoveDuplicates should preserve ordering of the columns: it might be meaningful.
1430 RDFInternal::RemoveDuplicates(columnNames);
1431
// Keep only the candidates selected by the regular expression.
1432 auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Snapshot");
1433
// For RNTuple inputs the selection is additionally passed through RemoveRNTupleSubFields.
// NOTE(review): presumably this drops sub-field entries from the selection - confirm against the helper.
1434 if (RDFInternal::GetDataSourceLabel(*this) == "RNTupleDS") {
1435 RDFInternal::RemoveRNTupleSubFields(selectedColumns);
1436 }
1437
// Delegate to the overload taking an explicit column list.
1438 return Snapshot(treename, filename, selectedColumns, options);
1439 }
1440 // clang-format on
1441
1442 // clang-format off
1443 ////////////////////////////////////////////////////////////////////////////
1444 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
1445 /// \param[in] treename The name of the output TTree.
1446 /// \param[in] filename The name of the output TFile.
1447 /// \param[in] columnList The list of names of the columns/branches to be written.
1448 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
1449 /// \return a `RDataFrame` that wraps the snapshotted dataset.
1450 ///
1451 /// This function returns a `RDataFrame` built with the output tree as a source.
1452 /// The types of the columns are automatically inferred and do not need to be specified.
1453 ///
1454 /// See above for a more complete description and example usages.
1455 RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
1456 std::initializer_list<std::string> columnList,
1457 const RSnapshotOptions &options = RSnapshotOptions())
1458 {
// Materialize the initializer list as a ColumnNames_t and reuse the overload taking a column list.
1459 ColumnNames_t selectedColumns(columnList);
1460 return Snapshot(treename, filename, selectedColumns, options);
1461 }
1462 // clang-format on
1463
1464 ////////////////////////////////////////////////////////////////////////////
1465 /// \brief Save selected columns in memory.
1466 /// \tparam ColumnTypes variadic list of branch/column types.
1467 /// \param[in] columnList columns to be cached in memory.
1468 /// \return a `RDataFrame` that wraps the cached dataset.
1469 ///
1470 /// This action returns a new `RDataFrame` object, completely detached from
1471 /// the originating `RDataFrame`. The new dataframe only contains the cached
1472 /// columns and stores their content in memory for fast, zero-copy subsequent access.
1473 ///
1474 /// Use `Cache` if you know you will only need a subset of the (`Filter`ed) data that
1475 /// fits in memory and that will be accessed many times.
1476 ///
1477 /// \note Cache will refuse to process columns with names of the form `#columnname`. These are special columns
1478 /// made available by some data sources (e.g. RNTupleDS) that represent the size of column `columnname`, and are
1479 /// not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an
1480 /// Alias(): `df.Alias("nbar", "#bar").Cache<std::size_t>(..., {"nbar"})`.
1481 ///
1482 /// ### Example usage:
1483 ///
1484 /// **Types and columns specified:**
1485 /// ~~~{.cpp}
1486 /// auto cache_some_cols_df = df.Cache<double, MyClass, int>({"col0", "col1", "col2"});
1487 /// ~~~
1488 ///
1489 /// **Types inferred and columns specified (this invocation relies on jitting):**
1490 /// ~~~{.cpp}
1491 /// auto cache_some_cols_df = df.Cache({"col0", "col1", "col2"});
1492 /// ~~~
1493 ///
1494 /// **Types inferred and columns selected with a regexp (this invocation relies on jitting):**
1495 /// ~~~{.cpp}
1496 /// auto cache_all_cols_df = df.Cache(myRegexp);
1497 /// ~~~
1498 template <typename... ColumnTypes>
1499 RInterface<RLoopManager> Cache(const ColumnNames_t &columnList)
1500 {
1501 auto staticSeq = std::make_index_sequence<sizeof...(ColumnTypes)>();
1502 return CacheImpl<ColumnTypes...>(columnList, staticSeq);
1503 }
1504
1505 ////////////////////////////////////////////////////////////////////////////
1506 /// \brief Save selected columns in memory.
1507 /// \param[in] columnList columns to be cached in memory
1508 /// \return a `RDataFrame` that wraps the cached dataset.
1509 ///
1510 /// See the previous overloads for more information.
1511 RInterface<RLoopManager> Cache(const ColumnNames_t &columnList)
1512 {
1513 // Early return: if the list of columns is empty, just return an empty RDF
1514 // If we proceed, the jitted call will not compile!
1515 if (columnList.empty()) {
1516 auto nEntries = *this->Count();
1517 RInterface<RLoopManager> emptyRDF(std::make_shared<RLoopManager>(nEntries));
1518 return emptyRDF;
1519 }
1520
1521 std::stringstream cacheCall;
1522 auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr);
1523 RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, *fLoopManager,
1524 fColRegister);
1525 // build a string equivalent to
1526 // "(RInterface<nodetype*>*)(this)->Cache<Ts...>(*(ColumnNames_t*)(&columnList))"
1527 RInterface<RLoopManager> resRDF(std::make_shared<ROOT::Detail::RDF::RLoopManager>(0));
1528 cacheCall << "*reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>*>("
1529 << RDFInternal::PrettyPrintAddr(&resRDF)
1530 << ") = reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RNodeBase>*>("
1531 << RDFInternal::PrettyPrintAddr(&upcastInterface) << ")->Cache<";
1532
1533 const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Cache");
1534
1535 const auto validColumnNames =
1536 GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns);
1537 const auto colTypes = GetValidatedArgTypes(validColumnNames, fColRegister, fLoopManager->GetTree(),
1538 GetDataSource(), "Cache", /*vector2RVec=*/false);
1539 for (const auto &colType : colTypes)
1540 cacheCall << colType << ", ";
1541 if (!columnListWithoutSizeColumns.empty())
1542 cacheCall.seekp(-2, cacheCall.cur); // remove the last ",
1543 cacheCall << ">(*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
1544 << RDFInternal::PrettyPrintAddr(&columnListWithoutSizeColumns) << "));";
1545
1546 // book the code to jit with the RLoopManager and trigger the event loop
1547 fLoopManager->ToJitExec(cacheCall.str());
1548 fLoopManager->Jit();
1549
1550 return resRDF;
1551 }
1552
1553 ////////////////////////////////////////////////////////////////////////////
1554 /// \brief Save selected columns in memory.
1555 /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. A '^' at the beginning and a '$' at the end of the string are implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
1556 /// \return a `RDataFrame` that wraps the cached dataset.
1557 ///
1558 /// The existing columns are matched against the regular expression. If the string provided
1559 /// is empty, all columns are selected. See the previous overloads for more information.
1560 RInterface<RLoopManager> Cache(std::string_view columnNameRegexp = "")
1561 {
1562 const auto definedColumns = fColRegister.GenerateColumnNames();
1563 auto *tree = fLoopManager->GetTree();
1564 const auto treeBranchNames =
1565 tree != nullptr ? ROOT::Internal::TreeUtils::GetTopLevelBranchNames(*tree) : ColumnNames_t{};
1566 const auto dsColumns = GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{};
1567 // Ignore R_rdf_sizeof_* columns coming from datasources: we don't want to Snapshot those
1568 ColumnNames_t dsColumnsWithoutSizeColumns;
1569 std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns),
1570 [](const std::string &name) { return name.size() < 13 || name.substr(0, 13) != "R_rdf_sizeof_"; });
1571 ColumnNames_t columnNames;
1572 columnNames.reserve(definedColumns.size() + treeBranchNames.size() + dsColumns.size());
1573 columnNames.insert(columnNames.end(), definedColumns.begin(), definedColumns.end());
1574 columnNames.insert(columnNames.end(), treeBranchNames.begin(), treeBranchNames.end());
1575 columnNames.insert(columnNames.end(), dsColumns.begin(), dsColumns.end());
1576 const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Cache");
1577 return Cache(selectedColumns);
1578 }
1579
1580 ////////////////////////////////////////////////////////////////////////////
1581 /// \brief Save selected columns in memory.
1582 /// \param[in] columnList columns to be cached in memory.
1583 /// \return a `RDataFrame` that wraps the cached dataset.
1584 ///
1585 /// See the previous overloads for more information.
1586 RInterface<RLoopManager> Cache(std::initializer_list<std::string> columnList)
1587 {
1588 ColumnNames_t selectedColumns(columnList);
1589 return Cache(selectedColumns);
1590 }
1591
1592 // clang-format off
1593 ////////////////////////////////////////////////////////////////////////////
1594 /// \brief Creates a node that filters entries based on range: [begin, end).
1595 /// \param[in] begin Initial entry number considered for this range.
1596 /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
1597 /// \param[in] stride Process one entry of the [begin, end) range every `stride` entries. Must be strictly greater than 0.
1598 /// \return the first node of the computation graph for which the event loop is limited to a certain range of entries.
1599 ///
1600 /// Note that in case of previous Ranges and Filters the selected range refers to the transformed dataset.
1601 /// Ranges are only available if EnableImplicitMT has _not_ been called. Multi-thread ranges are not supported.
1602 ///
1603 /// ### Example usage:
1604 /// ~~~{.cpp}
1605 /// auto d_0_30 = d.Range(0, 30); // Pick the first 30 entries
1606 /// auto d_15_end = d.Range(15, 0); // Pick all entries from 15 onwards
1607 /// auto d_15_end_3 = d.Range(15, 0, 3); // Stride: from event 15, pick an event every 3
1608 /// ~~~
1609 // clang-format on
1610 RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int begin, unsigned int end, unsigned int stride = 1)
1611 {
1612 // check invariants
1613 if (stride == 0 || (end != 0 && end < begin))
1614 throw std::runtime_error("Range: stride must be strictly greater than 0 and end must be greater than begin.");
1615 CheckIMTDisabled("Range");
1616
1617 using Range_t = RDFDetail::RRange<Proxied>;
1618 auto rangePtr = std::make_shared<Range_t>(begin, end, stride, fProxiedPtr);
1619 RInterface<RDFDetail::RRange<Proxied>, DS_t> newInterface(std::move(rangePtr), *fLoopManager, fColRegister);
1620 return newInterface;
1621 }
1622
1623 // clang-format off
1624 ////////////////////////////////////////////////////////////////////////////
1625 /// \brief Creates a node that filters entries based on range.
1626 /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
1627 /// \return a node of the computation graph for which the range is defined.
1628 ///
1629 /// See the other Range overload for a detailed description.
1630 // clang-format on
1631 RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int end) { return Range(0, end, 1); }
1632
1633 // clang-format off
1634 ////////////////////////////////////////////////////////////////////////////
1635 /// \brief Execute a user-defined function on each entry (*instant action*).
1636 /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
1637 /// \param[in] columns Names of the columns/branches in input to the user function.
1638 ///
1639 /// The callable `f` is invoked once per entry. This is an *instant action*:
1640 /// upon invocation, an event loop as well as execution of all scheduled actions
1641 /// is triggered.
1642 /// Users are responsible for the thread-safety of this callable when executing
1643 /// with implicit multi-threading enabled (i.e. ROOT::EnableImplicitMT).
1644 ///
1645 /// ### Example usage:
1646 /// ~~~{.cpp}
1647 /// myDf.Foreach([](int i){ std::cout << i << std::endl;}, {"myIntColumn"});
1648 /// ~~~
1649 // clang-format on
1650 template <typename F>
1651 void Foreach(F f, const ColumnNames_t &columns = {})
1652 {
1653 using arg_types = typename TTraits::CallableTraits<decltype(f)>::arg_types_nodecay;
1654 using ret_type = typename TTraits::CallableTraits<decltype(f)>::ret_type;
1655 ForeachSlot(RDFInternal::AddSlotParameter<ret_type>(f, arg_types()), columns);
1656 }
1657
1658 // clang-format off
1659 ////////////////////////////////////////////////////////////////////////////
1660 /// \brief Execute a user-defined function requiring a processing slot index on each entry (*instant action*).
1661 /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
1662 /// \param[in] columns Names of the columns/branches in input to the user function.
1663 ///
1664 /// Same as `Foreach`, but the user-defined function takes an extra
1665 /// `unsigned int` as its first parameter, the *processing slot index*.
1666 /// This *slot index* will be assigned a different value, `0` to `poolSize - 1`,
1667 /// for each thread of execution.
1668 /// This is meant as a helper in writing thread-safe `Foreach`
1669 /// actions when using `RDataFrame` after `ROOT::EnableImplicitMT()`.
1670 /// The user-defined processing callable is able to follow different
1671 /// *streams of processing* indexed by the first parameter.
1672 /// `ForeachSlot` works just as well with single-thread execution: in that
1673 /// case `slot` will always be `0`.
1674 ///
1675 /// ### Example usage:
1676 /// ~~~{.cpp}
1677 /// myDf.ForeachSlot([](unsigned int s, int i){ std::cout << "Slot " << s << ": "<< i << std::endl;}, {"myIntColumn"});
1678 /// ~~~
1679 // clang-format on
1680 template <typename F>
1681 void ForeachSlot(F f, const ColumnNames_t &columns = {})
1682 {
1683 using ColTypes_t = TypeTraits::RemoveFirstParameter_t<typename TTraits::CallableTraits<F>::arg_types>;
1684 constexpr auto nColumns = ColTypes_t::list_size;
1685
1686 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
1687 CheckAndFillDSColumns(validColumnNames, ColTypes_t());
1688
1689 using Helper_t = RDFInternal::ForeachSlotHelper<F>;
1690 using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
1691
1692 auto action = std::make_unique<Action_t>(Helper_t(std::move(f)), validColumnNames, fProxiedPtr, fColRegister);
1693
1694 fLoopManager->Run();
1695 }
1696
1697 // clang-format off
1698 ////////////////////////////////////////////////////////////////////////////
1699 /// \brief Execute a user-defined reduce operation on the values of a column.
1700 /// \tparam F The type of the reduce callable. Automatically deduced.
1701 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1702 /// \param[in] f A callable with signature `T(T,T)`
1703 /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
1704 /// \return the reduced quantity wrapped in a ROOT::RDF::RResultPtr.
1705 ///
1706 /// A reduction takes two values of a column and merges them into one (e.g.
1707 /// by summing them, taking the maximum, etc). This action performs the
1708 /// specified reduction operation on all processed column values, returning
1709 /// a single value of the same type. The callable f must satisfy the general
1710 /// requirements of a *processing function* besides having signature `T(T,T)`
1711 /// where `T` is the type of column columnName.
1712 ///
1713 /// The returned reduced value of each thread (e.g. the initial value of a sum) is initialized to a
1714 /// default-constructed T object. This is commonly expected to be the neutral/identity element for the specific
1715 /// reduction operation `f` (e.g. 0 for a sum, 1 for a product). If a default-constructed T does not satisfy this
1716 /// requirement, users should explicitly specify an initialization value for T by calling the appropriate `Reduce`
1717 /// overload.
1718 ///
1719 /// ### Example usage:
1720 /// ~~~{.cpp}
1721 /// auto sumOfIntCol = d.Reduce([](int x, int y) { return x + y; }, "intCol");
1722 /// ~~~
1723 ///
1724 /// This action is *lazy*: upon invocation of this method the calculation is
1725 /// booked but not executed. Also see RResultPtr.
1726 // clang-format on
1727 template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
1728 RResultPtr<T> Reduce(F f, std::string_view columnName = "")
1729 {
1730 static_assert(
1731 std::is_default_constructible<T>::value,
1732 "reduce object cannot be default-constructed. Please provide an initialisation value (redIdentity)");
1733 return Reduce(std::move(f), columnName, T());
1734 }
1735
1736 ////////////////////////////////////////////////////////////////////////////
1737 /// \brief Execute a user-defined reduce operation on the values of a column.
1738 /// \tparam F The type of the reduce callable. Automatically deduced.
1739 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1740 /// \param[in] f A callable with signature `T(T,T)`
1741 /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
1742 /// \param[in] redIdentity The reduced object of each thread is initialized to this value.
1743 /// \return the reduced quantity wrapped in a RResultPtr.
1744 ///
1745 /// ### Example usage:
1746 /// ~~~{.cpp}
1747 /// auto sumOfIntColWithOffset = d.Reduce([](int x, int y) { return x + y; }, "intCol", 42);
1748 /// ~~~
1749 /// See the description of the first Reduce overload for more information.
1750 template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
1751 RResultPtr<T> Reduce(F f, std::string_view columnName, const T &redIdentity)
1752 {
1753 return Aggregate(f, f, columnName, redIdentity);
1754 }
1755
1756 ////////////////////////////////////////////////////////////////////////////
1757 /// \brief Return the number of entries processed (*lazy action*).
1758 /// \return the number of entries wrapped in a RResultPtr.
1759 ///
1760 /// Useful e.g. for counting the number of entries passing a certain filter (see also `Report`).
1761 /// This action is *lazy*: upon invocation of this method the calculation is
1762 /// booked but not executed. Also see RResultPtr.
1763 ///
1764 /// ### Example usage:
1765 /// ~~~{.cpp}
1766 /// auto nEntriesAfterCuts = myFilteredDf.Count();
1767 /// ~~~
1768 ///
1769 RResultPtr<ULong64_t> Count()
1770 {
1771 const auto nSlots = fLoopManager->GetNSlots();
1772 auto cSPtr = std::make_shared<ULong64_t>(0);
1773 using Helper_t = RDFInternal::CountHelper;
1774 using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
1775 auto action = std::make_unique<Action_t>(Helper_t(cSPtr, nSlots), ColumnNames_t({}), fProxiedPtr,
1776 RDFInternal::RColumnRegister(fColRegister));
1777 return MakeResultPtr(cSPtr, *fLoopManager, std::move(action));
1778 }
1779
1780 ////////////////////////////////////////////////////////////////////////////
1781 /// \brief Return a collection of values of a column (*lazy action*, returns a std::vector by default).
1782 /// \tparam T The type of the column.
1783 /// \tparam COLL The type of collection used to store the values.
1784 /// \param[in] column The name of the column to collect the values of.
1785 /// \return the content of the selected column wrapped in a RResultPtr.
1786 ///
1787 /// The collection type to be specified for C-style array columns is `RVec<T>`:
1788 /// in this case the returned collection is a `std::vector<RVec<T>>`.
1789 /// ### Example usage:
1790 /// ~~~{.cpp}
1791 /// // In this case intCol is a std::vector<int>
1792 /// auto intCol = rdf.Take<int>("integerColumn");
1793 /// // Same content as above but in this case taken as a RVec<int>
1794 /// auto intColAsRVec = rdf.Take<int, RVec<int>>("integerColumn");
1795 /// // In this case intCol is a std::vector<RVec<int>>, a collection of collections
1796 /// auto cArrayIntCol = rdf.Take<RVec<int>>("cArrayInt");
1797 /// ~~~
1798 /// This action is *lazy*: upon invocation of this method the calculation is
1799 /// booked but not executed. Also see RResultPtr.
1800 template <typename T, typename COLL = std::vector<T>>
1801 RResultPtr<COLL> Take(std::string_view column = "")
1802 {
1803 const auto columns = column.empty() ? ColumnNames_t() : ColumnNames_t({std::string(column)});
1804
1805 const auto validColumnNames = GetValidatedColumnNames(1, columns);
1806 CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>());
1807
1808 using Helper_t = RDFInternal::TakeHelper<T, T, COLL>;
1809 using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
1810 auto valuesPtr = std::make_shared<COLL>();
1811 const auto nSlots = fLoopManager->GetNSlots();
1812
1813 auto action =
1814 std::make_unique<Action_t>(Helper_t(valuesPtr, nSlots), validColumnNames, fProxiedPtr, fColRegister);
1815 return MakeResultPtr(valuesPtr, *fLoopManager, std::move(action));
1816 }
1817
1818 ////////////////////////////////////////////////////////////////////////////
1819 /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*).
1820 /// \tparam V The type of the column used to fill the histogram.
1821 /// \param[in] model The returned histogram will be constructed using this as a model.
1822 /// \param[in] vName The name of the column that will fill the histogram.
1823 /// \return the monodimensional histogram wrapped in a RResultPtr.
1824 ///
1825 /// Columns can be of a container type (e.g. `std::vector<double>`), in which case the histogram
1826 /// is filled with each one of the elements of the container. In case multiple columns of container type
1827 /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
1828 /// possibly different lengths between events).
1829 /// This action is *lazy*: upon invocation of this method the calculation is
1830 /// booked but not executed. Also see RResultPtr.
1831 ///
1832 /// ### Example usage:
1833 /// ~~~{.cpp}
1834 /// // Deduce column type (this invocation needs jitting internally)
1835 /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
1836 /// // Explicit column type
1837 /// auto myHist2 = myDf.Histo1D<float>({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
1838 /// ~~~
1839 ///
1840 /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
1841 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1842 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1843 template <typename V = RDFDetail::RInferredType>
1844 RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.}, std::string_view vName = "")
1845 {
1846 const auto userColumns = vName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(vName)});
1847
1848 const auto validatedColumns = GetValidatedColumnNames(1, userColumns);
1849
1850 std::shared_ptr<::TH1D> h(nullptr);
1851 {
1852 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1853 h = model.GetHistogram();
1854 h->SetDirectory(nullptr);
1855 }
1856
1857 if (h->GetXaxis()->GetXmax() == h->GetXaxis()->GetXmin())
1858 RDFInternal::HistoUtils<::TH1D>::SetCanExtendAllAxes(*h);
1859 return CreateAction<RDFInternal::ActionTags::Histo1D, V>(validatedColumns, h, h, fProxiedPtr);
1860 }
1861
1862 ////////////////////////////////////////////////////////////////////////////
1863 /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*).
1864 /// \tparam V The type of the column used to fill the histogram.
1865 /// \param[in] vName The name of the column that will fill the histogram.
1866 /// \return the monodimensional histogram wrapped in a RResultPtr.
1867 ///
1868 /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
1869 /// The "name" and "title" strings are built starting from the input column name.
1870 /// See the description of the first Histo1D() overload for more details.
1871 ///
1872 /// ### Example usage:
1873 /// ~~~{.cpp}
1874 /// // Deduce column type (this invocation needs jitting internally)
1875 /// auto myHist1 = myDf.Histo1D("myColumn");
1876 /// // Explicit column type
1877 /// auto myHist2 = myDf.Histo1D<float>("myColumn");
1878 /// ~~~
1879 template <typename V = RDFDetail::RInferredType>
1880 RResultPtr<::TH1D> Histo1D(std::string_view vName)
1881 {
1882 const auto h_name = std::string(vName);
1883 const auto h_title = h_name + ";" + h_name + ";count";
1884 return Histo1D<V>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName);
1885 }
1886
1887 ////////////////////////////////////////////////////////////////////////////
1888 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1889 /// \tparam V The type of the column used to fill the histogram.
1890 /// \tparam W The type of the column used as weights.
1891 /// \param[in] model The returned histogram will be constructed using this as a model.
1892 /// \param[in] vName The name of the column that will fill the histogram.
1893 /// \param[in] wName The name of the column that will provide the weights.
1894 /// \return the monodimensional histogram wrapped in a RResultPtr.
1895 ///
1896 /// See the description of the first Histo1D() overload for more details.
1897 ///
1898 /// ### Example usage:
1899 /// ~~~{.cpp}
1900 /// // Deduce column type (this invocation needs jitting internally)
1901 /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
1902 /// // Explicit column type
1903 /// auto myHist2 = myDf.Histo1D<float, int>({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
1904 /// ~~~
1905 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1906 RResultPtr<::TH1D> Histo1D(const TH1DModel &model, std::string_view vName, std::string_view wName)
1907 {
1908 const std::vector<std::string_view> columnViews = {vName, wName};
1909 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1910 ? ColumnNames_t()
1911 : ColumnNames_t(columnViews.begin(), columnViews.end());
1912 std::shared_ptr<::TH1D> h(nullptr);
1913 {
1914 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1915 h = model.GetHistogram();
1916 }
1917 return CreateAction<RDFInternal::ActionTags::Histo1D, V, W>(userColumns, h, h, fProxiedPtr);
1918 }
1919
1920 ////////////////////////////////////////////////////////////////////////////
1921 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1922 /// \tparam V The type of the column used to fill the histogram.
1923 /// \tparam W The type of the column used as weights.
1924 /// \param[in] vName The name of the column that will fill the histogram.
1925 /// \param[in] wName The name of the column that will provide the weights.
1926 /// \return the monodimensional histogram wrapped in a RResultPtr.
1927 ///
1928 /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
1929 /// The "name" and "title" strings are built starting from the input column names.
1930 /// See the description of the first Histo1D() overload for more details.
1931 ///
1932 /// ### Example usage:
1933 /// ~~~{.cpp}
1934 /// // Deduce column types (this invocation needs jitting internally)
1935 /// auto myHist1 = myDf.Histo1D("myValue", "myweight");
1936 /// // Explicit column types
1937 /// auto myHist2 = myDf.Histo1D<float, int>("myValue", "myweight");
1938 /// ~~~
1939 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1940 RResultPtr<::TH1D> Histo1D(std::string_view vName, std::string_view wName)
1941 {
1942 // We build name and title based on the value and weight column names
1943 std::string str_vName{vName};
1944 std::string str_wName{wName};
1945 const auto h_name = str_vName + "_weighted_" + str_wName;
1946 const auto h_title = str_vName + ", weights: " + str_wName + ";" + str_vName + ";count * " + str_wName;
1947 return Histo1D<V, W>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName, wName);
1948 }
1949
1950 ////////////////////////////////////////////////////////////////////////////
1951 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1952 /// \tparam V The type of the column used to fill the histogram.
1953 /// \tparam W The type of the column used as weights.
1954 /// \param[in] model The returned histogram will be constructed using this as a model.
1955 /// \return the monodimensional histogram wrapped in a RResultPtr.
1956 ///
1957 /// This overload will use the first two default columns as column names.
1958 /// See the description of the first Histo1D() overload for more details.
1959 template <typename V, typename W>
1960 RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.})
1961 {
1962 return Histo1D<V, W>(model, "", "");
1963 }
1964
1965 ////////////////////////////////////////////////////////////////////////////
1966 /// \brief Fill and return a two-dimensional histogram (*lazy action*).
1967 /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1968 /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1969 /// \param[in] model The returned histogram will be constructed using this as a model.
1970 /// \param[in] v1Name The name of the column that will fill the x axis.
1971 /// \param[in] v2Name The name of the column that will fill the y axis.
1972 /// \return the bidimensional histogram wrapped in a RResultPtr.
1973 ///
1974 /// Columns can be of a container type (e.g. std::vector<double>), in which case the histogram
1975 /// is filled with each one of the elements of the container. In case multiple columns of container type
1976 /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
1977 /// possibly different lengths between events).
1978 /// This action is *lazy*: upon invocation of this method the calculation is
1979 /// booked but not executed. Also see RResultPtr.
1980 ///
1981 /// ### Example usage:
1982 /// ~~~{.cpp}
1983 /// // Deduce column types (this invocation needs jitting internally)
1984 /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1985 /// // Explicit column types
1986 /// auto myHist2 = myDf.Histo2D<float, float>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1987 /// ~~~
1988 ///
1989 ///
1990 /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
1991 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1992 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1993 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
1994 RResultPtr<::TH2D> Histo2D(const TH2DModel &model, std::string_view v1Name = "", std::string_view v2Name = "")
1995 {
1996 std::shared_ptr<::TH2D> h(nullptr);
1997 {
1998 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1999 h = model.GetHistogram();
2000 }
2001 if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
2002 throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
2003 }
2004 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
2005 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2006 ? ColumnNames_t()
2007 : ColumnNames_t(columnViews.begin(), columnViews.end());
2008 return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2>(userColumns, h, h, fProxiedPtr);
2009 }
2010
2011 ////////////////////////////////////////////////////////////////////////////
2012 /// \brief Fill and return a weighted two-dimensional histogram (*lazy action*).
2013 /// \tparam V1 The type of the column used to fill the x axis of the histogram.
2014 /// \tparam V2 The type of the column used to fill the y axis of the histogram.
2015 /// \tparam W The type of the column used for the weights of the histogram.
2016 /// \param[in] model The returned histogram will be constructed using this as a model.
2017 /// \param[in] v1Name The name of the column that will fill the x axis.
2018 /// \param[in] v2Name The name of the column that will fill the y axis.
2019 /// \param[in] wName The name of the column that will provide the weights.
2020 /// \return the bidimensional histogram wrapped in a RResultPtr.
2021 ///
2022 /// This action is *lazy*: upon invocation of this method the calculation is
2023 /// booked but not executed. Also see RResultPtr.
2024 ///
2025 /// ### Example usage:
2026 /// ~~~{.cpp}
2027 /// // Deduce column types (this invocation needs jitting internally)
2028 /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
2029 /// // Explicit column types
2030 /// auto myHist2 = myDf.Histo2D<float, float, double>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
2031 /// ~~~
2032 ///
2033 /// See the documentation of the first Histo2D() overload for more details.
2034 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2035 typename W = RDFDetail::RInferredType>
2036 RResultPtr<::TH2D>
2037 Histo2D(const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
2038 {
2039 std::shared_ptr<::TH2D> h(nullptr);
2040 {
2041 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2042 h = model.GetHistogram();
2043 }
2044 if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
2045 throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
2046 }
2047 const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
2048 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2049 ? ColumnNames_t()
2050 : ColumnNames_t(columnViews.begin(), columnViews.end());
2051 return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2, W>(userColumns, h, h, fProxiedPtr);
2052 }
2053
2054 template <typename V1, typename V2, typename W>
2055 RResultPtr<::TH2D> Histo2D(const TH2DModel &model)
2056 {
2057 return Histo2D<V1, V2, W>(model, "", "", "");
2058 }
2059
2060 ////////////////////////////////////////////////////////////////////////////
2061 /// \brief Fill and return a three-dimensional histogram (*lazy action*).
2062 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
2063 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
2064 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
2065 /// \param[in] model The returned histogram will be constructed using this as a model.
2066 /// \param[in] v1Name The name of the column that will fill the x axis.
2067 /// \param[in] v2Name The name of the column that will fill the y axis.
2068 /// \param[in] v3Name The name of the column that will fill the z axis.
2069 /// \return the tridimensional histogram wrapped in a RResultPtr.
2070 ///
2071 /// This action is *lazy*: upon invocation of this method the calculation is
2072 /// booked but not executed. Also see RResultPtr.
2073 ///
2074 /// ### Example usage:
2075 /// ~~~{.cpp}
2076 /// // Deduce column types (this invocation needs jitting internally)
2077 /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
2078 /// "myValueX", "myValueY", "myValueZ");
2079 /// // Explicit column types
2080 /// auto myHist2 = myDf.Histo3D<double, double, float>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
2081 /// "myValueX", "myValueY", "myValueZ");
2082 /// ~~~
2083 ///
2084 /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
2085 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2086 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2087 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2088 typename V3 = RDFDetail::RInferredType>
2089 RResultPtr<::TH3D> Histo3D(const TH3DModel &model, std::string_view v1Name = "", std::string_view v2Name = "",
2090 std::string_view v3Name = "")
2091 {
2092 std::shared_ptr<::TH3D> h(nullptr);
2093 {
2094 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2095 h = model.GetHistogram();
2096 }
2097 if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
2098 throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
2099 }
2100 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
2101 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2102 ? ColumnNames_t()
2103 : ColumnNames_t(columnViews.begin(), columnViews.end());
2104 return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3>(userColumns, h, h, fProxiedPtr);
2105 }
2106
2107 ////////////////////////////////////////////////////////////////////////////
2108 /// \brief Fill and return a three-dimensional histogram (*lazy action*).
2109 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
2110 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
2111 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
2112 /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
2113 /// \param[in] model The returned histogram will be constructed using this as a model.
2114 /// \param[in] v1Name The name of the column that will fill the x axis.
2115 /// \param[in] v2Name The name of the column that will fill the y axis.
2116 /// \param[in] v3Name The name of the column that will fill the z axis.
2117 /// \param[in] wName The name of the column that will provide the weights.
2118 /// \return the tridimensional histogram wrapped in a RResultPtr.
2119 ///
2120 /// This action is *lazy*: upon invocation of this method the calculation is
2121 /// booked but not executed. Also see RResultPtr.
2122 ///
2123 /// ### Example usage:
2124 /// ~~~{.cpp}
2125 /// // Deduce column types (this invocation needs jitting internally)
2126 /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
2127 /// "myValueX", "myValueY", "myValueZ", "myWeight");
2128 /// // Explicit column types
2129 /// using d_t = double;
2130 /// auto myHist2 = myDf.Histo3D<d_t, d_t, float, d_t>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
2131 /// "myValueX", "myValueY", "myValueZ", "myWeight");
2132 /// ~~~
2133 ///
2134 ///
2135 /// See the documentation of the first Histo3D() overload for more details.
2136 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2137 typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
2138 RResultPtr<::TH3D> Histo3D(const TH3DModel &model, std::string_view v1Name, std::string_view v2Name,
2139 std::string_view v3Name, std::string_view wName)
2140 {
2141 std::shared_ptr<::TH3D> h(nullptr);
2142 {
2143 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2144 h = model.GetHistogram();
2145 }
2146 if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
2147 throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
2148 }
2149 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
2150 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2151 ? ColumnNames_t()
2152 : ColumnNames_t(columnViews.begin(), columnViews.end());
2153 return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3, W>(userColumns, h, h, fProxiedPtr);
2154 }
2155
2156 template <typename V1, typename V2, typename V3, typename W>
2157 RResultPtr<::TH3D> Histo3D(const TH3DModel &model)
2158 {
2159 return Histo3D<V1, V2, V3, W>(model, "", "", "", "");
2160 }
2161
2162 ////////////////////////////////////////////////////////////////////////////
2163 /// \brief Fill and return an N-dimensional histogram (*lazy action*).
2164 /// \tparam FirstColumn The first type of the column the values of which are used to fill the object. Inferred if not
2165 /// present.
2166 /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the
2167 /// object.
2168 /// \param[in] model The returned histogram will be constructed using this as a model.
2169 /// \param[in] columnList
2170 /// A list containing the names of the columns that will be passed when calling `Fill`.
2171 /// (N columns for unweighted filling, or N+1 columns for weighted filling)
2172 /// \return the N-dimensional histogram wrapped in a RResultPtr.
2173 ///
2174 /// This action is *lazy*: upon invocation of this method the calculation is
2175 /// booked but not executed. See RResultPtr documentation.
2176 ///
2177 /// ### Example usage:
2178 /// ~~~{.cpp}
2179 /// auto myFilledObj = myDf.HistoND<float, float, float, float>({"name","title", 4,
2180 /// {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}},
2181 /// {"col0", "col1", "col2", "col3"});
2182 /// ~~~
2183 ///
2184 template <typename FirstColumn, typename... OtherColumns> // need FirstColumn to disambiguate overloads
2185 RResultPtr<::THnD> HistoND(const THnDModel &model, const ColumnNames_t &columnList)
2186 {
2187 std::shared_ptr<::THnD> h(nullptr);
2188 {
2189 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2190 h = model.GetHistogram();
2191
2192 if (int(columnList.size()) == (h->GetNdimensions() + 1)) {
2193 h->Sumw2();
2194 } else if (int(columnList.size()) != h->GetNdimensions()) {
2195 throw std::runtime_error("Wrong number of columns for the specified number of histogram axes.");
2196 }
2197 }
2198 return CreateAction<RDFInternal::ActionTags::HistoND, FirstColumn, OtherColumns...>(columnList, h, h,
2199 fProxiedPtr);
2200 }
2201
2202 ////////////////////////////////////////////////////////////////////////////
2203 /// \brief Fill and return an N-dimensional histogram (*lazy action*).
2204 /// \param[in] model The returned histogram will be constructed using this as a model.
2205 /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
2206 /// (N columns for unweighted filling, or N+1 columns for weighted filling)
2207 /// \return the N-dimensional histogram wrapped in a RResultPtr.
2208 ///
2209 /// This action is *lazy*: upon invocation of this method the calculation is
2210 /// booked but not executed. Also see RResultPtr.
2211 ///
2212 /// ### Example usage:
2213 /// ~~~{.cpp}
2214 /// auto myFilledObj = myDf.HistoND({"name","title", 4,
2215 /// {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}},
2216 /// {"col0", "col1", "col2", "col3"});
2217 /// ~~~
2218 ///
2219 RResultPtr<::THnD> HistoND(const THnDModel &model, const ColumnNames_t &columnList)
2220 {
2221 std::shared_ptr<::THnD> h(nullptr);
2222 {
2223 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2224 h = model.GetHistogram();
2225
2226 if (int(columnList.size()) == (h->GetNdimensions() + 1)) {
2227 h->Sumw2();
2228 } else if (int(columnList.size()) != h->GetNdimensions()) {
2229 throw std::runtime_error("Wrong number of columns for the specified number of histogram axes.");
2230 }
2231 }
2232 return CreateAction<RDFInternal::ActionTags::HistoND, RDFDetail::RInferredType>(columnList, h, h, fProxiedPtr,
2233 columnList.size());
2234 }
2235
2236 ////////////////////////////////////////////////////////////////////////////
2237 /// \brief Fill and return a TGraph object (*lazy action*).
2238 /// \tparam X The type of the column used to fill the x axis.
2239 /// \tparam Y The type of the column used to fill the y axis.
2240 /// \param[in] x The name of the column that will fill the x axis.
2241 /// \param[in] y The name of the column that will fill the y axis.
2242 /// \return the TGraph wrapped in a RResultPtr.
2243 ///
2244 /// Columns can be of a container type (e.g. std::vector<double>), in which case the TGraph
2245 /// is filled with each one of the elements of the container.
2246 /// If Multithreading is enabled, the order in which points are inserted is undefined.
2247 /// If the graph has to be drawn, the user is advised to sort it along x before drawing it.
2248 /// A name and a title are assigned to the TGraph based on the input column names.
2249 ///
2250 /// This action is *lazy*: upon invocation of this method the calculation is
2251 /// booked but not executed. Also see RResultPtr.
2252 ///
2253 /// ### Example usage:
2254 /// ~~~{.cpp}
2255 /// // Deduce column types (this invocation needs jitting internally)
2256 /// auto myGraph1 = myDf.Graph("xValues", "yValues");
2257 /// // Explicit column types
2258 /// auto myGraph2 = myDf.Graph<int, float>("xValues", "yValues");
2259 /// ~~~
2260 ///
2261 /// \note Differently from other ROOT interfaces, the returned TGraph is not associated to gDirectory
2262 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2263 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2264 template <typename X = RDFDetail::RInferredType, typename Y = RDFDetail::RInferredType>
2265 RResultPtr<::TGraph> Graph(std::string_view x = "", std::string_view y = "")
2266 {
2267 auto graph = std::make_shared<::TGraph>();
2268 const std::vector<std::string_view> columnViews = {x, y};
2269 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2270 ? ColumnNames_t()
2271 : ColumnNames_t(columnViews.begin(), columnViews.end());
2272
2273 const auto validatedColumns = GetValidatedColumnNames(2, userColumns);
2274
2275 // We build a default name and title based on the input columns
2276 const auto g_name = validatedColumns[1] + "_vs_" + validatedColumns[0];
2277 const auto g_title = validatedColumns[1] + " vs " + validatedColumns[0];
2278 graph->SetNameTitle(g_name.c_str(), g_title.c_str());
2279 graph->GetXaxis()->SetTitle(validatedColumns[0].c_str());
2280 graph->GetYaxis()->SetTitle(validatedColumns[1].c_str());
2281
2282 return CreateAction<RDFInternal::ActionTags::Graph, X, Y>(validatedColumns, graph, graph, fProxiedPtr);
2283 }
2284
2285 ////////////////////////////////////////////////////////////////////////////
2286 /// \brief Fill and return a TGraphAsymmErrors object (*lazy action*).
2287 /// \param[in] x The name of the column that will fill the x axis.
2288 /// \param[in] y The name of the column that will fill the y axis.
2289 /// \param[in] exl The name of the column of X low errors
2290 /// \param[in] exh The name of the column of X high errors
2291 /// \param[in] eyl The name of the column of Y low errors
2292 /// \param[in] eyh The name of the column of Y high errors
2293 /// \return the TGraphAsymmErrors wrapped in a RResultPtr.
2294 ///
2295 /// Columns can be of a container type (e.g. std::vector<double>), in which case the graph
2296 /// is filled with each one of the elements of the container.
2297 /// If Multithreading is enabled, the order in which points are inserted is undefined.
2298 ///
2299 /// This action is *lazy*: upon invocation of this method the calculation is
2300 /// booked but not executed. Also see RResultPtr.
2301 ///
2302 /// ### Example usage:
2303 /// ~~~{.cpp}
2304 /// // Deduce column types (this invocation needs jitting internally)
2305 /// auto myGAE1 = myDf.GraphAsymmErrors("xValues", "yValues", "exl", "exh", "eyl", "eyh");
2306 /// // Explicit column types
2307 /// using f = float;
2308 /// auto myGAE2 = myDf.GraphAsymmErrors<f, f, f, f, f, f>("xValues", "yValues", "exl", "exh", "eyl", "eyh");
2309 /// ~~~
2310 ///
2311 /// `GraphAsymmErrors` should also be used when only the values along one of the axes
2312 /// have associated errors. For example, only `ey` exists and `ex` is equal to zero.
2313 /// In such cases, the user should do the following:
2314 /// ~~~{.cpp}
2315 /// // Create a column of zeros in RDataFrame
2316 /// auto rdf_withzeros = rdf.Define("zero", "0");
2317 /// // or alternatively:
2318 /// auto rdf_withzeros = rdf.Define("zero", []() -> double { return 0.;});
2319 /// // Create the graph with y errors only
2320 /// auto rdf_errorsOnYOnly = rdf_withzeros.GraphAsymmErrors("xValues", "yValues", "zero", "zero", "eyl", "eyh");
2321 /// ~~~
2322 ///
2323 /// \note Differently from other ROOT interfaces, the returned TGraphAsymmErrors is not associated to gDirectory
2324 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2325 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2326 template <typename X = RDFDetail::RInferredType, typename Y = RDFDetail::RInferredType,
2327 typename EXL = RDFDetail::RInferredType, typename EXH = RDFDetail::RInferredType,
2328 typename EYL = RDFDetail::RInferredType, typename EYH = RDFDetail::RInferredType>
2329 RResultPtr<::TGraphAsymmErrors>
2330 GraphAsymmErrors(std::string_view x = "", std::string_view y = "", std::string_view exl = "",
2331 std::string_view exh = "", std::string_view eyl = "", std::string_view eyh = "")
2332 {
2333 auto graph = std::make_shared<::TGraphAsymmErrors>();
2334 const std::vector<std::string_view> columnViews = {x, y, exl, exh, eyl, eyh};
2335 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2336 ? ColumnNames_t()
2337 : ColumnNames_t(columnViews.begin(), columnViews.end());
2338
2339 const auto validatedColumns = GetValidatedColumnNames(6, userColumns);
2340
2341 // We build a default name and title based on the input columns
2342 const auto g_name = validatedColumns[1] + "_vs_" + validatedColumns[0];
2343 const auto g_title = validatedColumns[1] + " vs " + validatedColumns[0];
2344 graph->SetNameTitle(g_name.c_str(), g_title.c_str());
2345 graph->GetXaxis()->SetTitle(validatedColumns[0].c_str());
2346 graph->GetYaxis()->SetTitle(validatedColumns[1].c_str());
2347
2348 return CreateAction<RDFInternal::ActionTags::GraphAsymmErrors, X, Y, EXL, EXH, EYL, EYH>(validatedColumns, graph,
2349 graph, fProxiedPtr);
2350 }
2351
2352 ////////////////////////////////////////////////////////////////////////////
2353 /// \brief Fill and return a one-dimensional profile (*lazy action*).
2354 /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
2355 /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
2356 /// \param[in] model The model to be considered to build the new return value.
2357 /// \param[in] v1Name The name of the column that will fill the x axis.
2358 /// \param[in] v2Name The name of the column that will fill the y axis.
2359 /// \return the monodimensional profile wrapped in a RResultPtr.
2360 ///
2361 /// This action is *lazy*: upon invocation of this method the calculation is
2362 /// booked but not executed. Also see RResultPtr.
2363 ///
2364 /// ### Example usage:
2365 /// ~~~{.cpp}
2366 /// // Deduce column types (this invocation needs jitting internally)
2367 /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
2368 /// // Explicit column types
2369 /// auto myProf2 = myDf.Profile1D<int, float>({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
2370 /// ~~~
2371 ///
2372 /// \note Differently from other ROOT interfaces, the returned profile is not associated to gDirectory
2373 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2374 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2375 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
2376 RResultPtr<::TProfile>
2377 Profile1D(const TProfile1DModel &model, std::string_view v1Name = "", std::string_view v2Name = "")
2378 {
2379 std::shared_ptr<::TProfile> h(nullptr);
2380 {
2381 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2382 h = model.GetProfile();
2383 }
2384
2385 if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
2386 throw std::runtime_error("Profiles with no axes limits are not supported yet.");
2387 }
2388 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
2389 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2390 ? ColumnNames_t()
2391 : ColumnNames_t(columnViews.begin(), columnViews.end());
2392 return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2>(userColumns, h, h, fProxiedPtr);
2393 }
2394
2395 ////////////////////////////////////////////////////////////////////////////
2396 /// \brief Fill and return a one-dimensional profile (*lazy action*).
2397 /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
2398 /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
2399 /// \tparam W The type of the column the weights of which are used to fill the profile. Inferred if not present.
2400 /// \param[in] model The model to be considered to build the new return value.
2401 /// \param[in] v1Name The name of the column that will fill the x axis.
2402 /// \param[in] v2Name The name of the column that will fill the y axis.
2403 /// \param[in] wName The name of the column that will provide the weights.
2404 /// \return the monodimensional profile wrapped in a RResultPtr.
2405 ///
2406 /// This action is *lazy*: upon invocation of this method the calculation is
2407 /// booked but not executed. Also see RResultPtr.
2408 ///
2409 /// ### Example usage:
2410 /// ~~~{.cpp}
2411 /// // Deduce column types (this invocation needs jitting internally)
2412 /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues", "weight");
2413 /// // Explicit column types
2414 /// auto myProf2 = myDf.Profile1D<int, float, double>({"profName", "profTitle", 64u, -4., 4.},
2415 /// "xValues", "yValues", "weight");
2416 /// ~~~
2417 ///
2418 /// See the first Profile1D() overload for more details.
2419 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2420 typename W = RDFDetail::RInferredType>
2421 RResultPtr<::TProfile>
2422 Profile1D(const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
2423 {
2424 std::shared_ptr<::TProfile> h(nullptr);
2425 {
2426 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2427 h = model.GetProfile();
2428 }
2429
2430 if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
2431 throw std::runtime_error("Profile histograms with no axes limits are not supported yet.");
2432 }
2433 const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
2434 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2435 ? ColumnNames_t()
2436 : ColumnNames_t(columnViews.begin(), columnViews.end());
2437 return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2, W>(userColumns, h, h, fProxiedPtr);
2438 }
2439
2440 ////////////////////////////////////////////////////////////////////////////
2441 /// \brief Fill and return a one-dimensional profile (*lazy action*).
2442 /// See the first Profile1D() overload for more details.
2443 template <typename V1, typename V2, typename W>
2444 RResultPtr<::TProfile> Profile1D(const TProfile1DModel &model)
2445 {
2446 return Profile1D<V1, V2, W>(model, "", "", "");
2447 }
2448
2449 ////////////////////////////////////////////////////////////////////////////
2450 /// \brief Fill and return a two-dimensional profile (*lazy action*).
2451 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
2452 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
2453 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
2454 /// \param[in] model The returned profile will be constructed using this as a model.
2455 /// \param[in] v1Name The name of the column that will fill the x axis.
2456 /// \param[in] v2Name The name of the column that will fill the y axis.
2457 /// \param[in] v3Name The name of the column that will fill the z axis.
2458 /// \return the bidimensional profile wrapped in a RResultPtr.
2459 ///
2460 /// This action is *lazy*: upon invocation of this method the calculation is
2461 /// booked but not executed. Also see RResultPtr.
2462 ///
2463 /// ### Example usage:
2464 /// ~~~{.cpp}
2465 /// // Deduce column types (this invocation needs jitting internally)
2466 /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2467 /// "xValues", "yValues", "zValues");
2468 /// // Explicit column types
2469 /// auto myProf2 = myDf.Profile2D<int, float, double>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2470 /// "xValues", "yValues", "zValues");
2471 /// ~~~
2472 ///
2473 /// \note Differently from other ROOT interfaces, the returned profile is not associated to gDirectory
2474 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2475 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2476 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2477 typename V3 = RDFDetail::RInferredType>
2478 RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model, std::string_view v1Name = "",
2479 std::string_view v2Name = "", std::string_view v3Name = "")
2480 {
2481 std::shared_ptr<::TProfile2D> h(nullptr);
2482 {
2483 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2484 h = model.GetProfile();
2485 }
2486
2487 if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
2488 throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
2489 }
2490 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
2491 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2492 ? ColumnNames_t()
2493 : ColumnNames_t(columnViews.begin(), columnViews.end());
2494 return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3>(userColumns, h, h, fProxiedPtr);
2495 }
2496
2497 ////////////////////////////////////////////////////////////////////////////
2498 /// \brief Fill and return a two-dimensional profile (*lazy action*).
2499 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
2500 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
2501 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
2502 /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
2503 /// \param[in] model The returned histogram will be constructed using this as a model.
2504 /// \param[in] v1Name The name of the column that will fill the x axis.
2505 /// \param[in] v2Name The name of the column that will fill the y axis.
2506 /// \param[in] v3Name The name of the column that will fill the z axis.
2507 /// \param[in] wName The name of the column that will provide the weights.
2508 /// \return the bidimensional profile wrapped in a RResultPtr.
2509 ///
2510 /// This action is *lazy*: upon invocation of this method the calculation is
2511 /// booked but not executed. Also see RResultPtr.
2512 ///
2513 /// ### Example usage:
2514 /// ~~~{.cpp}
2515 /// // Deduce column types (this invocation needs jitting internally)
2516 /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2517 /// "xValues", "yValues", "zValues", "weight");
2518 /// // Explicit column types
2519 /// auto myProf2 = myDf.Profile2D<int, float, double, int>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2520 /// "xValues", "yValues", "zValues", "weight");
2521 /// ~~~
2522 ///
2523 /// See the first Profile2D() overload for more details.
2524 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2525 typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
2526 RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name,
2527 std::string_view v3Name, std::string_view wName)
2528 {
2529 std::shared_ptr<::TProfile2D> h(nullptr);
2530 {
2531 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2532 h = model.GetProfile();
2533 }
2534
2535 if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
2536 throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
2537 }
2538 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
2539 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2540 ? ColumnNames_t()
2541 : ColumnNames_t(columnViews.begin(), columnViews.end());
2542 return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3, W>(userColumns, h, h, fProxiedPtr);
2543 }
2544
2545 /// \brief Fill and return a two-dimensional profile (*lazy action*).
2546 /// See the first Profile2D() overload for more details.
2547 template <typename V1, typename V2, typename V3, typename W>
2548 RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model)
2549 {
2550 return Profile2D<V1, V2, V3, W>(model, "", "", "", "");
2551 }
2552
2553 ////////////////////////////////////////////////////////////////////////////
2554 /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*).
2555 ///
2556 /// Type T must provide at least:
2557 /// - a copy-constructor
2558 /// - a `Fill` method that accepts as many arguments and with same types as the column names passed as columnList
2559 /// (these types can also be passed as template parameters to this method)
2560 /// - a `Merge` method with signature `Merge(TCollection *)` or `Merge(const std::vector<T *>&)` that merges the
2561 /// objects passed as argument into the object on which `Merge` was called (analogous to TH1::Merge). Note that
2562 /// if the signature that takes a `TCollection*` is used, then T must inherit from TObject (to allow insertion in
2563 /// the TCollection*).
2564 ///
2565 /// \tparam FirstColumn The first type of the column the values of which are used to fill the object. Inferred together with OtherColumns if not present.
2566 /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the object.
2567 /// \tparam T The type of the object to fill. Automatically deduced.
2568 /// \param[in] model The model to be considered to build the new return value.
2569 /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
2570 /// \return the filled object wrapped in a RResultPtr.
2571 ///
2572 /// The user gives up ownership of the model object.
2573 /// The list of column names to be used for filling must always be specified.
2574 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed.
2575 /// Also see RResultPtr.
2576 ///
2577 /// ### Example usage:
2578 /// ~~~{.cpp}
2579 /// MyClass obj;
2580 /// // Deduce column types (this invocation needs jitting internally, and in this case
2581 /// // MyClass needs to be known to the interpreter)
2582 /// auto myFilledObj = myDf.Fill(obj, {"col0", "col1"});
2583 /// // explicit column types
2584 /// auto myFilledObj = myDf.Fill<float, float>(obj, {"col0", "col1"});
2585 /// ~~~
2586 ///
2587 template <typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename T>
2588 RResultPtr<std::decay_t<T>> Fill(T &&model, const ColumnNames_t &columnList)
2589 {
2590 auto h = std::make_shared<std::decay_t<T>>(std::forward<T>(model));
2591 if (!RDFInternal::HistoUtils<T>::HasAxisLimits(*h)) {
2592 throw std::runtime_error("The absence of axes limits is not supported yet.");
2593 }
2594 return CreateAction<RDFInternal::ActionTags::Fill, FirstColumn, OtherColumns...>(columnList, h, h, fProxiedPtr,
2595 columnList.size());
2596 }
2597
2598 ////////////////////////////////////////////////////////////////////////////
2599 /// \brief Return a TStatistic object, filled once per event (*lazy action*).
2600 ///
2601 /// \tparam V The type of the value column
2602 /// \param[in] value The name of the column with the values to fill the statistics with.
2603 /// \return the filled TStatistic object wrapped in a RResultPtr.
2604 ///
2605 /// ### Example usage:
2606 /// ~~~{.cpp}
2607 /// // Deduce column type (this invocation needs jitting internally)
2608 /// auto stats0 = myDf.Stats("values");
2609 /// // Explicit column type
2610 /// auto stats1 = myDf.Stats<float>("values");
2611 /// ~~~
2612 ///
2613 template <typename V = RDFDetail::RInferredType>
2614 RResultPtr<TStatistic> Stats(std::string_view value = "")
2615 {
2616 ColumnNames_t columns;
2617 if (!value.empty()) {
2618 columns.emplace_back(std::string(value));
2619 }
2620 const auto validColumnNames = GetValidatedColumnNames(1, columns);
2621 if (std::is_same<V, RDFDetail::RInferredType>::value) {
2622 return Fill(TStatistic(), validColumnNames);
2623 } else {
2624 return Fill<V>(TStatistic(), validColumnNames);
2625 }
2626 }
2627
2628 ////////////////////////////////////////////////////////////////////////////
2629 /// \brief Return a TStatistic object, filled once per event (*lazy action*).
2630 ///
2631 /// \tparam V The type of the value column
2632 /// \tparam W The type of the weight column
2633 /// \param[in] value The name of the column with the values to fill the statistics with.
2634 /// \param[in] weight The name of the column with the weights to fill the statistics with.
2635 /// \return the filled TStatistic object wrapped in a RResultPtr.
2636 ///
2637 /// ### Example usage:
2638 /// ~~~{.cpp}
2639 /// // Deduce column types (this invocation needs jitting internally)
2640 /// auto stats0 = myDf.Stats("values", "weights");
2641 /// // Explicit column types
2642 /// auto stats1 = myDf.Stats<int, float>("values", "weights");
2643 /// ~~~
2644 ///
2645 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
2646 RResultPtr<TStatistic> Stats(std::string_view value, std::string_view weight)
2647 {
2648 ColumnNames_t columns{std::string(value), std::string(weight)};
2649 constexpr auto vIsInferred = std::is_same<V, RDFDetail::RInferredType>::value;
2650 constexpr auto wIsInferred = std::is_same<W, RDFDetail::RInferredType>::value;
2651 const auto validColumnNames = GetValidatedColumnNames(2, columns);
2652 // We have 3 cases:
2653 // 1. Both types are inferred: we use Fill and let the jit kick in.
2654 // 2. One of the two types is explicit and the other one is inferred: the case is not supported.
2655 // 3. Both types are explicit: we invoke the fully compiled Fill method.
2656 if (vIsInferred && wIsInferred) {
2657 return Fill(TStatistic(), validColumnNames);
2658 } else if (vIsInferred != wIsInferred) {
2659 std::string error("The ");
2660 error += vIsInferred ? "value " : "weight ";
2661 error += "column type is explicit, while the ";
2662 error += vIsInferred ? "weight " : "value ";
2663 error += " is specified to be inferred. This case is not supported: please specify both types or none.";
2664 throw std::runtime_error(error);
2665 } else {
2666 return Fill<V, W>(TStatistic(), validColumnNames);
2667 }
2668 }
2669
2670 ////////////////////////////////////////////////////////////////////////////
2671 /// \brief Return the minimum of processed column values (*lazy action*).
2672 /// \tparam T The type of the branch/column.
2673 /// \param[in] columnName The name of the branch/column to be treated.
2674 /// \return the minimum value of the selected column wrapped in a RResultPtr.
2675 ///
2676 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2677 /// template specialization of this method.
2678 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2679 ///
2680 /// This action is *lazy*: upon invocation of this method the calculation is
2681 /// booked but not executed. Also see RResultPtr.
2682 ///
2683 /// ### Example usage:
2684 /// ~~~{.cpp}
2685 /// // Deduce column type (this invocation needs jitting internally)
2686 /// auto minVal0 = myDf.Min("values");
2687 /// // Explicit column type
2688 /// auto minVal1 = myDf.Min<double>("values");
2689 /// ~~~
2690 ///
2691 template <typename T = RDFDetail::RInferredType>
2692 RResultPtr<RDFDetail::MinReturnType_t<T>> Min(std::string_view columnName = "")
2693 {
2694 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2695 using RetType_t = RDFDetail::MinReturnType_t<T>;
2696 auto minV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::max());
2697 return CreateAction<RDFInternal::ActionTags::Min, T>(userColumns, minV, minV, fProxiedPtr);
2698 }
2699
2700 ////////////////////////////////////////////////////////////////////////////
2701 /// \brief Return the maximum of processed column values (*lazy action*).
2702 /// \tparam T The type of the branch/column.
2703 /// \param[in] columnName The name of the branch/column to be treated.
2704 /// \return the maximum value of the selected column wrapped in a RResultPtr.
2705 ///
2706 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2707 /// template specialization of this method.
2708 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2709 ///
2710 /// This action is *lazy*: upon invocation of this method the calculation is
2711 /// booked but not executed. Also see RResultPtr.
2712 ///
2713 /// ### Example usage:
2714 /// ~~~{.cpp}
2715 /// // Deduce column type (this invocation needs jitting internally)
2716 /// auto maxVal0 = myDf.Max("values");
2717 /// // Explicit column type
2718 /// auto maxVal1 = myDf.Max<double>("values");
2719 /// ~~~
2720 ///
2721 template <typename T = RDFDetail::RInferredType>
2722 RResultPtr<RDFDetail::MaxReturnType_t<T>> Max(std::string_view columnName = "")
2723 {
2724 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2725 using RetType_t = RDFDetail::MaxReturnType_t<T>;
2726 auto maxV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::lowest());
2727 return CreateAction<RDFInternal::ActionTags::Max, T>(userColumns, maxV, maxV, fProxiedPtr);
2728 }
2729
2730 ////////////////////////////////////////////////////////////////////////////
2731 /// \brief Return the mean of processed column values (*lazy action*).
2732 /// \tparam T The type of the branch/column.
2733 /// \param[in] columnName The name of the branch/column to be treated.
2734 /// \return the mean value of the selected column wrapped in a RResultPtr.
2735 ///
2736 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2737 /// template specialization of this method.
2738 ///
2739 /// This action is *lazy*: upon invocation of this method the calculation is
2740 /// booked but not executed. Also see RResultPtr.
2741 ///
2742 /// ### Example usage:
2743 /// ~~~{.cpp}
2744 /// // Deduce column type (this invocation needs jitting internally)
2745 /// auto meanVal0 = myDf.Mean("values");
2746 /// // Explicit column type
2747 /// auto meanVal1 = myDf.Mean<double>("values");
2748 /// ~~~
2749 ///
2750 template <typename T = RDFDetail::RInferredType>
2751 RResultPtr<double> Mean(std::string_view columnName = "")
2752 {
2753 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2754 auto meanV = std::make_shared<double>(0);
2755 return CreateAction<RDFInternal::ActionTags::Mean, T>(userColumns, meanV, meanV, fProxiedPtr);
2756 }
2757
2758 ////////////////////////////////////////////////////////////////////////////
2759 /// \brief Return the unbiased standard deviation of processed column values (*lazy action*).
2760 /// \tparam T The type of the branch/column.
2761 /// \param[in] columnName The name of the branch/column to be treated.
2762 /// \return the standard deviation value of the selected column wrapped in a RResultPtr.
2763 ///
2764 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2765 /// template specialization of this method.
2766 ///
2767 /// This action is *lazy*: upon invocation of this method the calculation is
2768 /// booked but not executed. Also see RResultPtr.
2769 ///
2770 /// ### Example usage:
2771 /// ~~~{.cpp}
2772 /// // Deduce column type (this invocation needs jitting internally)
2773 /// auto stdDev0 = myDf.StdDev("values");
2774 /// // Explicit column type
2775 /// auto stdDev1 = myDf.StdDev<double>("values");
2776 /// ~~~
2777 ///
2778 template <typename T = RDFDetail::RInferredType>
2779 RResultPtr<double> StdDev(std::string_view columnName = "")
2780 {
2781 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2782 auto stdDeviationV = std::make_shared<double>(0);
2783 return CreateAction<RDFInternal::ActionTags::StdDev, T>(userColumns, stdDeviationV, stdDeviationV, fProxiedPtr);
2784 }
2785
2786 // clang-format off
2787 ////////////////////////////////////////////////////////////////////////////
2788 /// \brief Return the sum of processed column values (*lazy action*).
2789 /// \tparam T The type of the branch/column.
2790 /// \param[in] columnName The name of the branch/column.
2791 /// \param[in] initValue Optional initial value for the sum. If not present, the column values must be default-constructible.
2792 /// \return the sum of the selected column wrapped in a RResultPtr.
2793 ///
2794 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2795 /// template specialization of this method.
2796 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2797 ///
2798 /// This action is *lazy*: upon invocation of this method the calculation is
2799 /// booked but not executed. Also see RResultPtr.
2800 ///
2801 /// ### Example usage:
2802 /// ~~~{.cpp}
2803 /// // Deduce column type (this invocation needs jitting internally)
2804 /// auto sum0 = myDf.Sum("values");
2805 /// // Explicit column type
2806 /// auto sum1 = myDf.Sum<double>("values");
2807 /// ~~~
2808 ///
2809 template <typename T = RDFDetail::RInferredType>
2810 RResultPtr<RDFDetail::SumReturnType_t<T>>
2811 Sum(std::string_view columnName = "",
2812 const RDFDetail::SumReturnType_t<T> &initValue = RDFDetail::SumReturnType_t<T>{})
2813 {
2814 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2815 auto sumV = std::make_shared<RDFDetail::SumReturnType_t<T>>(initValue);
2816 return CreateAction<RDFInternal::ActionTags::Sum, T>(userColumns, sumV, sumV, fProxiedPtr);
2817 }
2818 // clang-format on
2819
2820 ////////////////////////////////////////////////////////////////////////////
2821 /// \brief Gather filtering statistics.
2822 /// \return the resulting `RCutFlowReport` instance wrapped in a RResultPtr.
2823 ///
2824 /// Calling `Report` on the main `RDataFrame` object gathers stats for
2825 /// all named filters in the call graph. Calling this method on a
2826 /// stored chain state (i.e. a graph node different from the first) gathers
2827 /// the stats for all named filters in the chain section between the original
2828 /// `RDataFrame` and that node (included). Stats are gathered in the same
2829 /// order as the named filters have been added to the graph.
2830 /// A RResultPtr<RCutFlowReport> is returned to allow inspection of the
2831 /// effects cuts had.
2832 ///
2833 /// This action is *lazy*: upon invocation of
2834 /// this method the calculation is booked but not executed. See RResultPtr
2835 /// documentation.
2836 ///
2837 /// ### Example usage:
2838 /// ~~~{.cpp}
2839 /// auto filtered = d.Filter(cut1, {"b1"}, "Cut1").Filter(cut2, {"b2"}, "Cut2");
2840 /// auto cutReport = filtered.Report();
2841 /// cutReport->Print();
2842 /// ~~~
2843 ///
2844 RResultPtr<RCutFlowReport> Report()
2845 {
2846 bool returnEmptyReport = false;
2847 // if this is a RInterface<RLoopManager> on which `Define` has been called, users
2848 // are calling `Report` on a chain of the form LoopManager->Define->Define->..., which
2849 // certainly does not contain named filters.
2850 // The number 4 takes into account the implicit columns for entry and slot number
2851 // and their aliases (2 + 2, i.e. {r,t}dfentry_ and {r,t}dfslot_)
2852 if (std::is_same<Proxied, RLoopManager>::value && fColRegister.GenerateColumnNames().size() > 4)
2853 returnEmptyReport = true;
2854
2855 auto rep = std::make_shared<RCutFlowReport>();
2856 using Helper_t = RDFInternal::ReportHelper<Proxied>;
2857 using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
2858
2859 auto action = std::make_unique<Action_t>(Helper_t(rep, fProxiedPtr.get(), returnEmptyReport), ColumnNames_t({}),
2860 fProxiedPtr, RDFInternal::RColumnRegister(fColRegister));
2861
2862 return MakeResultPtr(rep, *fLoopManager, std::move(action));
2863 }
2864
2865 /// \brief Returns the names of the filters created.
2866 /// \return the container of filters names.
2867 ///
2868 /// If called on a root node, all the filters in the computation graph will
2869 /// be printed. For any other node, only the filters upstream of that node.
2870 /// Filters without a name are printed as "Unnamed Filter"
2871 /// This is not an action nor a transformation, just a query to the RDataFrame object.
2872 ///
2873 /// ### Example usage:
2874 /// ~~~{.cpp}
2875 /// auto filtNames = d.GetFilterNames();
2876 /// for (auto &&filtName : filtNames) std::cout << filtName << std::endl;
2877 /// ~~~
2878 ///
2879 std::vector<std::string> GetFilterNames() { return RDFInternal::GetFilterNames(fProxiedPtr); }
2880
2881 // clang-format off
2882 ////////////////////////////////////////////////////////////////////////////
2883 /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot.
2884 /// \tparam AccFun The type of the aggregator callable. Automatically deduced.
2885 /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
2886 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
2887 /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
2888 /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
2889 /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
2890 /// \param[in] aggIdentity The aggregator variable of each thread is initialized to this value (or is default-constructed if the parameter is omitted)
2891 /// \return the result of the aggregation wrapped in a RResultPtr.
2892 ///
2893 /// An aggregator callable takes two values, an aggregator variable and a column value. The aggregator variable is
2894 /// initialized to aggIdentity or default-constructed if aggIdentity is omitted.
2895 /// This action calls the aggregator callable for each processed entry, passing in the aggregator variable and
2896 /// the value of the column columnName.
2897 /// If the signature is `U(U,T)` the aggregator variable is then copy-assigned the result of the execution of the callable.
2898 /// Otherwise the signature of aggregator must be `void(U&,T)`.
2899 ///
2900 /// The merger callable is used to merge the partial accumulation results of each processing thread. It is only called in multi-thread executions.
2901 /// If its signature is `U(U,U)` the aggregator variables of each thread are merged two by two.
2902 /// If its signature is `void(std::vector<U>& a)` it is assumed that it merges all aggregators in a[0].
2903 ///
2904 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.
2905 ///
2906 /// Example usage:
2907 /// ~~~{.cpp}
2908 /// auto aggregator = [](double acc, double x) { return acc * x; };
2909 /// ROOT::EnableImplicitMT();
2910 /// // If multithread is enabled, the aggregator function will be called by more threads
2911 /// // and will produce a vector of partial accumulators.
2912 /// // The merger function performs the final aggregation of these partial results.
2913 /// auto merger = [](std::vector<double> &accumulators) {
2914 /// for (auto i : ROOT::TSeqU(1u, accumulators.size())) {
2915 /// accumulators[0] *= accumulators[i];
2916 /// }
2917 /// };
2918 ///
2919 /// // The accumulator is initialized at this value by every thread.
2920 /// double initValue = 1.;
2921 ///
2922 /// // Multiplies all elements of the column "x"
2923 /// auto result = d.Aggregate(aggregator, merger, "x", initValue);
2924 /// ~~~
2925 // clang-format on
2926 template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2927 typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2928 typename ArgTypesNoDecay = typename TTraits::CallableTraits<AccFun>::arg_types_nodecay,
2929 typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2930 typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2931 RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
2932 {
2933 RDFInternal::CheckAggregate<R, MergeFun>(ArgTypesNoDecay());
2934 const auto columns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2935
2936 const auto validColumnNames = GetValidatedColumnNames(1, columns);
2937 CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>());
2938
2939 auto accObjPtr = std::make_shared<U>(aggIdentity);
2940 using Helper_t = RDFInternal::AggregateHelper<AccFun, MergeFun, R, T, U>;
2941 using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
2942 auto action = std::make_unique<Action_t>(
2943 Helper_t(std::move(aggregator), std::move(merger), accObjPtr, fLoopManager->GetNSlots()), validColumnNames,
2944 fProxiedPtr, fColRegister);
2945 return MakeResultPtr(accObjPtr, *fLoopManager, std::move(action));
2946 }
2947
2948 // clang-format off
2949 ////////////////////////////////////////////////////////////////////////////
2950 /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot.
2951 /// \tparam AccFun The type of the aggregator callable. Automatically deduced.
2952 /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
2953 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
2954 /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
2955 /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
2956 /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
2957 /// \return the result of the aggregation wrapped in a RResultPtr.
2958 ///
2959 /// See previous Aggregate overload for more information.
2960 // clang-format on
2961 template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2962 typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2963 typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2964 typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2965 RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName = "")
2966 {
2967 static_assert(
2968 std::is_default_constructible<U>::value,
2969 "aggregated object cannot be default-constructed. Please provide an initialisation value (aggIdentity)");
2970 return Aggregate(std::move(aggregator), std::move(merger), columnName, U());
2971 }
2972
2973 // clang-format off
2974 ////////////////////////////////////////////////////////////////////////////
2975 /// \brief Book execution of a custom action using a user-defined helper object.
2976 /// \tparam FirstColumn The type of the first column used by this action. Inferred together with OtherColumns if not present.
2977 /// \tparam OtherColumns A list of the types of the other columns used by this action
2978 /// \tparam Helper The type of the user-defined helper. See below for the required interface it should expose.
2979 /// \param[in] helper The Action Helper to be scheduled.
2980 /// \param[in] columns The names of the columns on which the helper acts.
2981 /// \return the result of the helper wrapped in a RResultPtr.
2982 ///
2983 /// This method books a custom action for execution. The behavior of the action is completely dependent on the
2984 /// Helper object provided by the caller. The required interface for the helper is described below (more
2985 /// methods than the ones required can be present, e.g. a constructor that takes the number of worker threads is usually useful):
2986 ///
2987 /// ### Mandatory interface
2988 ///
2989 /// * `Helper` must publicly inherit from `ROOT::Detail::RDF::RActionImpl<Helper>`
2990 /// * `Helper::Result_t`: public alias for the type of the result of this action helper. `Result_t` must be default-constructible.
2991 /// * `Helper(Helper &&)`: a move-constructor is required. Copy-constructors are discouraged.
2992 /// * `std::shared_ptr<Result_t> GetResultPtr() const`: return a shared_ptr to the result of this action (of type
2993 /// Result_t). The RResultPtr returned by Book will point to this object. Note that this method can be called
2994 /// _before_ Initialize(), because the RResultPtr is constructed before the event loop is started.
2995 /// * `void Initialize()`: this method is called once before starting the event-loop. Useful for setup operations.
2996 /// It must reset the state of the helper to the expected state at the beginning of the event loop: the same helper,
2997 /// or copies of it, might be used for multiple event loops (e.g. in the presence of systematic variations).
2998 /// * `void InitTask(TTreeReader *, unsigned int slot)`: each working thread shall call this method during the event
2999 /// loop, before processing a batch of entries. The pointer passed as argument, if not null, will point to the TTreeReader
3000 /// that RDataFrame has set up to read the task's batch of entries. It is passed to the helper to allow certain advanced optimizations;
3001 /// it should not usually serve any purpose for the Helper. This method is often no-op for simple helpers.
3002 /// * `void Exec(unsigned int slot, ColumnTypes...columnValues)`: each working thread shall call this method
3003 /// during the event-loop, possibly concurrently. No two threads will ever call Exec with the same 'slot' value:
3004 /// this parameter is there to facilitate writing thread-safe helpers. The other arguments will be the values of
3005 /// the requested columns for the particular entry being processed.
3006 /// * `void Finalize()`: this method is called at the end of the event loop. Commonly used to finalize the contents of the result.
3007 /// * `std::string GetActionName()`: it returns a string identifier for this type of action that RDataFrame will use in
3008 /// diagnostics, SaveGraph(), etc.
3009 ///
3010 /// ### Optional methods
3011 ///
3012 /// If these methods are implemented they enable extra functionality as per the description below.
3013 ///
3014 /// * `Result_t &PartialUpdate(unsigned int slot)`: if present, it must return the value of the partial result of this action for the given 'slot'.
3015 /// Different threads might call this method concurrently, but will do so with different 'slot' numbers.
3016 /// RDataFrame leverages this method to implement RResultPtr::OnPartialResult().
3017 /// * `ROOT::RDF::SampleCallback_t GetSampleCallback()`: if present, it must return a callable with the
3018 /// appropriate signature (see ROOT::RDF::SampleCallback_t) that will be invoked at the beginning of the processing
3019 /// of every sample, as in DefinePerSample().
3020 /// * `Helper MakeNew(void *newResult, std::string_view variation = "nominal")`: if implemented, it enables varying
3021 /// the action's result with VariationsFor(). It takes a type-erased new result that can be safely cast to a
3022 /// `std::shared_ptr<Result_t> *` (a pointer to shared pointer) and should be used as the action's output result.
3023 /// The function optionally takes the name of the current variation which could be useful in customizing its behaviour.
3024 ///
3025 /// In case Book is called without specifying column types as template arguments, corresponding typed code will be just-in-time compiled
3026 /// by RDataFrame. In that case the Helper class needs to be known to the ROOT interpreter.
3027 ///
3028 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.
3029 ///
3030 /// ### Examples
3031 /// See [this tutorial](https://root.cern/doc/master/df018__customActions_8C.html) for an example implementation of an action helper.
3032 ///
3033 /// It is also possible to inspect the code used by built-in RDataFrame actions at ActionHelpers.hxx.
3034 ///
3035 // clang-format on
3036 template <typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename Helper>
3037 RResultPtr<typename std::decay_t<Helper>::Result_t> Book(Helper &&helper, const ColumnNames_t &columns = {})
3038 {
3039 using HelperT = std::decay_t<Helper>;
3040 // TODO add more static sanity checks on Helper
3041 using AH = RDFDetail::RActionImpl<HelperT>;
3042 static_assert(std::is_base_of<AH, HelperT>::value && std::is_convertible<HelperT *, AH *>::value,
3043 "Action helper of type T must publicly inherit from ROOT::Detail::RDF::RActionImpl<T>");
3044
3045 auto hPtr = std::make_shared<HelperT>(std::forward<Helper>(helper));
3046 auto resPtr = hPtr->GetResultPtr();
3047
3048 if (std::is_same<FirstColumn, RDFDetail::RInferredType>::value && columns.empty()) {
3049 return CallCreateActionWithoutColsIfPossible<HelperT>(resPtr, hPtr, TTraits::TypeList<FirstColumn>{});
3050 } else {
3051 return CreateAction<RDFInternal::ActionTags::Book, FirstColumn, OtherColumns...>(columns, resPtr, hPtr,
3052 fProxiedPtr, columns.size());
3053 }
3054 }
3055
3056 ////////////////////////////////////////////////////////////////////////////
3057 /// \brief Provides a representation of the columns in the dataset.
3058 /// \tparam ColumnTypes variadic list of branch/column types.
3059 /// \param[in] columnList Names of the columns to be displayed.
3060 /// \param[in] nRows Number of events for each column to be displayed.
3061 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
3062 /// \return the `RDisplay` instance wrapped in a RResultPtr.
3063 ///
3064 /// This function returns a `RResultPtr<RDisplay>` containing all the entries to be displayed, organized in a tabular
3065 /// form. RDisplay will either print on the standard output a summarized version through `RDisplay::Print()` or will
3066 /// return a complete version through `RDisplay::AsString()`.
3067 ///
3068 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see
3069 /// RResultPtr.
3070 ///
3071 /// Example usage:
3072 /// ~~~{.cpp}
3073 /// // Preparing the RResultPtr<RDisplay> object with all columns and default number of entries
3074 /// auto d1 = rdf.Display("");
3075 /// // Preparing the RResultPtr<RDisplay> object with two columns and 128 entries
3076 /// auto d2 = d.Display({"x", "y"}, 128);
3077 /// // Printing the short representations, the event loop will run
3078 /// d1->Print();
3079 /// d2->Print();
3080 /// ~~~
3081 template <typename... ColumnTypes>
3082 RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10)
3083 {
3084 CheckIMTDisabled("Display");
3085 auto newCols = columnList;
3086 newCols.insert(newCols.begin(), "rdfentry_"); // Artificially insert first column
3087 auto displayer = std::make_shared<RDisplay>(newCols, GetColumnTypeNamesList(newCols), nMaxCollectionElements);
3088 using displayHelperArgs_t = std::pair<size_t, std::shared_ptr<RDisplay>>;
3089 // Need to add ULong64_t type corresponding to the first column rdfentry_
3090 return CreateAction<RDFInternal::ActionTags::Display, ULong64_t, ColumnTypes...>(
3091 std::move(newCols), displayer, std::make_shared<displayHelperArgs_t>(nRows, displayer), fProxiedPtr);
3092 }
3093
3094 ////////////////////////////////////////////////////////////////////////////
3095 /// \brief Provides a representation of the columns in the dataset.
3096 /// \param[in] columnList Names of the columns to be displayed.
3097 /// \param[in] nRows Number of events for each column to be displayed.
3098 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
3099 /// \return the `RDisplay` instance wrapped in a RResultPtr.
3100 ///
3101 /// This overload automatically infers the column types.
3102 /// See the previous overloads for further details.
3103 ///
3104 /// Invoked when no types are specified to Display
3105 RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10)
3106 {
3107 CheckIMTDisabled("Display");
3108 auto newCols = columnList;
3109 newCols.insert(newCols.begin(), "rdfentry_"); // Artificially insert first column
3110 auto displayer = std::make_shared<RDisplay>(newCols, GetColumnTypeNamesList(newCols), nMaxCollectionElements);
3111 using displayHelperArgs_t = std::pair<size_t, std::shared_ptr<RDisplay>>;
3112 return CreateAction<RDFInternal::ActionTags::Display, RDFDetail::RInferredType>(
3113 std::move(newCols), displayer, std::make_shared<displayHelperArgs_t>(nRows, displayer), fProxiedPtr,
3114 columnList.size() + 1);
3115 }
3116
3117 ////////////////////////////////////////////////////////////////////////////
3118 /// \brief Provides a representation of the columns in the dataset.
3119 /// \param[in] columnNameRegexp A regular expression to select the columns.
3120 /// \param[in] nRows Number of events for each column to be displayed.
3121 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
3122 /// \return the `RDisplay` instance wrapped in a RResultPtr.
3123 ///
3124 /// The existing columns are matched against the regular expression. If the string provided
3125 /// is empty, all columns are selected.
3126 /// See the previous overloads for further details.
3127 RResultPtr<RDisplay>
3128 Display(std::string_view columnNameRegexp = "", size_t nRows = 5, size_t nMaxCollectionElements = 10)
3129 {
3130 const auto columnNames = GetColumnNames();
3131 const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Display");
3132 return Display(selectedColumns, nRows, nMaxCollectionElements);
3133 }
3134
3135 ////////////////////////////////////////////////////////////////////////////
3136 /// \brief Provides a representation of the columns in the dataset.
3137 /// \param[in] columnList Names of the columns to be displayed.
3138 /// \param[in] nRows Number of events for each column to be displayed.
3139 /// \param[in] nMaxCollectionElements Number of maximum elements in collection.
3140 /// \return the `RDisplay` instance wrapped in a RResultPtr.
3141 ///
3142 /// See the previous overloads for further details.
3143 RResultPtr<RDisplay>
3144 Display(std::initializer_list<std::string> columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10)
3145 {
3146 ColumnNames_t selectedColumns(columnList);
3147 return Display(selectedColumns, nRows, nMaxCollectionElements);
3148 }
3149
3150private:
3151 template <typename F, typename DefineType, typename RetType = typename TTraits::CallableTraits<F>::ret_type>
3152 std::enable_if_t<std::is_default_constructible<RetType>::value, RInterface<Proxied, DS_t>>
3153 DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns, const std::string &where)
3154 {
3155 if (where.compare(0, 8, "Redefine") != 0) { // not a Redefine
3156 RDFInternal::CheckValidCppVarName(name, where);
3157 RDFInternal::CheckForRedefinition(where, name, fColRegister, fLoopManager->GetBranchNames(),
3158 GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{});
3159 } else {
3160 RDFInternal::CheckForDefinition(where, name, fColRegister, fLoopManager->GetBranchNames(),
3161 GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{});
3162 RDFInternal::CheckForNoVariations(where, name, fColRegister);
3163 }
3164
3165 using ArgTypes_t = typename TTraits::CallableTraits<F>::arg_types;
3166 using ColTypesTmp_t = typename RDFInternal::RemoveFirstParameterIf<
3167 std::is_same<DefineType, RDFDetail::ExtraArgsForDefine::Slot>::value, ArgTypes_t>::type;
3168 using ColTypes_t = typename RDFInternal::RemoveFirstTwoParametersIf<
3169 std::is_same<DefineType, RDFDetail::ExtraArgsForDefine::SlotAndEntry>::value, ColTypesTmp_t>::type;
3170
3171 constexpr auto nColumns = ColTypes_t::list_size;
3172
3173 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
3174 CheckAndFillDSColumns(validColumnNames, ColTypes_t());
3175
3176 // Declare return type to the interpreter, for future use by jitted actions
3177 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType));
3178 if (retTypeName.empty()) {
3179 // The type is not known to the interpreter.
3180 // We must not error out here, but if/when this column is used in jitted code
3181 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType));
3182 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
3183 }
3184
3185 using NewCol_t = RDFDetail::RDefine<F, DefineType>;
3186 auto newColumn = std::make_shared<NewCol_t>(name, retTypeName, std::forward<F>(expression), validColumnNames,
3187 fColRegister, *fLoopManager);
3188
3189 RDFInternal::RColumnRegister newCols(fColRegister);
3190 newCols.AddDefine(std::move(newColumn));
3191
3192 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
3193
3194 return newInterface;
3195 }
3196
3197 // This overload is chosen when the callable passed to Define or DefineSlot returns void.
3198 // It simply fires a compile-time error. This is preferable to a static_assert in the main `Define` overload because
3199 // this way compilation of `Define` has no way to continue after throwing the error.
3200 template <typename F, typename DefineType, typename RetType = typename TTraits::CallableTraits<F>::ret_type,
3201 bool IsFStringConv = std::is_convertible<F, std::string>::value,
3202 bool IsRetTypeDefConstr = std::is_default_constructible<RetType>::value>
3203 std::enable_if_t<!IsFStringConv && !IsRetTypeDefConstr, RInterface<Proxied, DS_t>>
3204 DefineImpl(std::string_view, F, const ColumnNames_t &, const std::string &)
3205 {
3206 static_assert(std::is_default_constructible<typename TTraits::CallableTraits<F>::ret_type>::value,
3207 "Error in `Define`: type returned by expression is not default-constructible");
3208 return *this; // never reached
3209 }
3210
3211 template <typename... ColumnTypes>
3212 RResultPtr<RInterface<RLoopManager>> SnapshotImpl(std::string_view fullTreeName, std::string_view filename,
3213 const ColumnNames_t &columnList, const RSnapshotOptions &options)
3214 {
3215 const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
3216
3217 RDFInternal::CheckTypesAndPars(sizeof...(ColumnTypes), columnListWithoutSizeColumns.size());
3218 // validCols has aliases resolved, while columnListWithoutSizeColumns still has aliases in it.
3219 const auto validCols = GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns);
3220 RDFInternal::CheckForDuplicateSnapshotColumns(validCols);
3221 CheckAndFillDSColumns(validCols, TTraits::TypeList<ColumnTypes...>());
3222
3223 const auto parsedTreePath = RDFInternal::ParseTreePath(fullTreeName);
3224 const auto &treename = parsedTreePath.fTreeName;
3225 const auto &dirname = parsedTreePath.fDirName;
3226
3227 ::TDirectory::TContext ctxt;
3228
3229 RResultPtr<RInterface<RLoopManager>> resPtr;
3230
3231 if (options.fOutputFormat == ESnapshotOutputFormat::kRNTuple) {
3232 if (RDFInternal::GetDataSourceLabel(*this) == "TTreeDS") {
3233 throw std::runtime_error("Snapshotting from TTree to RNTuple is not yet supported. The current recommended "
3234 "way to convert TTrees to RNTuple is through the RNTupleImporter.");
3235 }
3236
3237 auto newRDF =
3238 std::make_shared<RInterface<RLoopManager>>(std::make_shared<RLoopManager>(columnListWithoutSizeColumns));
3239
3240 auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{
3241 std::string(filename), std::string(dirname), std::string(treename), columnListWithoutSizeColumns, options,
3242 newRDF->GetLoopManager(), GetLoopManager(), true /* fToRNTuple */});
3243
3244 // The Snapshot helper will use validCols (with aliases resolved) as input columns, and
3245 // columnListWithoutSizeColumns (still with aliases in it, passed through snapHelperArgs) as output column
3246 // names.
3247 resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, ColumnTypes...>(validCols, newRDF, snapHelperArgs,
3248 fProxiedPtr);
3249 } else {
3250 if (RDFInternal::GetDataSourceLabel(*this) == "RNTupleDS" &&
3251 options.fOutputFormat == ESnapshotOutputFormat::kDefault) {
3252 Warning("Snapshot",
3253 "The default Snapshot output data format is TTree, but the input data format is RNTuple. If you "
3254 "want to Snapshot to RNTuple or suppress this warning, set the appropriate fOutputFormat option in "
3255 "RSnapshotOptions. Note that this current default behaviour might change in the future.");
3256 }
3257
3258 // We create an RLoopManager without a data source. This needs to be initialised when the output TTree dataset
3259 // has actually been created and written to TFile, i.e. at the end of the Snapshot execution.
3260 auto newRDF =
3261 std::make_shared<RInterface<RLoopManager>>(std::make_shared<RLoopManager>(columnListWithoutSizeColumns));
3262
3263 auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{
3264 std::string(filename), std::string(dirname), std::string(treename), columnListWithoutSizeColumns, options,
3265 newRDF->GetLoopManager(), GetLoopManager(), false /* fToRNTuple */});
3266
3267 // The Snapshot helper will use validCols (with aliases resolved) as input columns, and
3268 // columnListWithoutSizeColumns (still with aliases in it, passed through snapHelperArgs) as output column
3269 // names.
3270 resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, ColumnTypes...>(validCols, newRDF, snapHelperArgs,
3271 fProxiedPtr);
3272 }
3273
3274 if (!options.fLazy)
3275 *resPtr;
3276 return resPtr;
3277 }
3278
3279 ////////////////////////////////////////////////////////////////////////////
3280 /// \brief Implementation of cache.
3281 template <typename... ColTypes, std::size_t... S>
3282 RInterface<RLoopManager> CacheImpl(const ColumnNames_t &columnList, std::index_sequence<S...>)
3283 {
3284 const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
3285
3286 // Check at compile time that the columns types are copy constructible
3287 constexpr bool areCopyConstructible =
3288 RDFInternal::TEvalAnd<std::is_copy_constructible<ColTypes>::value...>::value;
3289 static_assert(areCopyConstructible, "Columns of a type which is not copy constructible cannot be cached yet.");
3290
3291 RDFInternal::CheckTypesAndPars(sizeof...(ColTypes), columnListWithoutSizeColumns.size());
3292
3293 auto colHolders = std::make_tuple(Take<ColTypes>(columnListWithoutSizeColumns[S])...);
3294 auto ds = std::make_unique<RLazyDS<ColTypes...>>(
3295 std::make_pair(columnListWithoutSizeColumns[S], std::get<S>(colHolders))...);
3296
3297 RInterface<RLoopManager> cachedRDF(std::make_shared<RLoopManager>(std::move(ds), columnListWithoutSizeColumns));
3298
3299 return cachedRDF;
3300 }
3301
3302 template <bool IsSingleColumn, typename F>
3303 RInterface<Proxied, DS_t>
3304 VaryImpl(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns,
3305 const std::vector<std::string> &variationTags, std::string_view variationName)
3306 {
3307 using F_t = std::decay_t<F>;
3308 using ColTypes_t = typename TTraits::CallableTraits<F_t>::arg_types;
3309 using RetType = typename TTraits::CallableTraits<F_t>::ret_type;
3310 constexpr auto nColumns = ColTypes_t::list_size;
3311
3312 SanityChecksForVary<RetType>(colNames, variationTags, variationName);
3313
3314 const auto validColumnNames = GetValidatedColumnNames(nColumns, inputColumns);
3315 CheckAndFillDSColumns(validColumnNames, ColTypes_t{});
3316
3317 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType));
3318 if (retTypeName.empty()) {
3319 // The type is not known to the interpreter, but we don't want to error out
3320 // here, rather if/when this column is used in jitted code, so we inject a broken but telling type name.
3321 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType));
3322 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
3323 }
3324
3325 auto variation = std::make_shared<RDFInternal::RVariation<F_t, IsSingleColumn>>(
3326 colNames, variationName, std::forward<F>(expression), variationTags, retTypeName, fColRegister, *fLoopManager,
3327 validColumnNames);
3328
3329 RDFInternal::RColumnRegister newCols(fColRegister);
3330 newCols.AddVariation(std::move(variation));
3331
3332 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
3333
3334 return newInterface;
3335 }
3336
3337 RInterface<Proxied, DS_t> JittedVaryImpl(const std::vector<std::string> &colNames, std::string_view expression,
3338 const std::vector<std::string> &variationTags,
3339 std::string_view variationName, bool isSingleColumn)
3340 {
3341 R__ASSERT(!variationTags.empty() && "Must have at least one variation.")do { if (__builtin_expect(!!(!(!variationTags.empty() &&
"Must have at least one variation.")), 0)) ::Fatal("", kAssertMsg
, "!variationTags.empty() && \"Must have at least one variation.\""
, 3341, "/cvmfs/belle.cern.ch/el9/externals/v02-04-00/include/root/ROOT/RDF/RInterface.hxx"
); } while (false)
;
3342 R__ASSERT(!colNames.empty() && "Must have at least one varied column.")do { if (__builtin_expect(!!(!(!colNames.empty() && "Must have at least one varied column."
)), 0)) ::Fatal("", kAssertMsg, "!colNames.empty() && \"Must have at least one varied column.\""
, 3342, "/cvmfs/belle.cern.ch/el9/externals/v02-04-00/include/root/ROOT/RDF/RInterface.hxx"
); } while (false)
;
3343 R__ASSERT(!variationName.empty() && "Must provide a variation name.")do { if (__builtin_expect(!!(!(!variationName.empty() &&
"Must provide a variation name.")), 0)) ::Fatal("", kAssertMsg
, "!variationName.empty() && \"Must provide a variation name.\""
, 3343, "/cvmfs/belle.cern.ch/el9/externals/v02-04-00/include/root/ROOT/RDF/RInterface.hxx"
); } while (false)
;
3344
3345 for (auto &colName : colNames) {
3346 RDFInternal::CheckValidCppVarName(colName, "Vary");
3347 RDFInternal::CheckForDefinition("Vary", colName, fColRegister, fLoopManager->GetBranchNames(),
3348 GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{});
3349 }
3350 RDFInternal::CheckValidCppVarName(variationName, "Vary");
3351
3352 // when varying multiple columns, they must be different columns
3353 if (colNames.size() > 1) {
3354 std::set<std::string> uniqueCols(colNames.begin(), colNames.end());
3355 if (uniqueCols.size() != colNames.size())
3356 throw std::logic_error("A column name was passed to the same Vary invocation multiple times.");
3357 }
3358
3359 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
3360 auto jittedVariation = RDFInternal::BookVariationJit(
3361 colNames, variationName, variationTags, expression, *fLoopManager, GetDataSource(), fColRegister,
3362 fLoopManager->GetBranchNames(), upcastNodeOnHeap, isSingleColumn);
3363
3364 RDFInternal::RColumnRegister newColRegister(fColRegister);
3365 newColRegister.AddVariation(std::move(jittedVariation));
3366
3367 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newColRegister));
3368
3369 return newInterface;
3370 }
3371
3372 template <typename Helper, typename ActionResultType>
3373 auto CallCreateActionWithoutColsIfPossible(const std::shared_ptr<ActionResultType> &resPtr,
3374 const std::shared_ptr<Helper> &hPtr,
3375 TTraits::TypeList<RDFDetail::RInferredType>)
3376 -> decltype(hPtr->Exec(0u), RResultPtr<ActionResultType>{})
3377 {
3378 return CreateAction<RDFInternal::ActionTags::Book>(/*columns=*/{}, resPtr, hPtr, fProxiedPtr, 0u);
3379 }
3380
3381 template <typename Helper, typename ActionResultType, typename... Others>
3382 RResultPtr<ActionResultType>
3383 CallCreateActionWithoutColsIfPossible(const std::shared_ptr<ActionResultType> &,
3384 const std::shared_ptr<Helper>& /*hPtr*/,
3385 Others...)
3386 {
3387 throw std::logic_error(std::string("An action was booked with no input columns, but the action requires "
3388 "columns! The action helper type was ") +
3389 typeid(Helper).name());
3390 return {};
3391 }
3392
3393protected:
3394 RInterface(const std::shared_ptr<Proxied> &proxied, RLoopManager &lm,
3395 const RDFInternal::RColumnRegister &colRegister)
3396 : RInterfaceBase(lm, colRegister), fProxiedPtr(proxied)
3397 {
3398 }
3399
3400 const std::shared_ptr<Proxied> &GetProxiedPtr() const { return fProxiedPtr; }
3401};
3402
3403} // namespace RDF
3404
3405} // namespace ROOT
3406
3407#endif // ROOT_RDF_INTERFACE

/cvmfs/belle.cern.ch/el9/externals/v02-04-00/include/root/ROOT/RDF/InterfaceUtils.hxx

1// Author: Enrico Guiraud, Danilo Piparo CERN 02/2018
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RDF_TINTERFACE_UTILS
12#define ROOT_RDF_TINTERFACE_UTILS
13
14#include "RColumnRegister.hxx"
15#include <ROOT/RDF/RAction.hxx>
16#include <ROOT/RDF/ActionHelpers.hxx> // for BuildAction
17#include <ROOT/RDF/RColumnRegister.hxx>
18#include <ROOT/RDF/RDefine.hxx>
19#include <ROOT/RDF/RDefinePerSample.hxx>
20#include <ROOT/RDF/RFilter.hxx>
21#include <ROOT/RDF/Utils.hxx>
22#include <ROOT/RDF/RJittedAction.hxx>
23#include <ROOT/RDF/RJittedDefine.hxx>
24#include <ROOT/RDF/RJittedFilter.hxx>
25#include <ROOT/RDF/RJittedVariation.hxx>
26#include <ROOT/RDF/RLoopManager.hxx>
27#include <string_view>
28#include <ROOT/RDF/RVariation.hxx>
29#include <ROOT/TypeTraits.hxx>
30#include <TError.h> // gErrorIgnoreLevel
31#include <TH1.h>
32#include <TROOT.h> // IsImplicitMTEnabled
33
34#include <deque>
35#include <functional>
36#include <map>
37#include <memory>
38#include <string>
39#include <type_traits>
40#include <typeinfo>
41#include <vector>
42#include <unordered_map>
43
44class TObjArray;
45class TTree;
46namespace ROOT {
47namespace Detail {
48namespace RDF {
49class RNodeBase;
50}
51}
52namespace RDF {
53template <typename T>
54class RResultPtr;
55template<typename T, typename V>
56class RInterface;
57using RNode = RInterface<::ROOT::Detail::RDF::RNodeBase, void>;
58class RDataSource;
59} // namespace RDF
60
61} // namespace ROOT
62
63/// \cond HIDDEN_SYMBOLS
64
65namespace ROOT {
66namespace Internal {
67namespace RDF {
68using namespace ROOT::Detail::RDF;
69using namespace ROOT::RDF;
70namespace TTraits = ROOT::TypeTraits;
71
72std::string DemangleTypeIdName(const std::type_info &typeInfo);
73
74ColumnNames_t
75ConvertRegexToColumns(const ColumnNames_t &colNames, std::string_view columnNameRegexp, std::string_view callerName);
76
77/// An helper object that sets and resets gErrorIgnoreLevel via RAII.
78class RIgnoreErrorLevelRAII {
79private:
80 int fCurIgnoreErrorLevel = gErrorIgnoreLevel;
81
82public:
83 RIgnoreErrorLevelRAII(int errorIgnoreLevel) { gErrorIgnoreLevel = errorIgnoreLevel; }
84 ~RIgnoreErrorLevelRAII() { gErrorIgnoreLevel = fCurIgnoreErrorLevel; }
85};
86
87/****** BuildAction overloads *******/
88
89// clang-format off
/// Empty tag types, one per kind of action, used for tag dispatching in RInterface
/// and to select the matching BuildAction overload.
namespace ActionTags {
// Histograms
struct Histo1D {};
struct Histo2D {};
struct Histo3D {};
struct HistoND {};
// Graphs
struct Graph {};
struct GraphAsymmErrors {};
// Profiles
struct Profile1D {};
struct Profile2D {};
// Reductions
struct Min {};
struct Max {};
struct Sum {};
struct Mean {};
struct Fill {};
struct StdDev {};
// Other actions
struct Display {};
struct Snapshot {};
struct Book {};
} // namespace ActionTags
110// clang-format on
111
112template <typename T, bool ISV6HISTO = std::is_base_of<TH1, std::decay_t<T>>::value>
113struct HistoUtils {
114 static void SetCanExtendAllAxes(T &h) { h.SetCanExtend(::TH1::kAllAxes); }
115 static bool HasAxisLimits(T &h)
116 {
117 auto xaxis = h.GetXaxis();
118 return !(xaxis->GetXmin() == 0. && xaxis->GetXmax() == 0.);
119 }
120};
121
122template <typename T>
123struct HistoUtils<T, false> {
124 static void SetCanExtendAllAxes(T &) {}
125 static bool HasAxisLimits(T &) { return true; }
126};
127
128// Generic filling (covers Histo2D, Histo3D, HistoND, Profile1D and Profile2D actions, with and without weights)
129template <typename... ColTypes, typename ActionTag, typename ActionResultType, typename PrevNodeType>
130std::unique_ptr<RActionBase>
131BuildAction(const ColumnNames_t &bl, const std::shared_ptr<ActionResultType> &h, const unsigned int nSlots,
132 std::shared_ptr<PrevNodeType> prevNode, ActionTag, const RColumnRegister &colRegister)
133{
134 using Helper_t = FillHelper<ActionResultType>;
135 using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColTypes...>>;
136 return std::make_unique<Action_t>(Helper_t(h, nSlots), bl, std::move(prevNode), colRegister);
137}
138
139// Histo1D filling (must handle the special case of distinguishing FillHelper and BufferedFillHelper
140template <typename... ColTypes, typename PrevNodeType>
141std::unique_ptr<RActionBase>
142BuildAction(const ColumnNames_t &bl, const std::shared_ptr<::TH1D> &h, const unsigned int nSlots,
143 std::shared_ptr<PrevNodeType> prevNode, ActionTags::Histo1D, const RColumnRegister &colRegister)
144{
145 auto hasAxisLimits = HistoUtils<::TH1D>::HasAxisLimits(*h);
146
147 if (hasAxisLimits || !IsImplicitMTEnabled()) {
148 using Helper_t = FillHelper<::TH1D>;
149 using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColTypes...>>;
150 return std::make_unique<Action_t>(Helper_t(h, nSlots), bl, std::move(prevNode), colRegister);
151 } else {
152 using Helper_t = BufferedFillHelper;
153 using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColTypes...>>;
154 return std::make_unique<Action_t>(Helper_t(h, nSlots), bl, std::move(prevNode), colRegister);
155 }
156}
157
158template <typename... ColTypes, typename PrevNodeType>
159std::unique_ptr<RActionBase>
160BuildAction(const ColumnNames_t &bl, const std::shared_ptr<TGraph> &g, const unsigned int nSlots,
161 std::shared_ptr<PrevNodeType> prevNode, ActionTags::Graph, const RColumnRegister &colRegister)
162{
163 using Helper_t = FillTGraphHelper;
164 using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColTypes...>>;
165 return std::make_unique<Action_t>(Helper_t(g, nSlots), bl, std::move(prevNode), colRegister);
166}
167
168template <typename... ColTypes, typename PrevNodeType>
169std::unique_ptr<RActionBase>
170BuildAction(const ColumnNames_t &bl, const std::shared_ptr<TGraphAsymmErrors> &g, const unsigned int nSlots,
171 std::shared_ptr<PrevNodeType> prevNode, ActionTags::GraphAsymmErrors, const RColumnRegister &colRegister)
172{
173 using Helper_t = FillTGraphAsymmErrorsHelper;
174 using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColTypes...>>;
175 return std::make_unique<Action_t>(Helper_t(g, nSlots), bl, std::move(prevNode), colRegister);
176}
177
178// Min action
179template <typename ColType, typename PrevNodeType, typename ActionResultType>
180std::unique_ptr<RActionBase>
181BuildAction(const ColumnNames_t &bl, const std::shared_ptr<ActionResultType> &minV, const unsigned int nSlots,
182 std::shared_ptr<PrevNodeType> prevNode, ActionTags::Min, const RColumnRegister &colRegister)
183{
184 using Helper_t = MinHelper<ActionResultType>;
185 using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColType>>;
186 return std::make_unique<Action_t>(Helper_t(minV, nSlots), bl, std::move(prevNode), colRegister);
187}
188
189// Max action
190template <typename ColType, typename PrevNodeType, typename ActionResultType>
191std::unique_ptr<RActionBase>
192BuildAction(const ColumnNames_t &bl, const std::shared_ptr<ActionResultType> &maxV, const unsigned int nSlots,
193 std::shared_ptr<PrevNodeType> prevNode, ActionTags::Max, const RColumnRegister &colRegister)
194{
195 using Helper_t = MaxHelper<ActionResultType>;
196 using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColType>>;
197 return std::make_unique<Action_t>(Helper_t(maxV, nSlots), bl, std::move(prevNode), colRegister);
198}
199
200// Sum action
201template <typename ColType, typename PrevNodeType, typename ActionResultType>
202std::unique_ptr<RActionBase>
203BuildAction(const ColumnNames_t &bl, const std::shared_ptr<ActionResultType> &sumV, const unsigned int nSlots,
204 std::shared_ptr<PrevNodeType> prevNode, ActionTags::Sum, const RColumnRegister &colRegister)
205{
206 using Helper_t = SumHelper<ActionResultType>;
207 using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColType>>;
208 return std::make_unique<Action_t>(Helper_t(sumV, nSlots), bl, std::move(prevNode), colRegister);
209}
210
211// Mean action
212template <typename ColType, typename PrevNodeType>
213std::unique_ptr<RActionBase>
214BuildAction(const ColumnNames_t &bl, const std::shared_ptr<double> &meanV, const unsigned int nSlots,
215 std::shared_ptr<PrevNodeType> prevNode, ActionTags::Mean, const RColumnRegister &colRegister)
216{
217 using Helper_t = MeanHelper;
218 using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColType>>;
219 return std::make_unique<Action_t>(Helper_t(meanV, nSlots), bl, std::move(prevNode), colRegister);
220}
221
222// Standard Deviation action
223template <typename ColType, typename PrevNodeType>
224std::unique_ptr<RActionBase>
225BuildAction(const ColumnNames_t &bl, const std::shared_ptr<double> &stdDeviationV, const unsigned int nSlots,
226 std::shared_ptr<PrevNodeType> prevNode, ActionTags::StdDev, const RColumnRegister &colRegister)
227{
228 using Helper_t = StdDevHelper;
229 using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColType>>;
230 return std::make_unique<Action_t>(Helper_t(stdDeviationV, nSlots), bl, prevNode, colRegister);
231}
232
233using displayHelperArgs_t = std::pair<size_t, std::shared_ptr<ROOT::RDF::RDisplay>>;
234
235// Display action
236template <typename... ColTypes, typename PrevNodeType>
237std::unique_ptr<RActionBase>
238BuildAction(const ColumnNames_t &bl, const std::shared_ptr<displayHelperArgs_t> &helperArgs, const unsigned int,
239 std::shared_ptr<PrevNodeType> prevNode, ActionTags::Display, const RColumnRegister &colRegister)
240{
241 using Helper_t = DisplayHelper<PrevNodeType>;
242 using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColTypes...>>;
243 return std::make_unique<Action_t>(Helper_t(helperArgs->first, helperArgs->second, prevNode), bl, prevNode,
244 colRegister);
245}
246
247struct SnapshotHelperArgs {
248 std::string fFileName;
249 std::string fDirName;
250 std::string fTreeName;
251 std::vector<std::string> fOutputColNames;
252 ROOT::RDF::RSnapshotOptions fOptions;
253 ROOT::Detail::RDF::RLoopManager *fOutputLoopManager;
254 ROOT::Detail::RDF::RLoopManager *fInputLoopManager;
255 bool fToNTuple;
256};
257
258// SnapshotTTree action
259template <typename... ColTypes, typename PrevNodeType>
260std::unique_ptr<RActionBase>
261BuildAction(const ColumnNames_t &colNames, const std::shared_ptr<SnapshotHelperArgs> &snapHelperArgs,
262 const unsigned int nSlots, std::shared_ptr<PrevNodeType> prevNode, ActionTags::Snapshot,
263 const RColumnRegister &colRegister)
264{
265 const auto &filename = snapHelperArgs->fFileName;
266 const auto &dirname = snapHelperArgs->fDirName;
267 const auto &treename = snapHelperArgs->fTreeName;
268 const auto &outputColNames = snapHelperArgs->fOutputColNames;
269 const auto &options = snapHelperArgs->fOptions;
270 const auto &lmPtr = snapHelperArgs->fOutputLoopManager;
271 const auto &inputLM = snapHelperArgs->fInputLoopManager;
272
273 auto sz = sizeof...(ColTypes);
274 std::vector<bool> isDefine(sz);
275 for (auto i = 0u; i < sz; ++i)
276 isDefine[i] = colRegister.IsDefineOrAlias(colNames[i]);
277
278 std::unique_ptr<RActionBase> actionPtr;
279 if (snapHelperArgs->fToNTuple) {
280 if (!ROOT::IsImplicitMTEnabled()) {
281 // single-thread snapshot
282 using Helper_t = SnapshotRNTupleHelper<ColTypes...>;
283 using Action_t = RAction<Helper_t, PrevNodeType>;
284
285 actionPtr.reset(new Action_t(
286 Helper_t(filename, dirname, treename, colNames, outputColNames, options, lmPtr, std::move(isDefine)),
287 colNames, prevNode, colRegister));
288 } else {
289 // multi-thread snapshot to RNTuple is not yet supported
290 // TODO(fdegeus) Add MT snapshotting
291 throw std::runtime_error("Snapshot: Snapshotting to RNTuple with IMT enabled is not supported yet.");
292 }
293
294 return actionPtr;
295 } else {
296 if (!ROOT::IsImplicitMTEnabled()) {
297 // single-thread snapshot
298 using Helper_t = SnapshotTTreeHelper<ColTypes...>;
299 using Action_t = RAction<Helper_t, PrevNodeType>;
300 actionPtr.reset(new Action_t(Helper_t(filename, dirname, treename, colNames, outputColNames, options,
301 std::move(isDefine), lmPtr, inputLM),
302 colNames, prevNode, colRegister));
303 } else {
304 // multi-thread snapshot
305 using Helper_t = SnapshotTTreeHelperMT<ColTypes...>;
306 using Action_t = RAction<Helper_t, PrevNodeType>;
307 actionPtr.reset(new Action_t(Helper_t(nSlots, filename, dirname, treename, colNames, outputColNames, options,
308 std::move(isDefine), lmPtr, inputLM),
309 colNames, prevNode, colRegister));
310 }
311 }
312 return actionPtr;
313}
314
315// Book with custom helper type
316template <typename... ColTypes, typename PrevNodeType, typename Helper_t>
317std::unique_ptr<RActionBase>
318BuildAction(const ColumnNames_t &bl, const std::shared_ptr<Helper_t> &h, const unsigned int /*nSlots*/,
319 std::shared_ptr<PrevNodeType> prevNode, ActionTags::Book, const RColumnRegister &colRegister)
320{
321 using Action_t = RAction<Helper_t, PrevNodeType, TTraits::TypeList<ColTypes...>>;
322 return std::make_unique<Action_t>(Helper_t(std::move(*h)), bl, std::move(prevNode), colRegister);
323}
324
325/****** end BuildAndBook ******/
326
327template <typename Filter>
328void CheckFilter(Filter &)
329{
330 using FilterRet_t = typename RDF::CallableTraits<Filter>::ret_type;
331 static_assert(std::is_convertible<FilterRet_t, bool>::value,
332 "filter expression returns a type that is not convertible to bool");
333}
334
335ColumnNames_t FilterArraySizeColNames(const ColumnNames_t &columnNames, const std::string &action);
336
337void CheckValidCppVarName(std::string_view var, const std::string &where);
338
339void CheckForRedefinition(const std::string &where, std::string_view definedCol, const RColumnRegister &colRegister,
340 const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns);
341
342void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister,
343 const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns);
344
345void CheckForNoVariations(const std::string &where, std::string_view definedColView,
346 const RColumnRegister &colRegister);
347
348std::string PrettyPrintAddr(const void *const addr);
349
350std::shared_ptr<RJittedFilter> BookFilterJit(std::shared_ptr<RNodeBase> *prevNodeOnHeap, std::string_view name,
351 std::string_view expression, const ColumnNames_t &branches,
352 const RColumnRegister &colRegister, TTree *tree, RDataSource *ds);
353
354std::shared_ptr<RJittedDefine> BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm,
355 RDataSource *ds, const RColumnRegister &colRegister,
356 const ColumnNames_t &branches, std::shared_ptr<RNodeBase> *prevNodeOnHeap);
357
358std::shared_ptr<RJittedDefine> BookDefinePerSampleJit(std::string_view name, std::string_view expression,
359 RLoopManager &lm, const RColumnRegister &colRegister,
360 std::shared_ptr<RNodeBase> *upcastNodeOnHeap);
361
362std::shared_ptr<RJittedVariation>
363BookVariationJit(const std::vector<std::string> &colNames, std::string_view variationName,
364 const std::vector<std::string> &variationTags, std::string_view expression, RLoopManager &lm,
365 RDataSource *ds, const RColumnRegister &colRegister, const ColumnNames_t &branches,
366 std::shared_ptr<RNodeBase> *upcastNodeOnHeap, bool isSingleColumn);
367
368std::string JitBuildAction(const ColumnNames_t &bl, std::shared_ptr<RDFDetail::RNodeBase> *prevNode,
369 const std::type_info &art, const std::type_info &at, void *rOnHeap, TTree *tree,
370 const unsigned int nSlots, const RColumnRegister &colRegister, RDataSource *ds,
371 std::weak_ptr<RJittedAction> *jittedActionOnHeap, const bool vector2RVec = true);
372
// Create a heap-allocated weak_ptr observing the object managed by `shPtr` and return a raw pointer to it.
// The caller owns the returned weak_ptr and is responsible for deleting it.
// This function is meant to be used by RInterface's methods that book code for jitting: the generated
// code embeds the addresses of certain objects and is jitted lazily, so it must be able to check that
// those objects are still alive when it finally runs. The address of the heap-allocated weak_ptr is
// passed to the jitted code, which then becomes responsible for deleting it.
template <typename T>
std::weak_ptr<T> *MakeWeakOnHeap(const std::shared_ptr<T> &shPtr)
{
   auto heapWeakPtr = std::make_unique<std::weak_ptr<T>>(shPtr);
   return heapWeakPtr.release();
}
384
// Same as MakeWeakOnHeap, but the heap-allocated object is a shared_ptr: it shares ownership with
// `shPtr`, so the managed object is kept alive until the returned shared_ptr is deleted.
// The caller owns the returned shared_ptr and is responsible for deleting it.
template <typename T>
std::shared_ptr<T> *MakeSharedOnHeap(const std::shared_ptr<T> &shPtr)
{
   auto heapSharedPtr = std::make_unique<std::shared_ptr<T>>(shPtr);
   return heapSharedPtr.release();
}
391
392bool AtLeastOneEmptyString(const std::vector<std::string_view> strings);
393
394/// Take a shared_ptr<AnyNodeType> and return a shared_ptr<RNodeBase>.
395/// This works for RLoopManager nodes as well as filters and ranges.
396std::shared_ptr<RNodeBase> UpcastNode(std::shared_ptr<RNodeBase> ptr);
397
398ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns,
399 const RColumnRegister &validDefines, RDataSource *ds);
400
401std::vector<std::string> GetValidatedArgTypes(const ColumnNames_t &colNames, const RColumnRegister &colRegister,
402 TTree *tree, RDataSource *ds, const std::string &context,
403 bool vector2RVec);
404
405std::vector<bool> FindUndefinedDSColumns(const ColumnNames_t &requestedCols, const ColumnNames_t &definedDSCols);
406
407template <typename T>
408void AddDSColumnsHelper(const std::string &colName, RLoopManager &lm, RDataSource &ds, RColumnRegister &colRegister)
409{
410
411 if (colRegister.IsDefineOrAlias(colName))
412 return;
413
414 if (lm.HasDataSourceColumnReaders(colName, typeid(T)))
415 return;
416
417 if (!ds.HasColumn(colName) &&
418 lm.GetSuppressErrorsForMissingBranches().find(colName) == lm.GetSuppressErrorsForMissingBranches().end())
419 return;
420
421 const auto nSlots = lm.GetNSlots();
422 std::vector<std::unique_ptr<RColumnReaderBase>> colReaders;
423 colReaders.reserve(nSlots);
424
425 const auto valuePtrs = ds.GetColumnReaders<T>(colName);
426 if (!valuePtrs.empty()) { // we are using the old GetColumnReaders mechanism in this RDataSource
427 for (auto *ptr : valuePtrs)
428 colReaders.emplace_back(new RDSColumnReader<T>(ptr));
429
430 } else { // using the new GetColumnReaders mechanism
431 // TODO consider changing the interface so we return all of these for all slots in one go
432 for (auto slot = 0u; slot < lm.GetNSlots(); ++slot)
433 colReaders.emplace_back(
434 ROOT::Internal::RDF::CreateColumnReader(ds, slot, colName, typeid(T), /*treeReader*/ nullptr));
435 }
436
437 lm.AddDataSourceColumnReaders(colName, std::move(colReaders), typeid(T));
438}
439
440/// Take list of column names that must be defined, current map of custom columns, current list of defined column names,
441/// and return a new map of custom columns (with the new datasource columns added to it)
442template <typename... ColumnTypes>
443void AddDSColumns(const std::vector<std::string> &requiredCols, RLoopManager &lm, RDataSource &ds,
444 TTraits::TypeList<ColumnTypes...>, RColumnRegister &colRegister)
445{
446 // hack to expand a template parameter pack without c++17 fold expressions.
447 using expander = int[];
448 int i = 0;
449 (void)expander{(AddDSColumnsHelper<ColumnTypes>(requiredCols[i], lm, ds, colRegister), ++i)..., 0};
450}
451
452// this function is meant to be called by the jitted code generated by BookFilterJit
453template <typename F, typename PrevNode>
454void JitFilterHelper(F &&f, const char **colsPtr, std::size_t colsSize, std::string_view name,
455 std::weak_ptr<RJittedFilter> *wkJittedFilter, std::shared_ptr<PrevNode> *prevNodeOnHeap,
456 RColumnRegister *colRegister) noexcept
457{
458 if (wkJittedFilter->expired()) {
459 // The branch of the computation graph that needed this jitted code went out of scope between the type
460 // jitting was booked and the time jitting actually happened. Nothing to do other than cleaning up.
461 delete wkJittedFilter;
462 delete colRegister;
463 delete prevNodeOnHeap;
464 return;
465 }
466
467 const ColumnNames_t cols(colsPtr, colsPtr + colsSize);
468 delete[] colsPtr;
469
470 const auto jittedFilter = wkJittedFilter->lock();
471
472 // mock Filter logic -- validity checks and Define-ition of RDataSource columns
473 using Callable_t = std::decay_t<F>;
474 using F_t = RFilter<Callable_t, PrevNode>;
475 using ColTypes_t = typename TTraits::CallableTraits<Callable_t>::arg_types;
476 constexpr auto nColumns = ColTypes_t::list_size;
477 CheckFilter(f);
478
479 auto &lm = *jittedFilter->GetLoopManagerUnchecked(); // RLoopManager must exist at this time
480 auto ds = lm.GetDataSource();
481
482 if (ds != nullptr)
483 AddDSColumns(cols, lm, *ds, ColTypes_t(), *colRegister);
484
485 jittedFilter->SetFilter(
486 std::unique_ptr<RFilterBase>(new F_t(std::forward<F>(f), cols, *prevNodeOnHeap, *colRegister, name)));
487 // colRegister points to the columns structure in the heap, created before the jitted call so that the jitter can
488 // share data after it has lazily compiled the code. Here the data has been used and the memory can be freed.
489 delete colRegister;
490 delete prevNodeOnHeap;
491 delete wkJittedFilter;
492}
493
/// Empty tag types used to pick a MakeDefineNode overload at compile time.
namespace DefineTypes {
/// Selects the MakeDefineNode overload that builds a RDefine.
struct RDefineTag {
};
/// Selects the MakeDefineNode overload that builds a RDefinePerSample.
struct RDefinePerSampleTag {
};
} // namespace DefineTypes
498
499template <typename F>
500auto MakeDefineNode(DefineTypes::RDefineTag, std::string_view name, std::string_view dummyType, F &&f,
501 const ColumnNames_t &cols, RColumnRegister &colRegister, RLoopManager &lm)
502{
503 return std::unique_ptr<RDefineBase>(new RDefine<std::decay_t<F>, ExtraArgsForDefine::None>(
504 name, dummyType, std::forward<F>(f), cols, colRegister, lm));
505}
506
507template <typename F>
508auto MakeDefineNode(DefineTypes::RDefinePerSampleTag, std::string_view name, std::string_view dummyType, F &&f,
509 const ColumnNames_t &, RColumnRegister &, RLoopManager &lm)
510{
511 return std::unique_ptr<RDefineBase>(
512 new RDefinePerSample<std::decay_t<F>>(name, dummyType, std::forward<F>(f), lm));
513}
514
515// Build a RDefine or a RDefinePerSample object and attach it to an existing RJittedDefine
516// This function is meant to be called by jitted code right before starting the event loop.
517// If colsPtr is null, build a RDefinePerSample (it has no input columns), otherwise a RDefine.
518template <typename RDefineTypeTag, typename F>
519void JitDefineHelper(F &&f, const char **colsPtr, std::size_t colsSize, std::string_view name, RLoopManager *lm,
520 std::weak_ptr<RJittedDefine> *wkJittedDefine, RColumnRegister *colRegister,
521 std::shared_ptr<RNodeBase> *prevNodeOnHeap) noexcept
522{
523 // a helper to delete objects allocated before jitting, so that the jitter can share data with lazily jitted code
524 auto doDeletes = [&] {
525 delete wkJittedDefine;
526 delete colRegister;
527 delete prevNodeOnHeap;
528 delete[] colsPtr;
529 };
530
531 if (wkJittedDefine->expired()) {
532 // The branch of the computation graph that needed this jitted code went out of scope between the type
533 // jitting was booked and the time jitting actually happened. Nothing to do other than cleaning up.
534 doDeletes();
535 return;
536 }
537
538 const ColumnNames_t cols(colsPtr, colsPtr + colsSize);
539
540 auto jittedDefine = wkJittedDefine->lock();
541
542 using Callable_t = std::decay_t<F>;
543 using ColTypes_t = typename TTraits::CallableTraits<Callable_t>::arg_types;
544
545 auto ds = lm->GetDataSource();
546 if (ds != nullptr && colsPtr)
547 AddDSColumns(cols, *lm, *ds, ColTypes_t(), *colRegister);
548
549 // will never actually be used (trumped by jittedDefine->GetTypeName()), but we set it to something meaningful
550 // to help devs debugging
551 const auto dummyType = "jittedCol_t";
552 // use unique_ptr<RDefineBase> instead of make_unique<NewCol_t> to reduce jit/compile-times
553 std::unique_ptr<RDefineBase> newCol{
554 MakeDefineNode(RDefineTypeTag{}, name, dummyType, std::forward<F>(f), cols, *colRegister, *lm)};
555 jittedDefine->SetDefine(std::move(newCol));
556
557 doDeletes();
558}
559
560template <bool IsSingleColumn, typename F>
561void JitVariationHelper(F &&f, const char **colsPtr, std::size_t colsSize, const char **variedCols,
562 std::size_t variedColsSize, const char **variationTags, std::size_t variationTagsSize,
563 std::string_view variationName, RLoopManager *lm,
564 std::weak_ptr<RJittedVariation> *wkJittedVariation, RColumnRegister *colRegister,
565 std::shared_ptr<RNodeBase> *prevNodeOnHeap) noexcept
566{
567 // a helper to delete objects allocated before jitting, so that the jitter can share data with lazily jitted code
568 auto doDeletes = [&] {
569 delete[] colsPtr;
570 delete[] variedCols;
571 delete[] variationTags;
572
573 delete wkJittedVariation;
574 delete colRegister;
575 delete prevNodeOnHeap;
576 };
577
578 if (wkJittedVariation->expired()) {
579 // The branch of the computation graph that needed this jitted variation went out of scope between the type
580 // jitting was booked and the time jitting actually happened. Nothing to do other than cleaning up.
581 doDeletes();
582 return;
583 }
584
585 const ColumnNames_t inputColNames(colsPtr, colsPtr + colsSize);
586 std::vector<std::string> variedColNames(variedCols, variedCols + variedColsSize);
587 std::vector<std::string> tags(variationTags, variationTags + variationTagsSize);
588
589 auto jittedVariation = wkJittedVariation->lock();
590
591 using Callable_t = std::decay_t<F>;
592 using ColTypes_t = typename TTraits::CallableTraits<Callable_t>::arg_types;
593
594 auto ds = lm->GetDataSource();
595 if (ds != nullptr)
596 AddDSColumns(inputColNames, *lm, *ds, ColTypes_t(), *colRegister);
597
598 // use unique_ptr<RDefineBase> instead of make_unique<NewCol_t> to reduce jit/compile-times
599 std::unique_ptr<RVariationBase> newVariation{new RVariation<std::decay_t<F>, IsSingleColumn>(
600 std::move(variedColNames), variationName, std::forward<F>(f), std::move(tags), jittedVariation->GetTypeName(),
601 *colRegister, *lm, inputColNames)};
602 jittedVariation->SetVariation(std::move(newVariation));
603
604 doDeletes();
605}
606
607/// Convenience function invoked by jitted code to build action nodes at runtime
608template <typename ActionTag, typename... ColTypes, typename PrevNodeType, typename HelperArgType>
609void CallBuildAction(std::shared_ptr<PrevNodeType> *prevNodeOnHeap, const char **colsPtr, std::size_t colsSize,
610 const unsigned int nSlots, std::shared_ptr<HelperArgType> *helperArgOnHeap,
611 std::weak_ptr<RJittedAction> *wkJittedActionOnHeap, RColumnRegister *colRegister) noexcept
612{
613 // a helper to delete objects allocated before jitting, so that the jitter can share data with lazily jitted code
614 auto doDeletes = [&] {
615 delete[] colsPtr;
616 delete helperArgOnHeap;
617 delete wkJittedActionOnHeap;
618 // colRegister must be deleted before prevNodeOnHeap because their dtor needs the RLoopManager to be alive
619 // and prevNodeOnHeap is what keeps it alive if the rest of the computation graph is already out of scope
620 delete colRegister;
621 delete prevNodeOnHeap;
622 };
623
624 if (wkJittedActionOnHeap->expired()) {
625 // The branch of the computation graph that needed this jitted variation went out of scope between the type
626 // jitting was booked and the time jitting actually happened. Nothing to do other than cleaning up.
627 doDeletes();
628 return;
629 }
630
631 const ColumnNames_t cols(colsPtr, colsPtr + colsSize);
632
633 auto jittedActionOnHeap = wkJittedActionOnHeap->lock();
634
635 // if we are here it means we are jitting, if we are jitting the loop manager must be alive
636 auto &prevNodePtr = *prevNodeOnHeap;
637 auto &loopManager = *prevNodePtr->GetLoopManagerUnchecked();
638 using ColTypes_t = TypeList<ColTypes...>;
639 constexpr auto nColumns = ColTypes_t::list_size;
640 auto ds = loopManager.GetDataSource();
641 if (ds != nullptr)
642 AddDSColumns(cols, loopManager, *ds, ColTypes_t(), *colRegister);
643
644 auto actionPtr = BuildAction<ColTypes...>(cols, std::move(*helperArgOnHeap), nSlots, std::move(prevNodePtr),
645 ActionTag{}, *colRegister);
646 jittedActionOnHeap->SetAction(std::move(actionPtr));
647
648 doDeletes();
649}
650
651/// The contained `type` alias is `double` if `T == RInferredType`, `U` if `T == std::container<U>`, `T` otherwise.
652template <typename T, bool Container = IsDataContainer<T>::value && !std::is_same<T, std::string>::value>
653struct RMinReturnType {
654 using type = T;
655};
656
657template <>
658struct RMinReturnType<RInferredType, false> {
659 using type = double;
660};
661
662template <typename T>
663struct RMinReturnType<T, true> {
664 using type = TTraits::TakeFirstParameter_t<T>;
665};
666
667// return wrapper around f that prepends an `unsigned int slot` parameter
668template <typename R, typename F, typename... Args>
669std::function<R(unsigned int, Args...)> AddSlotParameter(F &f, TypeList<Args...>)
670{
671 return [f](unsigned int, Args... a) mutable -> R { return f(a...); };
672}
673
674template <typename ColType, typename... Rest>
675struct RNeedJittingHelper {
676 static constexpr bool value = RNeedJittingHelper<Rest...>::value;
677};
678
679template <typename... Rest>
680struct RNeedJittingHelper<RInferredType, Rest...> {
681 static constexpr bool value = true;
682};
683
684template <typename T>
685struct RNeedJittingHelper<T> {
686 static constexpr bool value = false;
687};
688
689template <>
690struct RNeedJittingHelper<RInferredType> {
691 static constexpr bool value = true;
692};
693
694template <typename ...ColTypes>
695struct RNeedJitting {
696 static constexpr bool value = RNeedJittingHelper<ColTypes...>::value;
697};
698
699template <>
700struct RNeedJitting<> {
701 static constexpr bool value = false;
702};
703
704///////////////////////////////////////////////////////////////////////////////
705/// Check preconditions for RInterface::Aggregate:
706/// - the aggregator callable must have signature `U(U,T)` or `void(U&,T)`.
707/// - the merge callable must have signature `U(U,U)` or `void(std::vector<U>&)`
708template <typename R, typename Merge, typename U, typename T, typename decayedU = std::decay_t<U>,
709 typename mergeArgsNoDecay_t = typename CallableTraits<Merge>::arg_types_nodecay,
710 typename mergeArgs_t = typename CallableTraits<Merge>::arg_types,
711 typename mergeRet_t = typename CallableTraits<Merge>::ret_type>
712void CheckAggregate(TypeList<U, T>)
713{
714 constexpr bool isAggregatorOk =
715 (std::is_same<R, decayedU>::value) || (std::is_same<R, void>::value && std::is_lvalue_reference<U>::value);
716 static_assert(isAggregatorOk, "aggregator function must have signature `U(U,T)` or `void(U&,T)`");
717 constexpr bool isMergeOk =
718 (std::is_same<TypeList<decayedU, decayedU>, mergeArgs_t>::value && std::is_same<decayedU, mergeRet_t>::value) ||
719 (std::is_same<TypeList<std::vector<decayedU> &>, mergeArgsNoDecay_t>::value &&
720 std::is_same<void, mergeRet_t>::value);
721 static_assert(isMergeOk, "merge function must have signature `U(U,U)` or `void(std::vector<U>&)`");
722}
723
///////////////////////////////////////////////////////////////////////////////
/// Fallback overload of CheckAggregate, selected when the argument is not a list of exactly two types,
/// i.e. when the aggregator does not take exactly two arguments. Instantiating it is a compile-time error.
template <typename R, typename T>
void CheckAggregate(T)
{
   // the condition depends on T so that the assertion only fires when this overload is actually instantiated
   static_assert(!std::is_same<T, T>::value, "aggregator function must take exactly two arguments");
}
731
732///////////////////////////////////////////////////////////////////////////////
733/// Check as many template parameters were passed as the number of column names, throw if this is not the case.
734void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames);
735
736/// Return local BranchNames or default BranchNames according to which one should be used
737const ColumnNames_t SelectColumns(unsigned int nArgs, const ColumnNames_t &bl, const ColumnNames_t &defBl);
738
739/// Check whether column names refer to a valid branch of a TTree or have been `Define`d. Return invalid column names.
740ColumnNames_t FindUnknownColumns(const ColumnNames_t &requiredCols, const ColumnNames_t &datasetColumns,
741 const RColumnRegister &definedCols, const ColumnNames_t &dataSourceColumns);
742
743/// Returns the list of Filters defined in the whole graph
744std::vector<std::string> GetFilterNames(const std::shared_ptr<RLoopManager> &loopManager);
745
/// Returns the list of Filters defined in the branch.
/// The names are collected by calling `AddFilterName` on `node`, which fills the vector passed to it.
template <typename NodeType>
std::vector<std::string> GetFilterNames(const std::shared_ptr<NodeType> &node)
{
   std::vector<std::string> names;
   node->AddFilterName(names);
   return names;
}
754
/// Pair of strings returned by ParseTreePath.
/// NOTE(review): presumably the two components a full tree name is split into -- confirm against the
/// implementation of ParseTreePath.
struct ParsedTreePath {
   std::string fTreeName; // name of the tree
   std::string fDirName;  // name of the directory
};
759
760ParsedTreePath ParseTreePath(std::string_view fullTreeName);
761
// Check if a condition is true for all types

// Helper that only carries a pack of booleans in its type; declared, never defined.
template <bool...>
struct TBoolPack;

// Shifting a pack of booleans by one `true` leaves it unchanged if and only if every element is `true`:
// the two TBoolPack types below are therefore the same type exactly when all `bs` are true.
template <bool... bs>
using IsTrueForAllImpl_t = std::is_same<TBoolPack<bs..., true>, TBoolPack<true, bs...>>;

// `TEvalAnd<Conditions...>::value` is the logical AND of all `Conditions` (true for an empty pack).
template <bool... Conditions>
struct TEvalAnd {
   static constexpr bool value = (Conditions && ...);
};
773
// Check if a class is a specialisation of stl containers templates
// clang-format off

/// `IsList_t<C>::value` is true when `C` is `std::list<Elem>` for some `Elem`, false otherwise.
template <typename Container>
struct IsList_t : std::false_type {};

template <typename Elem>
struct IsList_t<std::list<Elem>> : std::true_type {};

/// `IsDeque_t<C>::value` is true when `C` is `std::deque<Elem>` for some `Elem`, false otherwise.
template <typename Container>
struct IsDeque_t : std::false_type {};

template <typename Elem>
struct IsDeque_t<std::deque<Elem>> : std::true_type {};
// clang-format on
789
790void CheckForDuplicateSnapshotColumns(const ColumnNames_t &cols);
791
792template <typename T>
793struct InnerValueType {
794 using type = T; // fallback for when T is not a nested RVec
795};
796
797template <typename Elem>
798struct InnerValueType<ROOT::VecOps::RVec<ROOT::VecOps::RVec<Elem>>> {
799 using type = Elem;
800};
801
802template <typename T>
803using InnerValueType_t = typename InnerValueType<T>::type;
804
805std::pair<std::vector<std::string>, std::vector<std::string>>
806AddSizeBranches(const std::vector<std::string> &branches, ROOT::RDF::RDataSource *ds,
807 std::vector<std::string> &&colsWithoutAliases, std::vector<std::string> &&colsWithAliases);
808
809void RemoveDuplicates(ColumnNames_t &columnNames);
810void RemoveRNTupleSubFields(ColumnNames_t &columnNames);
811
812} // namespace RDF
813} // namespace Internal
814
815namespace Detail {
816namespace RDF {
817
818/// The aliased type is `double` if `T == RInferredType`, `U` if `T == container<U>`, `T` otherwise.
819template <typename T>
820using MinReturnType_t = typename RDFInternal::RMinReturnType<T>::type;
821
822template <typename T>
823using MaxReturnType_t = MinReturnType_t<T>;
824
825template <typename T>
826using SumReturnType_t = MinReturnType_t<T>;
827
828} // namespace RDF
829} // namespace Detail
830} // namespace ROOT
831
832/// \endcond
833
834#endif