Skim using gbasf2
Finally, let us look at the skim task whose output files make up the input mdst lists for the reconstruction.
1# @cond
2import b2luigi as luigi
3from b2luigi.basf2_helper.tasks import Basf2PathTask
4
5import os
6import json
7import basf2 as b2
8import modularAnalysis as ma
9import vertex as vx
10
11
class SkimTask(Basf2PathTask):
    """Skim task that runs on the grid via the ``gbasf2`` batch system.

    Builds a basf2 path that reconstructs B0 -> D-(-> K+ pi- pi-) pi+
    candidates from the input mdst dataset and writes the surviving
    candidates to a udst file, which downstream tasks consume as input.
    """

    # Submit the basf2 path as a gbasf2 grid project.
    batch_system = "gbasf2"
    # Prefix of the gbasf2 project name; not significant for the task hash.
    gbasf2_project_name_prefix = luigi.Parameter(significant=False)
    # Grid dataset used as input; hashed because dataset paths can be long.
    gbasf2_input_dataset = luigi.Parameter(hashed=True)
    # gbasf2_release = "<release name>"  # defaults to current basf2 release if not specified
    gbasf2_print_status_updates = True
    gbasf2_max_retries = 10
    gbasf2_download_dataset = True
    gbasf2_download_logs = False

    # True when the input dataset is simulation; forwarded to the udst output.
    runningOnMC = luigi.BoolParameter()

    def output(self):
        """Declare the single udst output file of this task."""
        yield self.add_to_output("skim.udst.root")  # udst.skimmed_udst_output will add ending .udst.root if different

    def create_path(self):
        """Build and return the basf2 skim path that is executed on the grid."""
        mypath = b2.create_path()
        # entrySequences limits each input file to its first few entries —
        # presumably tutorial-sized; remove it for a full-statistics skim.
        ma.inputMdstList(filelist=self.gbasf2_input_dataset, path=mypath, entrySequences=['0:10'])

        # Charged-track candidate lists with basic impact-parameter and
        # acceptance requirements (plus a loose PID cut for kaons).
        ma.fillParticleList(
            decayString='K+:my',
            cut="dr < 0.5 and abs(dz) < 3 and thetaInCDCAcceptance and kaonID > 0.01",
            path=mypath)
        ma.fillParticleList(decayString='pi+:my', cut="dr < 0.5 and abs(dz) < 3 and thetaInCDCAcceptance", path=mypath)

        # D- -> K+ pi- pi- with a loose invariant-mass window.
        ma.reconstructDecay(decayString="D-:K2Pi -> K+:my pi-:my pi-:my", cut="1.5 < M < 2.2", path=mypath)

        # B0 -> D- pi+ with loose Mbc/deltaE pre-cuts, then a vertex fit and
        # tighter final cuts.
        # NOTE(review): the second positional argument (0) is presumably the
        # confidence-level cut of treeFit — confirm against the vertex.treeFit
        # signature. 411 is the D+- PDG code used for the mass constraint.
        ma.reconstructDecay(decayString='B0:PiD-toK2Pi -> D-:K2Pi pi+:my', cut='5.0 < Mbc and abs(deltaE) < 1.0', path=mypath)
        vx.treeFit('B0:PiD-toK2Pi', 0, path=mypath, updateAllDaughters=False, ipConstraint=True, massConstraint=[411])
        ma.applyCuts('B0:PiD-toK2Pi', '5.2 < Mbc and abs(deltaE) < 0.5', path=mypath)

        # Imported locally because the module is only needed when the path is
        # actually built.
        import udst
        # dump in UDST format
        # basf2 currently does not support pickling paths with skimmed udst outputs
        udst.add_udst_output(path=mypath, filename="skim.udst.root", particleLists=['B0:PiD-toK2Pi'], mc=self.runningOnMC)

        # udst.add_skimmed_udst_output(mypath, skimDecayMode="BtoPiD", skimParticleLists=['B0:PiD-toK2Pi'], mc=self.runningOnMC,
        #                              outputFile="skim.udst.root"  # WARNING: here do not use self.get_output_file_name
        #                              )
        return mypath
52
53
class BatchesToTextFile(luigi.Task):
    """Split the skim output files into ``NumBatches`` JSON file lists.

    Each output ``batch<i>.json`` contains a JSON list of paths to skim udst
    files; downstream reconstruction tasks each read one batch. Runs locally,
    since it only writes small text files.
    """

    # This lightweight task runs locally, not on a batch system.
    batch_system = 'local'
    # Grid dataset path of the skim; hashed because dataset paths can be long.
    skim = luigi.Parameter(hashed=True)
    # Forwarded to SkimTask as the gbasf2 project name prefix.
    projectName = luigi.Parameter()
    # True when the input dataset is simulation; forwarded to SkimTask.
    runningOnMC = luigi.BoolParameter()
    # Number of batches the skim output is split into.
    NumBatches = 3

    def requires(self):
        """Require the grid skim whose downloaded output we split into batches."""
        yield SkimTask(
            runningOnMC=self.runningOnMC,
            gbasf2_project_name_prefix=self.projectName,
            gbasf2_input_dataset=self.skim,
        )

    @staticmethod
    def _split_into_batches(files, num_batches):
        """Partition *files* into *num_batches* contiguous slices.

        The last batch absorbs any remainder, so every file is covered even
        when ``len(files)`` is not divisible by ``num_batches``.
        """
        width = len(files) // num_batches  # integer division, not int(a/b)
        batches = {}
        for i in range(num_batches):
            start = width * i
            # The final slice runs to the end of the list.
            stop = None if i == num_batches - 1 else width * (i + 1)
            batches[f"batch{i}.json"] = list(files[start:stop])
        return batches

    def get_batch_file_names(self, key="skim.udst.root"):
        """Return a mapping ``batch<i>.json`` -> list of skim file paths."""
        inputdir = self._transform_input(self.input(), key)[0]
        # sorted(): os.listdir order is arbitrary, and unsorted listings would
        # make the batch contents nondeterministic between runs.
        skimfiles = [f"{inputdir}/{name}" for name in sorted(os.listdir(inputdir))]
        return self._split_into_batches(skimfiles, self.NumBatches)

    def output(self):
        """Declare one JSON output file per batch."""
        for batch in range(self.NumBatches):
            yield self.add_to_output(f"batch{batch}.json")

    def run(self):
        for key, file_list in self.get_batch_file_names().items():
            # b2luigi may set self.keys to restrict which outputs to produce;
            # skip batches that were not requested.
            if hasattr(self, "keys") and key not in self.keys:
                continue

            # "w" (not "w+"): we only write. json.dump streams directly to f.
            with open(self.get_output_file_name(key), "w") as f:
                json.dump(file_list, f)
94# @endcond
The BatchesToTextFile
task fills text files with lists of skim output mdst paths, where NumBatches
specifies the number of batches per skim. Instead of going through text files, one could pass a batch directly to the reconstruction by merging the corresponding skim output files. However, this would require a significant amount of additional storage space.
The SkimTask
task again employs b2luigi.basf2_helper.Basf2PathTask
to run a minimal steering script on the specified datasets and has a number of parameters specific to gbasf2, such as batch_system = "gbasf2"
. b2luigi will automatically set up a proxy, reschedule failed gbasf2 jobs a maximum of gbasf2_max_retries
times, download the skims and check for their completeness. Make sure that you are using the latest release of b2luigi, which may not be on the python package index.