Skim using gbasf2#

Finally, let us look at the skim task whose output files make up the input mdst lists for the reconstruction.

Listing 3.5 skim.py#
 1# @cond
 2import b2luigi as luigi
 3from b2luigi.basf2_helper.tasks import Basf2PathTask
 4
 5import os
 6import json
 7import basf2 as b2
 8import modularAnalysis as ma
 9import vertex as vx
10
11
class SkimTask(Basf2PathTask):
    """Skim an input dataset for B0 -> D- pi+ candidates using the gbasf2 batch system.

    The basf2 path built in ``create_path`` reconstructs D- -> K+ pi- pi-,
    combines the D- with a pion into a B0 candidate, applies a tree fit and
    writes the surviving candidates to a udst file.

    Parameters:
        gbasf2_project_name_prefix: prefix for the gbasf2 project name
            (declared non-significant).
        gbasf2_input_dataset: input dataset handed to ``inputMdstList``.
        runningOnMC: forwarded as ``mc`` to the udst output.
    """

    batch_system = "gbasf2"
    gbasf2_project_name_prefix = luigi.Parameter(significant=False)
    gbasf2_input_dataset = luigi.Parameter(hashed=True)
    # gbasf2_release = "<release name>"  # defaults to current basf2 release if not specified
    gbasf2_print_status_updates = True
    gbasf2_max_retries = 10
    gbasf2_download_dataset = True
    gbasf2_download_logs = False

    runningOnMC = luigi.BoolParameter()

    def output(self):
        """Declare the single udst output of the skim."""
        # udst.skimmed_udst_output will add ending .udst.root if different
        yield self.add_to_output("skim.udst.root")

    def create_path(self):
        """Build and return the basf2 path that performs the skim."""
        path = b2.create_path()

        # NOTE(review): entrySequences=['0:10'] presumably restricts processing to
        # the first entries of each input file -- confirm before production use.
        ma.inputMdstList(
            filelist=self.gbasf2_input_dataset,
            path=path,
            entrySequences=['0:10'],
        )

        # Charged final-state particles: kaons carry an additional kaonID requirement.
        kaon_cut = "dr < 0.5 and abs(dz) < 3 and thetaInCDCAcceptance and kaonID > 0.01"
        pion_cut = "dr < 0.5 and abs(dz) < 3 and thetaInCDCAcceptance"
        ma.fillParticleList(decayString='K+:my', cut=kaon_cut, path=path)
        ma.fillParticleList(decayString='pi+:my', cut=pion_cut, path=path)

        # Intermediate D- candidate.
        ma.reconstructDecay(
            decayString="D-:K2Pi -> K+:my pi-:my pi-:my",
            cut="1.5 < M < 2.2",
            path=path,
        )

        # B0 candidate: loose pre-selection, tree fit, then the tighter final cuts.
        b0_list = 'B0:PiD-toK2Pi'
        ma.reconstructDecay(
            decayString='B0:PiD-toK2Pi -> D-:K2Pi pi+:my',
            cut='5.0 < Mbc and abs(deltaE) < 1.0',
            path=path,
        )
        vx.treeFit(
            b0_list,
            0,
            path=path,
            updateAllDaughters=False,
            ipConstraint=True,
            massConstraint=[411],
        )
        ma.applyCuts(b0_list, '5.2 < Mbc and abs(deltaE) < 0.5', path=path)

        import udst
        # Dump in UDST format. The plain udst output is used because basf2
        # currently does not support pickling paths with skimmed udst outputs.
        # The skimmed alternative would be:
        #   udst.add_skimmed_udst_output(path, skimDecayMode="BtoPiD",
        #                                skimParticleLists=['B0:PiD-toK2Pi'], mc=self.runningOnMC,
        #                                outputFile="skim.udst.root")
        # WARNING: do not use self.get_output_file_name for outputFile there.
        udst.add_udst_output(
            path=path,
            filename="skim.udst.root",
            particleLists=[b0_list],
            mc=self.runningOnMC,
        )

        return path
52
53
class BatchesToTextFile(luigi.Task):
    """Split the files of a downloaded skim into batches and write one JSON list per batch.

    Requires a ``SkimTask`` for the given dataset. Each output file
    ``batch<i>.json`` contains a JSON array with the paths of the skim
    output files assigned to batch ``i``.

    Parameters:
        skim: input dataset forwarded to ``SkimTask.gbasf2_input_dataset``.
        projectName: forwarded to ``SkimTask.gbasf2_project_name_prefix``.
        runningOnMC: forwarded to ``SkimTask.runningOnMC``.
    """

    batch_system = 'local'
    skim = luigi.Parameter(hashed=True)
    projectName = luigi.Parameter()
    runningOnMC = luigi.BoolParameter()
    # Number of batch files written per skim.
    NumBatches = 3

    def requires(self):
        """Require the skim whose output files are to be batched."""
        yield SkimTask(
            runningOnMC=self.runningOnMC,
            gbasf2_project_name_prefix=self.projectName,
            gbasf2_input_dataset=self.skim,
        )

    def get_batch_file_names(self, key="skim.udst.root"):
        """Return a dict mapping ``batch<i>.json`` to the list of skim files in batch ``i``.

        The files found in the skim output directory are sorted and spread
        over ``NumBatches`` contiguous batches whose sizes differ by at most
        one. Batches may be empty when there are fewer files than batches.
        """
        inputdir = self._transform_input(self.input(), key)[0]
        # os.listdir returns entries in arbitrary order; sort so that the
        # content of every batch is reproducible between runs.
        skimfiles = [f"{inputdir}/{name}" for name in sorted(os.listdir(inputdir))]

        # Spread the remainder over the first batches instead of putting it
        # all into the last one.
        base_size, remainder = divmod(len(skimfiles), self.NumBatches)

        batches = {}
        start = 0
        for batch in range(self.NumBatches):
            stop = start + base_size + (1 if batch < remainder else 0)
            batches[f"batch{batch}.json"] = skimfiles[start:stop]
            start = stop
        return batches

    def output(self):
        """Declare one JSON output file per batch."""
        for batch in range(self.NumBatches):
            yield self.add_to_output(f"batch{batch}.json")

    def run(self):
        """Write the file list of every batch to its JSON output file."""
        for key, file_list in self.get_batch_file_names().items():
            # Skip batches not listed in an optional ``keys`` attribute.
            if hasattr(self, "keys") and key not in self.keys:
                continue

            with open(self.get_output_file_name(key), "w") as f:
                json.dump(file_list, f)
94# @endcond

The BatchesToTextFile task fills text files with lists of skim output mdst paths, where NumBatches specifies the number of batches per skim. Instead of passing the file lists through text files, one could pass a batch directly to the reconstruction by merging the corresponding skim output files. Although this would be more direct, it would require a significant amount of additional storage space.

The SkimTask task again employs b2luigi.basf2_helper.Basf2PathTask to run a minimal steering script on the specified datasets and has a number of parameters specific to gbasf2, such as batch_system = "gbasf2". b2luigi will automatically set up a proxy, reschedule failed gbasf2 jobs up to gbasf2_max_retries times, download the skims, and check them for completeness. Make sure that you are using the latest release of b2luigi, which may not yet be available on the Python Package Index.