Separate Scripts for the Snakemake Workflow

Separate Scripts for the Snakemake Workflow#

For completeness, we here provide the separate analysis scripts called by the snakemake workflow. Existing scripts can be implemented with the script: directive in the snakefile with minimal adaptations. The input and output file paths, as well as the parameters for the corresponding rule can be retrieved in the scripts with snakemake.input, snakemake.output and snakemake.params.

Listing 3.11 offlineanalysis.py#
# @cond
import uproot
import matplotlib.pyplot as plt

output_file = {"Mbc": snakemake.output.mbc_plot, "deltaE": snakemake.output.deltaE_plot}
BBdata = snakemake.input.data_BB
QQdata = snakemake.input.data_QQ

treeName = 'BtoPiDtoKPiPi'
some_variables = ["Mbc", "deltaE"]
BBtuple = uproot.open(f"{BBdata}:{treeName}")
QQtuple = uproot.open(f"{QQdata}:{treeName}")

for var in some_variables:
    plt.hist([QQtuple[var].array(), BBtuple[var].array()], label=["uudd Continuum", "mixed B mesons"], stacked=True)
    plt.legend(loc='best')
    plt.xlabel(f"{var} [GeV]")
    plt.savefig(output_file[var], dpi=100)
    plt.close()
# @endcond
Listing 3.12 reconstruction.py#
# @cond
import json
import basf2 as b2
import modularAnalysis as ma
import vertex as vx

runningOnMC = snakemake.params.runningOnMC

inputjson = snakemake.input.inputfileList
outname = snakemake.output[0]

mypath = b2.create_path()
with open(inputjson) as f:
    inputMdstList = json.load(f)

ma.inputMdstList(filelist=inputMdstList, path=mypath)

ma.fillParticleList(decayString='K+:my',  cut="dr < 0.5 and abs(dz) < 3 and thetaInCDCAcceptance and kaonID > 0.01", path=mypath)
ma.fillParticleList(decayString='pi+:my', cut="dr < 0.5 and abs(dz) < 3 and thetaInCDCAcceptance", path=mypath)

ma.reconstructDecay(decayString="D-:K2Pi -> K+:my pi-:my pi-:my", cut="1.844 < M < 1.894", path=mypath)

ma.reconstructDecay(decayString='B0:PiD-toK2Pi -> D-:K2Pi pi+:my', cut='5.0 < Mbc and abs(deltaE) < 1.0', path=mypath)
vx.treeFit('B0:PiD-toK2Pi', 0, path=mypath, updateAllDaughters=False, ipConstraint=True, massConstraint=[411])

if(runningOnMC):
    ma.matchMCTruth(list_name='B0:PiD-toK2Pi', path=mypath)

some_variables = ['Mbc', 'deltaE']
ma.variablesToNtuple(decayString='B0:PiD-toK2Pi', variables=some_variables,
                     filename=outname,  path=mypath, treename='BtoPiDtoKPiPi')

b2.process(mypath)
# @endcond
Listing 3.13 batchToTxt.py#
# @cond
import json
skimfiles = json.loads(open(snakemake.input.skim_dirs, "r").read())
outputfiles = snakemake.output
BatchesPerSkim = snakemake.params.BatchesPerSkim

binwidth = int(len(skimfiles)/BatchesPerSkim)
if(binwidth < 1.):
    raise ValueError("Attempting to batching with binwidth smaller 1. Decrease the number of batches!")

batches = {}
for batch in range(BatchesPerSkim):
    if(batch == BatchesPerSkim - 1):
        batches.update({outputfiles[batch]: list(skimfiles[binwidth*batch:])})
    else:
        batches.update({outputfiles[batch]: list(skimfiles[binwidth*batch:binwidth*(batch+1)])})

for key, file_list in batches.items():
    with open(key, "w") as f:
        f.write(json.dumps(file_list))
# @endcond

When using our provided wrapper: for submitting gbasf2 jobs to the grid, the steering file does not need any specific adaptations.

Listing 3.14 skim.py#
# @cond
import basf2 as b2
import modularAnalysis as ma
import vertex as vx
from extravariables import runningOnMC, outputfile
import udst

gbasf2_dataset = []  # gbasf2 will figure this out

mypath = b2.create_path()
ma.inputMdstList(filelist=gbasf2_dataset, path=mypath, entrySequences=['0:10'])

ma.fillParticleList(decayString='K+:my',  cut="dr < 0.5 and abs(dz) < 3 and thetaInCDCAcceptance and kaonID > 0.01", path=mypath)
ma.fillParticleList(decayString='pi+:my', cut="dr < 0.5 and abs(dz) < 3 and thetaInCDCAcceptance", path=mypath)

ma.reconstructDecay(decayString="D-:K2Pi -> K+:my pi-:my pi-:my", cut="1.5 < M < 2.2", path=mypath)

ma.reconstructDecay(decayString='B0:PiD-toK2Pi -> D-:K2Pi pi+:my', cut='5.0 < Mbc and abs(deltaE) < 1.0', path=mypath)
vx.treeFit('B0:PiD-toK2Pi', 0, path=mypath, updateAllDaughters=False, ipConstraint=True, massConstraint=[411])
ma.applyCuts('B0:PiD-toK2Pi', '5.2 < Mbc and abs(deltaE) < 0.5', path=mypath)

# dump in UDST format
udst.add_skimmed_udst_output(
    mypath,
    skimDecayMode='BtoPiD',
    skimParticleLists=['B0:PiD-toK2Pi'],
    mc=runningOnMC,
    outputFile=outputfile)
b2.process(mypath)
print(b2.statistics)
# @endcond