Belle II Software development
b2hlt_print_result.py
1#!/usr/bin/env python3
2
3
10
11from ROOT import PyConfig
12PyConfig.IgnoreCommandLineOptions = True # noqa
13PyConfig.StartGuiThread = False # noqa
14
15import basf2
16from argparse import ArgumentParser
17import uproot
18import pandas as pd
19import numpy as np
20
21
def format_count(x, total_events):
    """Render an event count together with its share of ``total_events``.

    Args:
        x: Number of events; NaN marks a missing entry.
        total_events: Denominator used to compute the percentage.

    Returns:
        An empty string if ``x`` is NaN, otherwise ``"<count> (<percent>)"``
        with the percentage padded to a width of seven characters.
    """
    if np.isnan(x):
        return ""
    return f"{int(x):d} ({x / total_events:7.2%})"


if __name__ == "__main__":
    parser = ArgumentParser(description="Print the results of the SoftwareTrigger decision for a certain file.")
    parser.add_argument(
        "input",
        help="Input file name (where to read the events from). "
        "If omitted, just use the already produced result by another SoftwareTriggerResultPrinter execution",
        default="",
        nargs="?")
    # The help text names the same file as the actual default below.
    parser.add_argument("--output", help="Output file name (will be used internally). "
                        "Defaults to software_trigger_results.root.",
                        default="software_trigger_results.root")

    # The tabulate-based formats are only offered when the package is importable.
    choices = ["list", "categorized"]
    try:
        from tabulate import tabulate
        choices += ['github', 'gitlab', 'grid']
    except ImportError:
        pass

    parser.add_argument("--format", help="Choose the format how to print the trigger cuts. "
                        "To get access to more options please install the tabulate package using pip",
                        choices=choices, default="categorized")
    parser.add_argument("--override-globaltags", dest="override", action="store_true", default=False,
                        help="Use this option in case the data file does not provide globaltag information. "
                        "The only case where this should occur is when analyzing raw data.")
    parser.add_argument('--local-db-path', type=str,
                        help="set path to the local payload locations to use for the ConditionDB",
                        default=None)

    args = parser.parse_args()

    # Only (re)produce the result file when an input file was given; otherwise
    # the file left behind by an earlier execution is read as-is.
    if args.input:
        # For data, the prescales are only valid when using the online database!
        if args.local_db_path is not None:
            basf2.conditions.metadata_providers = ["file://" + basf2.find_file(args.local_db_path + "/metadata.sqlite")]
            basf2.conditions.payload_locations = [basf2.find_file(args.local_db_path)]

        if args.override:
            basf2.conditions.override_globaltags(["online"])

        path = basf2.Path()

        # Sequential ROOT files need their own input module.
        if args.input.endswith(".sroot"):
            path.add_module("SeqRootInput", inputFileName=args.input)
        else:
            path.add_module("RootInput", inputFileName=args.input)
        path.add_module("SoftwareTriggerResultPrinter", outputFileName=args.output)

        basf2.process(path)

    # Materialize the tree as a DataFrame inside the context manager so the
    # ROOT file handle is released as soon as the data has been read.
    with uproot.open(args.output) as result_file:
        df = result_file["software_trigger_results"].arrays(library="pd")

    # Make sure to cope with strings rather than bools (which is a bit strange in pandas)
    key_columns = ["accept_or_reject", "prescaled", "cut"]
    df[key_columns] = df[key_columns].astype("str")

    # Group and order as we need it
    df = df.set_index(["cut", "accept_or_reject", "prescaled"]).T
    df.index = df.index.str.replace("software_trigger_cut_", "")
    df.index = df.index.str.replace("_", " ")

    # Separate cuts and prescaling
    df_prescales = df["False"].copy()
    df_cuts = df["True"].copy()

    # For the prescaling, the total_events is nonsense...
    # (np.nan is the canonical spelling; the upper-case alias is not portable.)
    df_prescales.loc["total events"] = np.nan

    # Now also separate out only the accepted results
    df_cuts = df_cuts["True"].copy()

    # Give the columns some meaningful names
    df_cuts = df_cuts[["True", "False"]]
    df_cuts.columns = ["Prescaled", "Non Prescaled"]

    # Make sure to print all information
    pd.set_option("display.max_rows", 500)
    pd.set_option("display.max_colwidth", 200)
    pd.set_option('display.max_columns', 500)
    pd.set_option('display.width', 1000)

    # Create a new dataframe just for printing
    df_print = pd.DataFrame(index=df_cuts.index)

    # Read each column's total once instead of once per formatted row.
    prescaled_total = df_cuts["Prescaled"]["total events"]
    non_prescaled_total = df_cuts["Non Prescaled"]["total events"]

    df_print["Prescaled"] = df_cuts["Prescaled"].apply(lambda x: format_count(x, prescaled_total))
    df_print["Non Prescaled"] = df_cuts["Non Prescaled"].apply(lambda x: format_count(x, non_prescaled_total))
    df_print["Prescales"] = df_prescales.fillna("NaN")
    df_print = df_print[["Prescaled", "Non Prescaled", "Prescales"]]

    if args.format == "list":
        print(df_print)
    elif args.format == "categorized":
        from softwaretrigger import filter_categories

        def local_print_function(title, categories):
            """Return one printable section: a title row, the rows listed in
            ``categories`` (in that order) and a trailing empty spacer row."""
            empty_row = {key: "" for key in df_print.columns}
            tmp = pd.DataFrame(columns=df_print.columns)
            tmp.loc[title] = pd.Series(empty_row)
            tmp = pd.concat([tmp, df_print.reindex(categories)])
            tmp.loc[""] = pd.Series(empty_row)

            return tmp

        df_sorted = pd.concat([
            local_print_function("Overview", filter_categories.RESULTS),
            local_print_function("ECL - Physics", filter_categories.ECL_PHYSICS),
            local_print_function("ECL - Potentially Prescaled", filter_categories.ECL_PRESCALED),
            local_print_function("CDC - Physics", filter_categories.CDC_PHYSICS),
            local_print_function("CDC - Potentially Prescaled", filter_categories.CDC_PRESCALED),
            local_print_function("Targeted Physics Lines", filter_categories.PHYSICS),
            local_print_function("QED / Control Samples", filter_categories.QED),
            local_print_function("Level 1 Passthrough ", filter_categories.LEVEL1),
            local_print_function("Prescaled Vetoes", filter_categories.VETOES),
            local_print_function("Obsolete", filter_categories.OBSOLETE),
            local_print_function("Skims", [index for index in df_print.index if index.startswith("skim ")]),
        ])

        # Anything not claimed by a section above is listed at the end. The
        # labels are sorted so the output order does not depend on set ordering.
        remaining_rows = sorted(set(df_print.index) - set(df_sorted.index))
        if remaining_rows:
            df_sorted = pd.concat([df_sorted, local_print_function("Uncategorized", remaining_rows)])

        print(df_sorted)

    elif args.format == "grid":
        print(tabulate(df_print, tablefmt="grid", showindex=True, headers="keys"))
    elif args.format in ["github", "gitlab"]:
        # Both choices are rendered with tabulate's "github" table format.
        print(tabulate(df_print, tablefmt="github", showindex=True, headers="keys"))