Belle II Software  release-08-01-10
b2hlt_print_result.py
1 #!/usr/bin/env python3
2 
3 
10 
11 from ROOT import PyConfig
12 PyConfig.IgnoreCommandLineOptions = True # noqa
13 PyConfig.StartGuiThread = False # noqa
14 
15 import basf2
16 from argparse import ArgumentParser
17 import uproot
18 import pandas as pd
19 import numpy as np
20 
21 
22 if __name__ == "__main__":
23  parser = ArgumentParser(description="Print the results of the SoftwareTrigger decision for a certain file.")
24  parser.add_argument(
25  "input",
26  help="Input file name (where to read the events from). "
27  "If omitted, just use the already produced result by another SoftwareTriggerResultPrinter execution",
28  default="",
29  nargs="?")
30  parser.add_argument("--output", help="Output file name (will be used internally). "
31  "Defaults to trigger_results.root.",
32  default="software_trigger_results.root")
33  choices = ["list", "categorized"]
34  try:
35  from tabulate import tabulate
36  choices += ['github', 'gitlab', 'grid']
37  except ImportError:
38  pass
39 
40  parser.add_argument("--format", help="Choose the format how to print the trigger cuts. "
41  "To get access to more options please install the tabulate package using pip",
42  choices=choices, default="categorized")
43  parser.add_argument("--override-globaltags", dest="override", action="store_true", default=False,
44  help="Use this option in case the data file does not provide globaltag information. "
45  "The only case where this should occur is when analyzing raw data.")
46  parser.add_argument('--local-db-path', type=str,
47  help="set path to the local payload locations to use for the ConditionDB",
48  default=None)
49 
50  args = parser.parse_args()
51 
52  if args.input:
53  # For data, the prescales are only valid when using the online database!
54  if args.local_db_path is not None:
55  basf2.conditions.metadata_providers = ["file://" + basf2.find_file(args.local_db_path + "/metadata.sqlite")]
56  basf2.conditions.payload_locations = [basf2.find_file(args.local_db_path)]
57 
58  if args.override:
59  basf2.conditions.override_globaltags(["online"])
60 
61  path = basf2.Path()
62 
63  if args.input.endswith(".sroot"):
64  path.add_module("SeqRootInput", inputFileName=args.input)
65  else:
66  path.add_module("RootInput", inputFileName=args.input)
67  path.add_module("SoftwareTriggerResultPrinter", outputFileName=args.output)
68 
69  basf2.process(path)
70 
71  df = uproot.open(args.output)["software_trigger_results"].arrays(library="pd")
72 
73  # Make sure to cope with strings rather than bools (which is a bit strange in pandas)
74  df[["accept_or_reject", "prescaled", "cut"]] = df[["accept_or_reject", "prescaled", "cut"]].astype("str")
75 
76  # Group and order as we need it
77  df = df.set_index(["cut", "accept_or_reject", "prescaled"]).T
78  df.index = df.index.str.replace("software_trigger_cut_", "")
79  df.index = df.index.str.replace("_", " ")
80 
81  # Separate cuts and prescaling
82  df_prescales = df["False"].copy()
83  df_cuts = df["True"].copy()
84 
85  # For the prescaling, the total_events is nonsense...
86  df_prescales.loc["total events"] = np.NAN
87 
88  # Now also separate out only the accepted results
89  df_cuts = df_cuts["True"].copy()
90 
91  # Give the columns some meaningful names
92  df_cuts = df_cuts[["True", "False"]]
93  df_cuts.columns = ["Prescaled", "Non Prescaled"]
94 
95  # Make sure to print all information
96  pd.set_option("display.max_rows", 500)
97  pd.set_option("display.max_colwidth", 200)
98  pd.set_option('display.max_columns', 500)
99  pd.set_option('display.width', 1000)
100 
101  # Function used for formatting
102  def format(x, total_events):
103  if np.isnan(x):
104  return ""
105  return f"{int(x):d} ({x/total_events:7.2%})"
106 
107  # Create a new dataframe just for printing
108  df_print = pd.DataFrame(index=df_cuts.index)
109 
110  df_print["Prescaled"] = df_cuts["Prescaled"].apply(lambda x: format(x, df_cuts["Prescaled"]["total events"]))
111  df_print["Non Prescaled"] = df_cuts["Non Prescaled"].apply(lambda x: format(x, df_cuts["Non Prescaled"]["total events"]))
112  df_print["Prescales"] = df_prescales.fillna("NaN")
113  df_print = df_print[["Prescaled", "Non Prescaled", "Prescales"]]
114 
115  if args.format == "list":
116  print(df_print)
117  elif args.format == "categorized":
118  from softwaretrigger import filter_categories
119 
120  def local_print_function(title, categories):
121  empty_row = {key: "" for key in df_print.columns}
122  tmp = pd.DataFrame(columns=df_print.columns)
123  tmp = tmp.append(pd.Series(empty_row, name=title))
124  tmp = tmp.append(df_print.reindex(categories))
125  tmp = tmp.append(pd.Series(empty_row, name=""))
126 
127  return tmp
128 
129  df_sorted = pd.concat([
130  local_print_function("Overview", filter_categories.RESULTS),
131  local_print_function("ECL - Physics", filter_categories.ECL_PHYSICS),
132  local_print_function("ECL - Potentially Prescaled", filter_categories.ECL_PRESCALED),
133  local_print_function("CDC - Physics", filter_categories.CDC_PHYSICS),
134  local_print_function("CDC - Potentially Prescaled", filter_categories.CDC_PRESCALED),
135  local_print_function("Targeted Physics Lines", filter_categories.PHYSICS),
136  local_print_function("QED / Control Samples", filter_categories.QED),
137  local_print_function("Level 1 Passthrough ", filter_categories.LEVEL1),
138  local_print_function("Prescaled Vetoes", filter_categories.VETOES),
139  local_print_function("Obsolete", filter_categories.OBSOLETE),
140  local_print_function("Skims", [index for index in df_print.index if index.startswith("skim ")]),
141  ])
142 
143  remaining_columns = set(df_print.index) - set(df_sorted.index)
144  if remaining_columns:
145  df_sorted = df_sorted.append(local_print_function("Uncategorized", remaining_columns))
146 
147  print(df_sorted)
148 
149  elif args.format == "grid":
150  print(tabulate(df_print, tablefmt="grid", showindex=True, headers="keys"))
151  elif args.format in ["github", "gitlab"]:
152  print(tabulate(df_print, tablefmt="github", showindex=True, headers="keys"))