Belle II Software  release-06-00-14
b2hlt_print_result.py
1 #!/usr/bin/env python3
2 
3 
10 from ROOT import PyConfig
11 PyConfig.IgnoreCommandLineOptions = True # noqa
12 PyConfig.StartGuiThread = False # noqa
13 
14 import basf2
15 from argparse import ArgumentParser
16 from root_pandas import read_root
17 import pandas as pd
18 import numpy as np
19 
20 
def format_count(x, total_events):
    """Render an event count followed by its share of ``total_events``.

    Parameters:
        x: number of events for one cut (NaN if there is no entry).
        total_events: number of events the percentage is relative to.

    Returns:
        An empty string if ``x`` is NaN, otherwise ``"<count> (<percent>)"``.
    """
    if np.isnan(x):
        return ""
    return f"{int(x):d} ({x/total_events:7.2%})"


def make_category_table(df_print, title, categories):
    """Build one titled section of the categorized output table.

    The section consists of a row labelled ``title`` with empty cells, the rows
    of ``df_print`` selected (in order) by ``categories`` and a trailing row
    with an empty label that visually separates the section from the next one.
    Labels in ``categories`` that are not present in ``df_print`` show up as
    rows filled with NaN, because the selection is done with ``reindex``.

    Parameters:
        df_print: the fully formatted table, indexed by cut name.
        title: label of the heading row.
        categories: iterable of index labels belonging to this section.

    Returns:
        A new ``pandas.DataFrame`` with the same columns as ``df_print``.
    """
    heading = pd.DataFrame("", index=[title], columns=df_print.columns)
    spacer = pd.DataFrame("", index=[""], columns=df_print.columns)
    # pd.concat replaces the chained DataFrame.append calls, which are gone in pandas 2.0
    return pd.concat([heading, df_print.reindex(list(categories)), spacer])


def main():
    """Parse the command line, optionally run basf2 and print the trigger results.

    If an input file is given, a basf2 path with the matching input module and
    the SoftwareTriggerResultPrinter module is processed, which writes the
    output file. In any case the output file is then read back and printed in
    the format requested with ``--format``.
    """
    parser = ArgumentParser(description="Print the results of the SoftwareTrigger decision for a certain file.")
    parser.add_argument(
        "input",
        help="Input file name (where to read the events from). "
             "If omitted, just use the already produced result by another SoftwareTriggerResultPrinter execution",
        default="",
        nargs="?")
    # The help text now states the real default file name
    parser.add_argument("--output", help="Output file name (will be used internally). "
                        "Defaults to software_trigger_results.root.",
                        default="software_trigger_results.root")

    # The table formats based on tabulate are only offered if the package is installed
    choices = ["list", "categorized"]
    try:
        from tabulate import tabulate
    except ImportError:
        tabulate = None
    else:
        choices += ['jira', 'grid', 'stash']

    parser.add_argument("--format", help="Choose the format how to print the trigger cuts. "
                        "To get access to more options please install the tabulate package using pip",
                        choices=choices, default="categorized")
    parser.add_argument("--override-globaltags", dest="override", action="store_true", default=False,
                        help="Use this option in case the data file does not provide globaltag information. "
                        "The only case where this should occur is when analyzing raw data.")
    parser.add_argument('--local-db-path', type=str,
                        help="set path to the local payload locations to use for the ConditionDB",
                        default=None)

    args = parser.parse_args()

    if args.input:
        # For data, the prescales are only valid when using the online database!
        if args.local_db_path is not None:
            basf2.conditions.metadata_providers = ["file://" + basf2.find_file(args.local_db_path + "/metadata.sqlite")]
            basf2.conditions.payload_locations = [basf2.find_file(args.local_db_path)]

        if args.override:
            basf2.conditions.override_globaltags(["online"])

        path = basf2.Path()

        # Sequential ROOT files need their own input module
        if args.input.endswith(".sroot"):
            path.add_module("SeqRootInput", inputFileName=args.input)
        else:
            path.add_module("RootInput", inputFileName=args.input)
        path.add_module("SoftwareTriggerResultPrinter", outputFileName=args.output)

        basf2.process(path)

    df = read_root(args.output)

    # Make sure to cope with strings rather than bools (which is a bit strange in pandas)
    key_columns = ["accept_or_reject", "prescaled", "cut"]
    df[key_columns] = df[key_columns].astype("str")

    # Group and order as we need it: one row per cut, columns keyed by (cut, accept_or_reject, prescaled)
    df = df.set_index(["cut", "accept_or_reject", "prescaled"]).T
    # Plain substring replacement; regex=False makes this independent of the pandas default
    df.index = df.index.str.replace("software_trigger_cut_", "", regex=False)
    df.index = df.index.str.replace("_", " ", regex=False)

    # Separate cuts and prescaling
    df_prescales = df["False"].copy()
    df_cuts = df["True"].copy()

    # For the prescaling, the total_events is nonsense...
    # (np.nan instead of the alias np.NAN, which was removed in NumPy 2.0)
    df_prescales.loc["total events"] = np.nan

    # Now also separate out only the accepted results
    df_cuts = df_cuts["True"].copy()

    # Give the columns some meaningful names
    df_cuts = df_cuts[["True", "False"]]
    df_cuts.columns = ["Prescaled", "Non Prescaled"]

    # Make sure to print all information
    pd.set_option("display.max_rows", 500)
    pd.set_option("display.max_colwidth", 200)
    pd.set_option('display.max_columns', 500)
    pd.set_option('display.width', 1000)

    # Create a new dataframe just for printing
    df_print = pd.DataFrame(index=df_cuts.index)

    for column in ("Prescaled", "Non Prescaled"):
        # Look up the normalisation once per column instead of once per row
        total_events = df_cuts[column]["total events"]
        df_print[column] = df_cuts[column].apply(format_count, args=(total_events,))
    df_print["Prescales"] = df_prescales.fillna("NaN")
    df_print = df_print[["Prescaled", "Non Prescaled", "Prescales"]]

    if args.format == "list":
        print(df_print)
    elif args.format == "categorized":
        from softwaretrigger import filter_categories

        sections = [
            ("Overview", filter_categories.RESULTS),
            ("ECL - Physics", filter_categories.ECL_PHYSICS),
            ("ECL - Potentially Prescaled", filter_categories.ECL_PRESCALED),
            ("CDC - Physics", filter_categories.CDC_PHYSICS),
            ("CDC - Potentially Prescaled", filter_categories.CDC_PRESCALED),
            ("Targeted Physics Lines", filter_categories.PHYSICS),
            ("QED / Control Samples", filter_categories.QED),
            ("Level 1 Passthrough ", filter_categories.LEVEL1),
            ("Prescaled Vetoes", filter_categories.VETOES),
            ("Skims", [index for index in df_print.index if index.startswith("skim ")]),
        ]
        df_sorted = pd.concat([make_category_table(df_print, title, categories) for title, categories in sections])

        # Everything not listed in any category is shown at the end; sorted, so
        # that the output does not depend on the iteration order of a set
        uncategorized = sorted(set(df_print.index) - set(df_sorted.index))
        if uncategorized:
            df_sorted = pd.concat([df_sorted, make_category_table(df_print, "Uncategorized", uncategorized)])

        print(df_sorted)
    else:
        # Remaining choices are only selectable if tabulate could be imported.
        # "stash" is printed using the pipe table format of tabulate.
        table_formats = {"jira": "jira", "stash": "pipe", "grid": "grid"}
        print(tabulate(df_print, tablefmt=table_formats[args.format], showindex=True, headers="keys"))


if __name__ == "__main__":
    main()