Belle II Software  release-05-02-19
fullTrackingTableValidationAnalyseData.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 """
5 <header>
6  <contact>software-tracking@belle2.org</contact>
7  <output>fullTrackingValidationTable.root</output>
8  <input>matching_validation.root</input>
9  <description>This module generates events for the validation using the full tracking with a tabular output.</description>
10 </header>
11 """
12 import basf2
13 from ROOT import TFile, TNamed, Belle2
14 
# Name of the ROOT file that the script section at the bottom (re)creates
# and writes the HTML table into.
# NOTE(review): the <output> tag in the header docstring names
# "fullTrackingValidationTable.root", which differs from this file name;
# confirm which of the two is intended.
VALIDATION_OUTPUT_FILE = "fullTrackingTableValidation.root"
16 import os
17 
# read_root is used to load the input file and pd to assemble the result
# table further down. If either import fails, the problem is reported
# through basf2.B2FATAL.
try:
    from root_pandas import read_root
    import pandas as pd
except ImportError:
    basf2.B2FATAL("You need to have pandas installed for this validation script to run.")
23 
# HTML cell template used by reducelist() for entries at level y == 5.
# It takes six fractions (formatted as percentages), in this order: the
# selection itself, selection and is_matched == 0, selection and
# fitted_is_matched == 0, selection and both_related == 1, selection and
# cdc_has_related == 1, selection and vxd_has_related == 1.
FORMAT_STRING = "&nbsp;{:.2%} <br/> -{:.2%} <br/> <b>-{:.2%}</b> <br/> Matching: {:.2%} <br/> CDC: {:.2%} <br/> VXD: {:.2%}"
# HTML cell template used by reducelist() for all other levels; it takes
# only the first three of the fractions listed above.
SHORT_FORMAT_STRING = "MC: &nbsp;{:.2%} <br/>Missing: -{:.2%} <br/>Missing and !fit: <b>-{:.2%}</b>"
26 
27 
def reducelist(list_of_cuts, df, current_name=None, current_cut=None, x=0, y=0):
    """
    Recursively apply a chain of cuts and yield one table entry per node.

    Every yielded item is a pair ``((y, x, name), html_text)``. ``y`` grows
    by one with every processed cut, ``x`` is the horizontal offset of the
    entry and ``name`` is built from the names of the cuts applied so far.

    The first cut of the chain only starts the selection. Every further cut
    splits the current selection in two: the entries failing the cut
    (``<name>_no_<cut>``, same ``x``) and the entries passing it
    (``<name>_<cut>``, ``x`` shifted by ``2 ** (number of remaining cuts - 1)``).

    :param list_of_cuts: list of ``(name, cut)`` pairs still to be applied;
        ``cut`` is a callable taking ``df`` and returning a boolean mask, or
        ``None`` for a cut that is meant to select everything.
    :param df: data frame with the columns ``is_matched`` and
        ``fitted_is_matched`` (plus ``both_related``, ``cdc_has_related`` and
        ``vxd_has_related`` for level 5, and ``is_missing`` if a cut is None).
    :param current_name: name of the selection built so far, None at the start.
    :param current_cut: boolean mask of the selection built so far.
    :param x: horizontal offset of the current node.
    :param y: level of the current node.
    """
    if current_name is not None:
        def fraction(mask):
            # Share of all rows in df that are in the current selection
            # and additionally fulfil the given mask.
            return (current_cut & mask).mean()

        numbers = [
            current_cut.mean(),
            fraction(df.is_matched == 0),
            fraction(df.fitted_is_matched == 0),
        ]
        if y == 5:
            # Only level 5 shows the additional relation information.
            numbers.extend([
                fraction(df.both_related == 1),
                fraction(df.cdc_has_related == 1),
                fraction(df.vxd_has_related == 1),
            ])
            template = FORMAT_STRING
        else:
            template = SHORT_FORMAT_STRING

        yield (y, x, current_name), template.format(*numbers)

    if not list_of_cuts:
        return

    name, cut = list_of_cuts[0]
    remaining = list_of_cuts[1:]

    if cut is None:
        # Stand-in for an "always true" cut: compare a column with itself.
        def cut(frame):
            return frame.is_missing == frame.is_missing

    shifted_x = x + 2 ** (len(list_of_cuts) - 1)

    if current_name is None:
        # Very first cut: it defines the starting selection, no splitting yet.
        yield from reducelist(remaining, df, name, cut(df), shifted_x, y + 1)
        return

    # Failing branch first (keeps x), passing branch second (shifted x).
    branches = (
        ("_no_", x, lambda mask: ~mask),
        ("_", shifted_x, lambda mask: mask),
    )
    for separator, column, pick in branches:
        yield from reducelist(remaining, df, current_name + separator + name,
                              current_cut & pick(cut(df)), column, y + 1)
61 
62 
def make_chunks(l, n):
    """
    Split a sequence into consecutive pieces of length ``n``.

    :param l: sliceable sequence (list, string, ...).
    :param n: length of each piece; the last piece may be shorter.
    :returns: list with the slices of ``l`` in their original order.
    """
    chunks = []
    for start in range(0, len(l), n):
        chunks.append(l[start:start + n])
    return chunks
65 
66 
def write_value_cell(key, value):
    """
    Build the HTML of a single table cell.

    :param key: 4-tuple whose first two entries are the level ``y`` and the
        horizontal offset ``x`` of the cell; the other two are not used here.
    :param value: content placed inside the ``<td>`` element.
    :returns: HTML string of the cell. The cell spans ``2 ** (5 - y)``
        columns and is coloured according to a fixed table for the levels
        3, 4 and 5; cells of all other levels are white.
    """
    level, offset, _name, _ = key

    # Background colours per level, ordered from left to right.
    palette = {
        3: ["white", "gray", "orange", "green"],
        4: ["gray", "white", "gray", "gray",
            "orange", "orange", "green", "green"],
        5: ["gray", "gray", "red", "green",
            "gray", "gray", "gray", "gray",
            "red", "gray", "red", "orange",
            "green", "gray", "orange", "green"]
    }

    level_colors = palette.get(level)
    if level_colors is None:
        background = "white"
    else:
        # Offsets start at 2 ** 4; dividing by the width of one cell of
        # this level gives the position of the cell within its row.
        position = int((offset - 2 ** 4) / (2 ** (5 - level)))
        background = level_colors[position]

    width = 2 ** int(5 - level)

    return """
        <td style="border: 1px solid black" colspan={colspan}
            align="center" valign=middle bgcolor="{color}">{value}</td>
    """.format(colspan=width, color=background, value=value)
91 
92 
def make_html_row(x):
    """
    Render all entries of one table level as two HTML rows.

    The first row contains the shortened names of the entries, the second
    row their formatted values. Every cell is produced by write_value_cell.

    :param x: pandas Series holding the entries of a single level. Each index
        entry is the key tuple expected by write_value_cell, with the name of
        the entry at position 2.
    :returns: HTML string made of a title ``<tr>`` followed by a value ``<tr>``.
    """
    # Sort once and use the same order for both rows, so that every title
    # cell is guaranteed to sit above the value cell it belongs to.
    ordered = x.sort_index()

    # Series.items() is used instead of iteritems(), which is no longer
    # available in pandas 2.0 and later.
    keys = [key for key, _ in ordered.items()]
    titles = [key[2] for key in keys]

    # Titles are handled in pairs: the part shared by both titles of a pair
    # is cut off at the front and the underscores are turned into spaces.
    chunked_titles = make_chunks(titles, 2)
    common_prefixes = list(map(os.path.commonprefix, chunked_titles))

    shorter_titles = [title[len(prefix):].replace("_", " ")
                      for list_titles, prefix in zip(chunked_titles, common_prefixes)
                      for title in list_titles]

    title_cells = "".join(write_value_cell(key, title)
                          for key, title in zip(keys, shorter_titles))
    value_cells = "".join(write_value_cell(key, value)
                          for key, value in ordered.items())

    return "<tr>" + title_cells + "</tr>" + "<tr>" + value_cells + "</tr>"
112 
113 
def get_html(df, test):
    """
    Create the complete HTML table for the given data and chain of cuts.

    :param df: data frame handed on to reducelist.
    :param test: list of ``(name, cut)`` pairs handed on to reducelist.
    :returns: HTML string of a ``<table>`` with two rows per level (built by
        make_html_row) and one final row of fixed remarks.
    """
    # One column per entry produced by reducelist, turned into a Series
    # whose first index level is the level of the entry.
    entries = dict(reducelist(test, df))
    results = pd.DataFrame(entries, index=[0]).unstack()

    # Fixed remarks shown below the table, one per column.
    last_row_titles = ["", "", "CDCTF may help", "Criteria?", "", "", "", "", "VXDTF may help", "",
                       "hard cases", "CKF may help", "Criteria?", "", "CKF may help", "Merging"]

    level_rows = results.groupby(level=0).apply(make_html_row)
    remark_cells = "".join("<td>" + remark + "</td>" for remark in last_row_titles)

    pieces = [
        "<table>",
        "".join(level_rows),
        "<tr>" + remark_cells + "</tr>",
        "</table>",
    ]
    return "".join(pieces)
126 
127 
if __name__ == '__main__':
    # Chain of selections that are applied one after the other. Each entry
    # is a (name, cut) pair; a cut of None stands for "no restriction".
    test = [
        ("all", None),
        ("has_vxd", lambda frame: (frame.n_svd_hits >= 2)),
        ("vxd_was_found", lambda frame: frame["vxd_was_found"] == 1),
        ("has_cdc", lambda frame: frame.n_cdc_hits >= 3),
        ("cdc_was_found", lambda frame: frame["cdc_was_found"] == 1),
    ]

    # Load the input and turn it into the HTML table.
    df = read_root("../matching_validation.root")
    html = get_html(df, test)

    # Write the table into a newly created output file, wrapped in a
    # TNamed object called "Tracking Table Validation".
    tfile = TFile(VALIDATION_OUTPUT_FILE, "RECREATE")
    TNamed("Tracking Table Validation", html).Write()
    tfile.Close()