Belle II Software development
fullTrackingTableValidationAnalyseData.py
1#!/usr/bin/env python3
2
3
10
11"""
12<header>
13 <contact>software-tracking@belle2.org</contact>
14 <output>fullTrackingValidationTable.root</output>
15 <input>matching_validation.root</input>
16 <description>This module generates events for the validation using the full tracking with a tabular output.</description>
17</header>
18"""
19import basf2
20from ROOT import TFile, TNamed
21import os
22
# Master switch checked by the script entry point: when False the analysis is skipped.
ACTIVE = True

# Name of the ROOT file that receives the rendered HTML table.
VALIDATION_OUTPUT_FILE = "fullTrackingValidationTable.root"

try:
    import uproot  # noqa
    import pandas as pd  # noqa
except ImportError:
    # Either of the two imports above can be the missing one, so name both
    # packages instead of pointing the user at pandas only.
    basf2.B2FATAL("You need to have uproot and pandas installed for this validation script to run.")
32
# Cell text for the deepest level (y == 5): MC fraction, missing, missing-and-not-fitted,
# followed by the matching / CDC / VXD relation fractions.
FORMAT_STRING = "&nbsp;{:.2%} <br/> -{:.2%} <br/> <b>-{:.2%}</b> <br/> Matching: {:.2%} <br/> CDC: {:.2%} <br/> VXD: {:.2%}"
# Cell text for every other level: only the first three fractions.
SHORT_FORMAT_STRING = "MC: &nbsp;{:.2%} <br/>Missing: -{:.2%} <br/>Missing and !fit: <b>-{:.2%}</b>"


def reducelist(list_of_cuts, df, current_name=None, current_cut=None, x=0, y=0):
    """Recursively split *df* by the given cuts and yield one table cell per category.

    The first entry of ``list_of_cuts`` opens the root category; every further
    entry splits each existing category into a "<name>_no_<cut>" and a
    "<name>_<cut>" child.

    :param list_of_cuts: list of ``(name, cut)`` pairs. ``cut`` is a callable taking
        the data frame and returning a boolean mask, or ``None`` for "select everything".
    :param df: data frame providing ``is_matched`` and ``fitted_is_matched`` (and, for
        cells at ``y == 5``, ``both_related``, ``cdc_has_related`` and ``vxd_has_related``).
    :param current_name: name of the category built so far (``None`` on the outermost call).
    :param current_cut: boolean mask of the category built so far.
    :param x: horizontal position of the current category.
    :param y: depth of the current category.
    :return: generator of ``((y, x, name), html_text)`` pairs.
    """
    if current_name is not None:
        selected = current_cut.mean()
        missing = (current_cut & (df.is_matched == 0)).mean()
        missing_after_fit = (current_cut & (df.fitted_is_matched == 0)).mean()

        if y == 5:
            # Only the deepest level carries the additional relation columns.
            return_string = FORMAT_STRING.format(selected,
                                                 missing,
                                                 missing_after_fit,
                                                 (current_cut & (df.both_related == 1)).mean(),
                                                 (current_cut & (df.cdc_has_related == 1)).mean(),
                                                 (current_cut & (df.vxd_has_related == 1)).mean())
        else:
            return_string = SHORT_FORMAT_STRING.format(selected, missing, missing_after_fit)
        yield (y, x, current_name), return_string

    if not list_of_cuts:
        return

    name, cut = list_of_cuts[0]
    remaining_cuts = list_of_cuts[1:]
    # Horizontal offset of the "cut passed" child relative to its parent.
    offset = 2 ** (len(list_of_cuts) - 1)

    if cut is None:
        # Make an "always true" cut. It is built from the index instead of comparing
        # a column with itself, because NaN != NaN would silently drop rows and the
        # comparison would require a specific column to exist.
        def cut(frame):
            return pd.Series(True, index=frame.index)

    if current_name is None:
        yield from reducelist(remaining_cuts, df, name, cut(df),
                              x + offset, y + 1)
    else:
        yield from reducelist(remaining_cuts, df, current_name + "_no_" + name, current_cut & (~cut(df)),
                              x, y + 1)
        yield from reducelist(remaining_cuts, df, current_name + "_" + name, current_cut & (cut(df)),
                              x + offset, y + 1)
70
71
def make_chunks(k, n):
    """Split the sequence *k* into consecutive slices of at most *n* elements.

    :param k: sliceable sequence to split.
    :param n: step between slice starts, and therefore the maximum slice length.
    :return: list of slices of *k*; the last one may be shorter than *n*.
    """
    chunks = []
    for start in range(0, len(k), n):
        chunks.append(k[start:start + n])
    return chunks
74
75
def write_value_cell(key, value):
    """Render a single ``<td>`` element for one table cell.

    :param key: 4-tuple whose first two entries are the depth ``y`` and the horizontal
        position ``x`` of the cell; the remaining two entries are not used here.
    :param value: content placed inside the cell.
    :return: HTML snippet of the cell, with a column span of ``2 ** (5 - y)`` and a
        background color looked up from the depth and position.
    """
    depth, position, _title, _ = key
    span = 2 ** int(5 - depth)

    # Background colors per depth, listed from left to right.
    palette = {
        3: ["white", "gray", "orange", "green"],
        4: ["gray", "white", "gray", "gray",
            "orange", "orange", "green", "green"],
        5: ["gray", "gray", "red", "green",
            "gray", "gray", "gray", "gray",
            "red", "gray", "red", "orange",
            "green", "gray", "orange", "green"]
    }

    shades = palette.get(depth)
    if shades is None:
        # Depths without a color list stay white.
        background = "white"
    else:
        # Positions start at 2 ** 4 and each cell of this depth is 2 ** (5 - depth) wide.
        slot = int((position - 2 ** 4) / (2 ** (5 - depth)))
        background = shades[slot]

    return f"""
        <td style="border: 1px solid black" colspan={span}
            align="center" valign=middle bgcolor="{background}">{value}</td>
    """
100
101
def make_html_row(x):
    """Render one group of cells as two HTML table rows: titles first, values second.

    :param x: Series of cell contents; the third element of each index entry is
        used as the cell title and the full index entry is handed to ``write_value_cell``.
    :return: HTML string consisting of a title ``<tr>`` followed by a value ``<tr>``.
    """
    # Sort once and derive both rows from the same ordering, so every title is
    # guaranteed to sit above its own value cell.
    ordered = x.sort_index()
    keys = list(ordered.index)
    titles = [key[2] for key in keys]

    # Titles are shortened pairwise: whatever two neighbouring titles have in
    # common at their start is dropped.
    shorter_titles = []
    for pair in make_chunks(titles, 2):
        prefix = os.path.commonprefix(pair)
        # Cut the prefix off the front only; str.replace would also remove any
        # later repetition of the same text inside the title.
        shorter_titles.extend(title[len(prefix):].replace("_", " ") for title in pair)

    title_cells = "".join([write_value_cell(key, title) for key, title in zip(keys, shorter_titles)])
    value_cells = "".join([write_value_cell(key, value) for key, value in ordered.items()])

    return "<tr>" + title_cells + "</tr>" + "<tr>" + value_cells + "</tr>"
120
121
def get_html(df, test):
    """Build the complete HTML table for the given data frame and list of cuts.

    :param df: data frame handed on to ``reducelist``.
    :param test: list of ``(name, cut)`` pairs handed on to ``reducelist``.
    :return: HTML ``<table>`` string with one pair of rows per first index level
        and a closing row of fixed annotations.
    """
    cells = dict(reducelist(test, df))
    results = pd.DataFrame(cells, index=[0]).unstack()

    # Fixed annotations shown below the table, one per column.
    last_row_titles = ["", "", "CDCTF may help", "Criteria?", "", "", "", "", "VXDTF may help", "",
                       "hard cases", "CKF may help", "Criteria?", "", "CKF may help", "Merging"]

    body_rows = "".join(results.groupby(level=0).apply(make_html_row))
    annotation_cells = "".join([f"<td>{title}</td>" for title in last_row_titles])

    return "<table>" + body_rows + "<tr>" + annotation_cells + "</tr>" + "</table>"
134
135
if __name__ == '__main__':
    if ACTIVE:
        # These are the categories to be tested successively
        test = [
            ("all", None),
            ("has_vxd", lambda x: (x.n_svd_hits >= 2)),
            ("vxd_was_found", lambda x: x["vxd_was_found"] == 1),
            ("has_cdc", lambda x: x.n_cdc_hits >= 3),
            ("cdc_was_found", lambda x: x["cdc_was_found"] == 1),
        ]

        # The tree is read into a data frame inside the context manager, so the
        # input file is closed again as soon as the data has been loaded.
        with uproot.open("../matching_validation.root") as input_file:
            df = input_file['VxdCdcPartFinderHarvester_tree'].arrays(library='pd')
        html = get_html(df, test)

        # Store the HTML as a named object in the output ROOT file; close the file
        # even if writing fails.
        tfile = TFile(VALIDATION_OUTPUT_FILE, "RECREATE")
        try:
            html_content = TNamed("Tracking Table Validation", html)
            html_content.Write()
        finally:
            tfile.Close()
    else:
        print("This validation deactivated and thus basf2 is not executed.\n"
              "If you want to run this validation, please set the 'ACTIVE' flag above to 'True'.\n"
              "Exiting.")