Belle II Software development
validationfunctions.py
#!/usr/bin/env python3


# Import the timeit module and start a timer. This allows us to get the
# runtime of the program at any given point.
import timeit

g_start_time = timeit.default_timer()  # noqa

# std
import argparse
import glob
import os
import subprocess
import sys
import time
from typing import Dict, Optional, List, Union
import logging
from pathlib import Path
import json
import shutil

# 3rd party
import ROOT

# ours
import validationpath
def get_timezone() -> str:
    """
    Returns the correct timezone as a short string
    """
    tz_tuple = time.tzname

    # in some timezones, there is a daylight saving time entry in the
    # second item of the tuple
    if time.daylight != 0:
        return tz_tuple[1]
    else:
        return tz_tuple[0]


def get_compact_git_hash(repo_folder: str) -> Optional[str]:
    """
    Returns the compact git hash from a folder inside of a git repository
    """
    try:
        cmd_output = (
            subprocess.check_output(
                ["git", "show", "--oneline", "-s"], cwd=repo_folder
            )
            .decode()
            .rstrip()
        )
        # the first word in this string will be the hash
        cmd_output = cmd_output.split(" ")
        if len(cmd_output) > 1:
            return cmd_output[0]
        else:
            # something went wrong
            return None
    except subprocess.CalledProcessError:
        return None
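# Illustrative usage (not part of the original module; the repository path is
# a hypothetical placeholder). For a folder inside a git checkout, the
# function returns the abbreviated hash of the current HEAD commit:
#
#   >>> get_compact_git_hash("/path/to/basf2")
#   '1a2b3c4'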
def basf2_command_builder(
    steering_file: str, parameters: List[str], use_multi_processing=False
) -> List[str]:
    """
    This utility function takes the steering file name and other basf2
    parameters and returns a list which can be passed to the OS shell, for
    example via subprocess.Popen(params ...). If use_multi_processing is
    True, the script will be executed in multi-processing mode with only 1
    parallel process in order to test whether the code also performs as
    expected in multi-processing mode.
    """
    cmd_params = ["basf2"]
    if use_multi_processing:
        cmd_params += ["-p1"]
    cmd_params += [steering_file]
    cmd_params += parameters

    return cmd_params
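# Illustrative usage (not part of the original module; the steering file name
# is a hypothetical placeholder). The returned list can be passed directly to
# subprocess.Popen or subprocess.call:
#
#   >>> basf2_command_builder("my_steering.py", ["-n", "100"], use_multi_processing=True)
#   ['basf2', '-p1', 'my_steering.py', '-n', '100']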
def available_revisions(work_folder: str) -> List[str]:
    """
    Loops over the results folder and looks for revisions. It then returns an
    ordered list, with the most recent revision being the first element in the
    list and the oldest revision being the last element.
    The 'age' of a revision is determined by the 'Last-modified'-timestamp of
    the corresponding folder.
    :return: A list of all revisions available for plotting
    """

    # Get all folders in ./results/ sorted descending by the date they were
    # last modified (i.e. newest folder first)
    search_folder = validationpath.get_results_folder(work_folder)
    subfolders = [p for p in os.scandir(search_folder) if p.is_dir()]
    revisions = [
        p.name
        for p in sorted(
            subfolders, key=lambda p: p.stat().st_mtime, reverse=True
        )
    ]
    return revisions
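# Illustrative example (not part of the original module; revision names and
# work folder are hypothetical). If the results folder contains the revision
# folders "release-06-00-00" (modified earlier) and "nightly-2022-06-01"
# (modified later), the newest revision comes first:
#
#   >>> available_revisions("/path/to/work_folder")
#   ['nightly-2022-06-01', 'release-06-00-00']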
def get_latest_nightly(work_folder: str) -> str:
    """
    Loops over the results folder and looks for nightly builds. It then
    returns the most recent nightly tag, as determined by the date contained
    in its name. If no nightly results are available, the default 'current'
    tag is returned.
    :return: the most recent nightly build or 'current'
    """
    available = available_revisions(work_folder)
    available_nightlies = [
        revision for revision in available
        if revision.startswith("nightly")
    ]
    if available_nightlies:
        return sorted(available_nightlies, reverse=True)[0]
    else:
        return 'current'
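# Illustrative example (not part of the original module; the tag names are
# hypothetical). Assuming nightly tags embed the date in ISO order, the
# lexicographically largest tag is the most recent one:
#
#   >>> get_latest_nightly("/path/to/work_folder")
#   'nightly-2022-06-01'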
def get_popular_revision_combinations(work_folder: str) -> List[List[str]]:
    """
    Returns several combinations of available revisions that we might
    want to pre-build on the server.

    Returns:
        List of lists of revision names (str)
    """
    available = sorted(
        available_revisions(work_folder),
        reverse=True
    )
    available_releases = [
        revision for revision in available
        if revision.startswith("release") or revision.startswith("prerelease")
    ]
    available_nightlies = [
        revision for revision in available
        if revision.startswith("nightly")
    ]

    def atindex_or_none(lst, index):
        """ Returns the item at index from lst or None """
        try:
            return lst[index]
        except IndexError:
            return None

    def remove_duplicates_lstlst(lstlst):
        """ Removes duplicate lists in a list of lists """
        # If we didn't care about the order:
        #   return list(map(list, set(map(tuple, lstlst))))
        # would do the job. But we do care, or at least it is very
        # relevant which revision is first (because it gets taken
        # as reference)
        ret = []
        for lst in lstlst:
            if lst not in ret:
                ret.append(lst)
        return ret

    # Return value
    ret = [
        # All revisions
        ["reference"] + sorted(available),

        # Latest X + reference
        ["reference", atindex_or_none(available_releases, 0)],
        ["reference", atindex_or_none(available_nightlies, 0)],

        # All latest + reference
        ["reference"] + sorted(list(filter(
            None,
            [
                atindex_or_none(available_releases, 0),
                atindex_or_none(available_nightlies, 0)
            ]
        ))),

        # All nightlies + reference
        ["reference"] + sorted(available_nightlies)
    ]

    # Remove all Nones from the sublists
    ret = [
        list(filter(None, comb)) for comb in ret
    ]
    # Remove all empty lists
    ret = list(filter(None, ret))

    # Remove duplicates
    ret = remove_duplicates_lstlst(ret)

    if not ret:
        sys.exit("No revisions seem to be available. Exit.")

    return ret
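# Illustrative example (not part of the original module; revision names are
# hypothetical). With one release and one nightly available, the returned
# combinations would be
#
#   [
#       ["reference", "nightly-2022-06-01", "release-06-00-00"],
#       ["reference", "release-06-00-00"],
#       ["reference", "nightly-2022-06-01"],
#   ]
#
# i.e. "all revisions", "latest release" and "latest nightly", each with the
# reference prepended and with duplicate combinations removed.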
def clear_plots(work_folder: str, keep_revisions: List[List[str]]):
    """
    This function clears the plots folder, removing all plot files except
    those that belong to one of the revision combinations in keep_revisions.
    """

    rainbow_file = os.path.join(work_folder, 'rainbow.json')
    cleaned_rainbow = {}

    keep_revisions = [sorted(revs) for revs in keep_revisions]

    with open(rainbow_file) as rainbow:
        entries = json.loads(rainbow.read())
        for hash, revisions in entries.items():

            if sorted(revisions) in keep_revisions:
                print(f'Retaining {hash}')
                cleaned_rainbow[hash] = revisions
                continue

            print(f'Removing {hash}:{revisions}')
            work_folder_path = Path(os.path.join(work_folder, hash))
            if work_folder_path.exists() and work_folder_path.is_dir():
                shutil.rmtree(work_folder_path)

    with open(rainbow_file, 'w') as rainbow:
        rainbow.write(json.dumps(cleaned_rainbow, indent=4))
def get_start_time() -> float:
    """!
    The function returns the value g_start_time, which contains the start
    time of the validation and is set at the top of this module.

    @return: Time at which the validation has been started
    """
    return g_start_time
def get_validation_folders(
    location: str, basepaths: Dict[str, str], log: logging.Logger
) -> Dict[str, str]:
    """!
    Collects the validation folders for all packages from the stated release
    directory (either local or central). Returns a dict with the following
    form:
    {'name of package': 'absolute path to validation folder of package'}

    @param location: The location where we want to search for validation
        folders (either 'local' or 'central')
    @param basepaths: Dictionary mapping 'local' and 'central' to the
        corresponding release directories
    @param log: The logging.Logger object used for debug output
    """

    # Make sure we only look in existing locations:
    if location not in ["local", "central"]:
        return {}
    if basepaths[location] is None:
        return {}

    # Write to log what we are collecting
    log.debug(f"Collecting {location} folders")

    # Reserve some memory for our results
    results = {}

    # Now start collecting the folders.
    # First, collect the general validation folder, because it needs special
    # treatment (it does not belong to any other package but may include
    # steering files):
    if os.path.isdir(basepaths[location] + "/validation"):
        results["validation"] = basepaths[location] + "/validation"

    # get the special folder containing the validation tests
    if os.path.isdir(basepaths[location] + "/validation/validation-test"):
        results["validation-test"] = (
            basepaths[location] + "/validation/validation-test"
        )

    # Now get a list of all folders with name 'validation' which are
    # subfolders of a folder (=package) in the release directory
    package_dirs = glob.glob(
        os.path.join(basepaths[location], "*", "validation")
    )

    # Now loop over all these folders, find the name of the package they
    # belong to and append them to our results dictionary
    for package_dir in package_dirs:
        package_name = os.path.basename(os.path.dirname(package_dir))
        results[package_name] = package_dir

    # Return our results
    return results
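# Illustrative example (not part of the original module; the paths are
# hypothetical). For a local release containing the packages "analysis" and
# "arich", the returned dictionary would look like
#
#   {
#       "validation": "/path/to/release/validation",
#       "validation-test": "/path/to/release/validation/validation-test",
#       "analysis": "/path/to/release/analysis/validation",
#       "arich": "/path/to/release/arich/validation",
#   }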
def get_argument_parser(
    modes: Optional[List[str]] = None,
) -> argparse.ArgumentParser:
    """!
    Sets up and returns the argparse.ArgumentParser used to parse the
    command line options of the validation.
    """

    if not modes:
        modes = ["local"]

    # Set up the command line parser
    parser = argparse.ArgumentParser()

    # Define the accepted command line flags and read them in
    parser.add_argument(
        "-d",
        "--dry",
        help="Perform a dry run, i.e. run the validation module without "
        "actually executing the steering files (for debugging purposes).",
        action="store_true",
    )
    parser.add_argument(
        "-m",
        "--mode",
        help="The mode which will be used for running the validation. "
        "Possible values: " + ", ".join(modes) + ". Default is 'local'",
        choices=modes,
        type=str,
        default="local",
    )
    parser.add_argument(
        "-i",
        "--intervals",
        help="Comma separated list of intervals for which to execute the "
        "validation scripts. Default is 'nightly'",
        type=str,
        default="nightly",
    )
    parser.add_argument(
        "-o",
        "--options",
        help="One or more strings that will be passed to basf2 as arguments. "
        "Example: '-n 100'. Quotes are necessary!",
        type=str,
        nargs="+",
    )
    parser.add_argument(
        "-p",
        "--parallel",
        help="The maximum number of parallel processes to run the "
        "validation. Only used for local execution. Default is number "
        "of CPU cores.",
        type=int,
        default=None,
    )
    parser.add_argument(
        "-pkg",
        "--packages",
        help="The name(s) of one or multiple packages. Validation will be "
        "run only on these packages! E.g. -pkg analysis arich",
        type=str,
        nargs="+",
    )
    parser.add_argument(
        "-s",
        "--select",
        help="The file name(s) of one or more space separated validation "
        "scripts that should be executed exclusively. All dependent "
        "scripts will also be executed. E.g. -s ECL2D.C "
        "(use -si instead to execute script(s) ignoring dependencies)",
        type=str,
        nargs="+",
    )
    parser.add_argument(
        "-si",
        "--select-ignore-dependencies",
        help="The file name of one or more space separated validation "
        "scripts that should be executed exclusively. This will ignore "
        "all dependencies. This is useful if you modified a script that "
        "produces plots based on the output of its dependencies.",
        type=str,
        nargs="+",
    )
    parser.add_argument(
        "--send-mails",
        help="Send email to the contact persons who have failed comparison "
        "plots. Mail is sent from b2soft@mail.desy.de via "
        "/usr/sbin/sendmail.",
        action="store_true",
    )
    parser.add_argument(
        "--send-mails-mode",
        help="How to send mails: Full report, incremental report (new/changed "
        "warnings/failures only) or automatic (default; follow hard coded "
        "rule, e.g. full reports every Monday).",
        choices=["full", "incremental", "automatic"],
        default="automatic",
    )
    parser.add_argument(
        "-q", "--quiet", help="Suppress the progress bar", action="store_true"
    )
    parser.add_argument(
        "-t",
        "--tag",
        help="The name that will be used for the current revision in the "
        "results folder. Default is 'current'.",
        type=str,
        default="current",
    )
    parser.add_argument(
        "--test",
        help="Execute validation in testing mode where only the validation "
        "scripts contained in the validation package are executed. "
        "During regular validation, these scripts are ignored.",
        action="store_true",
    )
    parser.add_argument(
        "--use-cache",
        help="If validation scripts are marked as cacheable and their output "
        "files already exist, don't execute these scripts again",
        action="store_true",
    )
    parser.add_argument(
        "--view",
        help="Once the validation is finished, start the local web server and "
        "display the validation results in the system's default browser.",
        action="store_true",
    )
    parser.add_argument(
        "--max-run-time",
        help="By default, running scripts (that is, steering files executed "
        "by the validation framework) are terminated after a "
        "certain time. Use this flag to change this setting by supplying "
        "the maximal run time in minutes. Value <=0 disables the run "
        "time upper limit entirely.",
        type=int,
        default=None,
    )

    return parser
def parse_cmd_line_arguments(
    modes: Optional[List[str]] = None,
) -> argparse.Namespace:
    """!
    Sets up a parser for command line arguments, parses them and returns the
    arguments.
    @return: An object containing the parsed command line arguments.
        Arguments are accessed like they are attributes of the object,
        i.e. [name_of_object].[desired_argument]
    """

    if not modes:
        modes = ["local"]

    # Return the parsed arguments!
    return get_argument_parser(modes).parse_args()
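# Illustrative usage (not part of the original module). When the validation
# is invoked without any additional command line options, the defaults
# defined above are returned:
#
#   >>> args = parse_cmd_line_arguments(modes=["local"])
#   >>> args.tag, args.mode, args.intervals
#   ('current', 'local', 'nightly')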
def scripts_in_dir(dirpath: str, log: logging.Logger, ext="*") -> List[str]:
    """!
    Returns all the files in the given dir (and its subdirs) that have
    the extension 'ext', if an extension is given (default: all extensions)

    @param dirpath: The directory in which we are looking for files
    @param log: logging.Logger object
    @param ext: The extension of the files, which we are looking for.
        '*' is the wildcard-operator (=all extensions are accepted)
    @return: A sorted list of all files with the specified extension in the
        given directory.
    """

    # Write to log what we are collecting
    log.debug(f"Collecting *{ext} files from {dirpath}")

    # Some space where we store our results before returning them
    results = []

    # A list of all folder names that will be ignored (e.g. folders that are
    # important for SCons)
    blacklist = [
        "tools",
        "scripts",
        "examples",
        validationpath.folder_name_html_static,
    ]

    # Loop over the given directory and its subdirectories and find all files
    for root, dirs, files in os.walk(dirpath):

        # Skip a directory if it is blacklisted
        if os.path.basename(root) in blacklist:
            continue

        # Loop over all files
        for current_file in files:
            # If the file has the requested extension, append its full path
            # to the results
            if current_file.endswith(ext):
                results.append(os.path.join(root, current_file))

    # Return our sorted results
    return sorted(results)
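# Illustrative usage (not part of the original module; the paths and file
# names are hypothetical). Collecting the steering files of a single package
# could look like this:
#
#   >>> scripts_in_dir("/path/to/analysis/validation", logging.getLogger(), ".py")
#   ['/path/to/analysis/validation/test0_a.py', '/path/to/analysis/validation/test1_b.py']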
def strip_ext(path: str) -> str:
    """
    Takes a path and returns only the name of the file, without the
    extension on the file name
    """
    return os.path.splitext(os.path.split(path)[1])[0]
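# Illustrative example (not part of the original module; the path is a
# hypothetical placeholder):
#
#   >>> strip_ext("/path/to/validation/ECL2D.C")
#   'ECL2D'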
def get_style(index: Optional[int], overall_item_count=1):
    """
    Takes an index and returns the corresponding line attributes,
    i.e. LineColor, LineWidth and LineStyle.
    """

    # Define the colors for the plot
    colors = [
        ROOT.kRed,
        ROOT.kOrange,
        ROOT.kPink + 9,
        ROOT.kOrange - 8,
        ROOT.kGreen + 2,
        ROOT.kCyan + 2,
        ROOT.kBlue + 1,
        ROOT.kRed + 2,
        ROOT.kOrange + 3,
        ROOT.kYellow + 2,
        ROOT.kSpring,
    ]

    # Define the linestyles for the plot
    linestyles = {
        "dashed": 2,  # Dashed: - - - - -
        "solid": 1,  # Solid: ----------
        "dashdot": 10,  # Dash-dot: -.-.-.-
    }
    ls_index = {0: "dashed", 1: "solid", 2: "dashdot"}

    # Define the linewidth for the plots
    linewidth = 2

    # make sure the index is set
    if not index:
        index = 0

    # Get the color for the (index)th revision
    color = colors[index % len(colors)]

    # Figure out the linestyle
    # If there is only one revision, make it solid!
    # It cannot overlap with any other line
    if overall_item_count == 1:
        linestyle = linestyles["solid"]
    # Otherwise make sure the newest revision (which is drawn on top) gets a
    # dashed linestyle
    else:
        linestyle = linestyles[ls_index[index % len(ls_index)]]

    return ROOT.TAttLine(color, linestyle, linewidth)
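# Illustrative example (not part of the original module). For the third of
# five revisions one obtains a dash-dotted line in ROOT.kPink + 9 with line
# width 2:
#
#   >>> style = get_style(2, overall_item_count=5)
#   >>> style.GetLineWidth()
#   2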
def index_from_revision(revision: str, work_folder: str) -> Optional[int]:
    """
    Takes the name of a revision and returns the corresponding index. Indices
    are used to ensure that the color and style of a revision in a plot are
    always the same, regardless of the displayed revisions.
    Example: release-X is always red, and no other release gets drawn in red
    if release-X is not selected for display.
    :param revision: A string containing the name of a revision
    :param work_folder: The work folder containing the results and plots
    :return: The index of the requested revision, or None, if no index could
        be found for 'revision'
    """

    revisions = available_revisions(work_folder) + ["reference"]

    if revision in revisions:
        return revisions.index(revision)
    else:
        return None
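# Illustrative example (not part of the original module; revision name and
# work folder are hypothetical). If "nightly-2022-06-01" is the first entry
# returned by available_revisions(), its index is 0:
#
#   >>> index_from_revision("nightly-2022-06-01", "/path/to/work_folder")
#   0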
def get_log_file_paths(logger: logging.Logger) -> List[str]:
    """
    Returns a list of the paths that the FileHandlers of logger write to.
    :param logger: logging.Logger object.
    :return: List of paths
    """
    ret = []
    for handler in logger.handlers:
        try:
            ret.append(handler.baseFilename)
        except AttributeError:
            pass
    return ret


def get_terminal_width() -> int:
    """
    Returns the width of the terminal in characters, or 80 if unknown.

    Copied from the basf2 utils. However, since only the validation package
    is compiled on b2master, we keep a local copy here.
    """
    from shutil import get_terminal_size

    return get_terminal_size(fallback=(80, 24)).columns
def congratulator(
    success: Optional[Union[int, float]] = None,
    failure: Optional[Union[int, float]] = None,
    total: Optional[Union[int, float]] = None,
    just_comment=False,
    rate_name="Success rate",
) -> str:
    """ Keeping the morale up by commenting on success rates.

    Args:
        success: Number of successes
        failure: Number of failures
        total: success + failure (out of success, failure and total, exactly
            2 have to be specified. If you want to use your own figure of
            merit, just set total = 1. and set success to a number between
            0.0 (infernal) and 1.0 (stellar))
        just_comment: Do not add calculated percentage to return string.
        rate_name: How to refer to the calculated success rate.

    Returns:
        Comment on your success rate (str).
    """

    n_nones = [success, failure, total].count(None)

    if n_nones == 0 and total != success + failure:
        print(
            "ERROR (congratulator): Inconsistent values: 'success' + "
            "'failure' must equal 'total'.",
            file=sys.stderr,
        )
        return ""
    elif n_nones >= 2:
        print(
            "ERROR (congratulator): Specify 2 of the arguments 'success', "
            "'failure', 'total'.",
            file=sys.stderr,
        )
        return ""
    else:
        if total is None:
            total = success + failure
        if failure is None:
            failure = total - success
        if success is None:
            success = total - failure

    # Beware of zero division errors.
    if total == 0:
        return "That wasn't really exciting, was it?"

    success_rate = 100 * success / total

    comments = {
        00.0: "You're grounded!",
        10.0: "Infernal...",
        20.0: "That's terrible!",
        40.0: "You can do better than that.",
        50.0: "That still requires some work.",
        75.0: "Three quarters! Almost there!",
        80.0: "Way to go ;)",
        90.0: "Gold medal!",
        95.0: "Legendary!",
        99.0: "Nobel prize!",
        99.9: "Godlike!",
    }

    for value in sorted(comments.keys(), reverse=True):
        if success_rate >= value:
            comment = comments[value]
            break
    else:
        # below minimum?
        comment = comments[0]

    if just_comment:
        return comment
    else:
        return f"{rate_name} {int(success_rate)}%. {comment}"
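# Illustrative usage (not part of the original module):
#
#   >>> congratulator(success=90, total=100)
#   'Success rate 90%. Gold medal!'
#   >>> congratulator(failure=0, total=0)
#   "That wasn't really exciting, was it?"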
def terminal_title_line(title="", subtitle="", level=0) -> str:
    """ Returns a title line for the terminal.

    Args:
        title (str): The title. If no title is given, only a separating line
            is returned.
        subtitle (str): Subtitle.
        level (int): The lower, the more dominantly the line will be styled.
    """
    linewidth = get_terminal_width()

    # using the markdown title underlining chars for lack of better
    # alternatives
    char_dict = {0: "=", 1: "-", 2: "~"}

    for key in sorted(char_dict.keys(), reverse=True):
        if level >= key:
            char = char_dict[key]
            break
    else:
        # below minimum, shouldn't happen but anyway
        char = char_dict[0]

    line = char * linewidth
    if not title:
        return line

    # guess we could make a bit more effort with indenting/handling long
    # titles, capitalization etc., but for now:
    ret = line + "\n"
    ret += title.capitalize() + "\n"
    if subtitle:
        ret += subtitle + "\n"
    ret += line
    return ret
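# Illustrative example (not part of the original module). Assuming a terminal
# width of 80 characters, terminal_title_line("validation summary", level=1)
# returns a string of the form
#
#   --------...--------   (a line of 80 '-' characters)
#   Validation summary
#   --------...--------   (a line of 80 '-' characters)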
def get_file_metadata(filename: str) -> str:
    """
    Retrieve the metadata for a file using ``b2file-metadata-show -a``.

    Args:
        filename (str): The file whose metadata should be retrieved.

    Returns:
        (str): Metadata of the file.
    """
    if not Path(filename).exists():
        raise FileNotFoundError(f"Could not find file {filename}")

    metadata = None

    try:
        proc = subprocess.run(
            ["b2file-metadata-show", "-a", str(filename)],
            stdout=subprocess.PIPE,
            check=True,
        )
        metadata = proc.stdout.decode("utf-8")
    except subprocess.CalledProcessError as e:
        print(e.stderr)

    return metadata
def get_results_folder(output_base_dir)