#!/usr/bin/env python3

##########################################################################
# Belle II Software development
# validationfunctions.py
##########################################################################
11# Import timeit module and start a timer. Allows to get the runtime of the
12# program at any given point
13import timeit
14
15g_start_time = timeit.default_timer() # noqa
16
17# std
18import argparse
19import glob
20import os
21import subprocess
22import sys
23import time
24from typing import Dict, Optional, List, Union
25import logging
26from pathlib import Path
27import json
28import shutil
29
30# 3rd party
31import ROOT
32
33# ours
34import validationpath
35
36
40
def get_timezone() -> str:
    """
    Return the local timezone abbreviation as a short string.

    ``time.tzname`` is a pair ``(standard_name, dst_name)``; the DST entry
    is used whenever this zone defines daylight saving time.
    """
    standard_name, dst_name = time.tzname
    return dst_name if time.daylight != 0 else standard_name
53
54
def get_compact_git_hash(repo_folder: str) -> Optional[str]:
    """
    Returns the compact (abbreviated) git hash of the HEAD commit of the
    repository containing ``repo_folder``, or None if it cannot be
    determined.

    @param repo_folder: A folder inside of a git repository
    @return: The abbreviated commit hash, or None on any failure
    """
    try:
        cmd_output = (
            subprocess.check_output(
                ["git", "show", "--oneline", "-s"], cwd=repo_folder
            )
            .decode()
            .rstrip()
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        # CalledProcessError: not a git repository.
        # FileNotFoundError: git is not installed, or repo_folder does not
        # exist (the original version let this propagate and crash).
        return None

    # 'git show --oneline -s' prints '<hash> <subject line>'; the first
    # whitespace-separated token is the abbreviated hash.
    tokens = cmd_output.split(" ")
    if len(tokens) > 1:
        return tokens[0]
    # Unexpected output format; treat as failure.
    return None
76
77
def basf2_command_builder(
    steering_file: str, parameters: List[str], use_multi_processing=False
) -> List[str]:
    """
    Build a command list suitable for e.g. ``subprocess.Popen`` that runs
    basf2 on the given steering file with the given extra parameters.

    If ``use_multi_processing`` is True, basf2 is started in
    multi-processing mode with a single parallel process, so that the
    steering file is also exercised under multi-processing conditions.
    """
    command = ["basf2"]
    if use_multi_processing:
        command.append("-p1")
    command.append(steering_file)
    command.extend(parameters)
    return command
96
97
def available_revisions(work_folder: str) -> List[str]:
    """
    Scans the results folder and returns the names of all revision
    subfolders, ordered by the folders' 'Last-modified' timestamp
    (oldest first, most recently modified last).

    NOTE(review): the previous docstring claimed the newest revision came
    first, but the sort below is ascending in mtime; callers that need a
    specific order sort the returned list themselves.

    :return: A list of all revisions available for plotting
    """
    results_folder = validationpath.get_results_folder(work_folder)
    directories = (
        entry for entry in os.scandir(results_folder) if entry.is_dir()
    )
    oldest_first = sorted(directories, key=lambda entry: entry.stat().st_mtime)
    return [entry.name for entry in oldest_first]
116
117
def get_latest_nightly(work_folder: str) -> str:
    """
    Looks through the results folder for nightly builds and returns the
    most recent nightly tag (dates are encoded in the tag names, so the
    lexicographic maximum is the newest). Falls back to the default
    'current' tag when no nightly results exist.

    :return: the most recent nightly build or current
    """
    nightlies = [
        revision
        for revision in available_revisions(work_folder)
        if revision.startswith("nightly")
    ]
    return max(nightlies) if nightlies else 'current'
134
135
def get_popular_revision_combinations(work_folder: str) -> List[str]:
    """
    Returns several combinations of available revisions that we might
    want to pre-build on the server.

    Returns:
        List[List of revisions (str)]
    """
    all_revisions = sorted(available_revisions(work_folder), reverse=True)
    releases = [
        revision for revision in all_revisions
        if revision.startswith(("release", "prerelease"))
    ]
    nightlies = [
        revision for revision in all_revisions
        if revision.startswith("nightly")
    ]

    def _first_or_none(revisions):
        """ First element of revisions, or None if empty. """
        return revisions[0] if revisions else None

    def _unique_combinations(combinations):
        """ Drop duplicate sublists while keeping the original order.
        Order matters here: the first revision of each combination is
        taken as the reference. """
        unique = []
        for combination in combinations:
            if combination not in unique:
                unique.append(combination)
        return unique

    candidates = [
        # All revisions
        ["reference"] + sorted(all_revisions),

        # Latest release / nightly + reference
        ["reference", _first_or_none(releases)],
        ["reference", _first_or_none(nightlies)],

        # All latest + reference
        ["reference"] + sorted(
            rev
            for rev in (_first_or_none(releases), _first_or_none(nightlies))
            if rev
        ),

        # All nightlies + reference
        ["reference"] + sorted(nightlies),
    ]

    # Drop all Nones (missing latest release/nightly) from the sublists,
    # then drop any combination that ended up empty.
    candidates = [[rev for rev in combo if rev] for combo in candidates]
    candidates = [combo for combo in candidates if combo]
    candidates = _unique_combinations(candidates)

    if not candidates:
        sys.exit("No revisions seem to be available. Exit.")

    return candidates
213
214
def clear_plots(work_folder: str, keep_revisions: List[List[str]]):
    """
    Clear the plots folder: remove every plot-hash subfolder of
    ``work_folder`` except those whose revision combination is listed in
    ``keep_revisions``, and rewrite the rainbow.json index accordingly.

    @param work_folder: Folder containing rainbow.json and the per-hash
        plot subfolders
    @param keep_revisions: List of revision combinations (each a list of
        revision names) whose plots should be retained
    """

    rainbow_file = os.path.join(work_folder, 'rainbow.json')
    cleaned_rainbow = {}

    # Sort each combination so the comparison below is order-insensitive
    keep_revisions = [sorted(revs) for revs in keep_revisions]

    with open(rainbow_file) as rainbow:
        entries = json.loads(rainbow.read())
        # 'plot_hash' instead of 'hash': don't shadow the builtin
        for plot_hash, revisions in entries.items():

            if sorted(revisions) in keep_revisions:
                print(f'Retaining {plot_hash}')
                cleaned_rainbow[plot_hash] = revisions
                continue

            print(f'Removing {plot_hash}:{revisions}')
            plot_folder = Path(os.path.join(work_folder, plot_hash))
            if plot_folder.exists() and plot_folder.is_dir():
                shutil.rmtree(plot_folder)

    with open(rainbow_file, 'w') as rainbow:
        rainbow.write(json.dumps(cleaned_rainbow, indent=4))
242
243
def get_start_time() -> float:
    """!
    Accessor for the module-level ``g_start_time``, which is set as soon
    as this module is imported (see the top of the file).

    @return: Time since the validation has been started
    """
    return g_start_time
252
253
def get_validation_folders(
    location: str, basepaths: Dict[str, str], log: logging.Logger
) -> Dict[str, str]:
    """!
    Collects the validation folders for all packages from the stated release
    directory (either local or central). Returns a dict with the following
    form:
    {'name of package':'absolute path to validation folder of package'}

    @param location: The location where we want to search for validation
        folders (either 'local' or 'central')
    @param basepaths: Maps location name to its base directory (may be None)
    @param log: Logger used for debug output
    @return: Mapping of package name to its validation folder; empty dict
        if the location is unknown or has no base path configured
    """

    # Make sure we only look in existing locations. Use .get() so a
    # missing key behaves like an unset (None) base path instead of
    # raising KeyError.
    if location not in ["local", "central"]:
        return {}
    base = basepaths.get(location)
    if base is None:
        return {}

    # Write to log what we are collecting
    log.debug(f"Collecting {location} folders")

    # Reserve some memory for our results
    results = {}

    # Now start collecting the folders.
    # First, collect the general validation folder, because it needs special
    # treatment (does not belong to any other package but may include
    # steering files):
    validation_folder = os.path.join(base, "validation")
    if os.path.isdir(validation_folder):
        results["validation"] = validation_folder

    # get the special folder containing the validation tests
    validation_test_folder = os.path.join(validation_folder, "validation-test")
    if os.path.isdir(validation_test_folder):
        results["validation-test"] = validation_test_folder

    # Now get a list of all folders with name 'validation' which are
    # subfolders of a folder (=package) in the release directory
    package_dirs = glob.glob(os.path.join(base, "*", "validation"))

    # Now loop over all these folders, find the name of the package they
    # belong to and append them to our results dictionary
    for package_dir in package_dirs:
        package_name = os.path.basename(os.path.dirname(package_dir))
        results[package_name] = package_dir

    # Return our results
    return results
306
307
def get_argument_parser(
    modes: Optional[List[str]] = None,
) -> argparse.ArgumentParser:
    """
    Build the ArgumentParser for the validation command line interface.

    @param modes: Allowed values for the ``--mode`` option; defaults to
        ["local"] if not given.
    @return: The configured argparse.ArgumentParser.
    """

    if not modes:
        modes = ["local"]

    # Set up the command line parser
    parser = argparse.ArgumentParser()

    # Define the accepted command line flags and read them in
    parser.add_argument(
        "-d",
        "--dry",
        help="Perform a dry run, i.e. run the validation module without "
        "actually executing the steering files (for debugging purposes).",
        action="store_true",
    )
    parser.add_argument(
        "-m",
        "--mode",
        help="The mode which will be used for running the validation. "
        "Possible values: " + ", ".join(modes) + ". Default is 'local'",
        choices=modes,
        type=str,
        default="local",
    )
    parser.add_argument(
        "-i",
        "--intervals",
        help="Comma separated list of intervals for which to execute the "
        "validation scripts. Default is 'nightly'",
        type=str,
        default="nightly",
    )
    parser.add_argument(
        "-o",
        "--options",
        help="One or more strings that will be passed to basf2 as arguments. "
        "Example: '-n 100'. Quotes are necessary!",
        type=str,
        nargs="+",
    )
    parser.add_argument(
        "-p",
        "--parallel",
        help="The maximum number of parallel processes to run the "
        "validation. Only used for local execution. Default is number "
        "of CPU cores.",
        type=int,
        default=None,
    )
    parser.add_argument(
        "-pkg",
        "--packages",
        help="The name(s) of one or multiple packages. Validation will be "
        "run only on these packages! E.g. -pkg analysis arich",
        type=str,
        nargs="+",
    )
    parser.add_argument(
        "-s",
        "--select",
        help="The file name(s) of one or more space separated validation "
        "scripts that should be executed exclusively. All dependent "
        "scripts will also be executed. E.g. -s ECL2D.C "
        "(use -si instead to execute script(s) ignoring dependencies)",
        type=str,
        nargs="+",
    )
    parser.add_argument(
        "-si",
        "--select-ignore-dependencies",
        help="The file name of one or more space separated validation "
        "scripts that should be executed exclusively. This will ignore "
        "all dependencies. This is useful if you modified a script that "
        "produces plots based on the output of its dependencies.",
        type=str,
        nargs="+",
    )
    parser.add_argument(
        "--send-mails",
        help="Send email to the contact persons who have failed comparison "
        "plots. Mail is sent from b2soft@mail.desy.de via "
        "/usr/sbin/sendmail.",
        action="store_true",
    )
    parser.add_argument(
        "--send-mails-mode",
        help="How to send mails: Full report, incremental report (new/changed "
        "warnings/failures only) or automatic (default; follow hard coded "
        "rule, e.g. full reports every Monday).",
        choices=["full", "incremental", "automatic"],
        default="automatic",
    )
    parser.add_argument(
        "-q", "--quiet", help="Suppress the progress bar", action="store_true"
    )
    parser.add_argument(
        "-t",
        "--tag",
        help="The name that will be used for the current revision in the "
        "results folder. Default is 'current'.",
        type=str,
        default="current",
    )
    parser.add_argument(
        "--test",
        help="Execute validation in testing mode where only the validation "
        "scripts contained in the validation package are executed. "
        "During regular validation, these scripts are ignored.",
        action="store_true",
    )
    parser.add_argument(
        "--use-cache",
        help="If validation scripts are marked as cacheable and their output "
        "files already exist, don't execute these scripts again",
        action="store_true",
    )
    parser.add_argument(
        "--view",
        help="Once the validation is finished, start the local web server and "
        "display the validation results in the system's default browser.",
        action="store_true",
    )
    parser.add_argument(
        "--max-run-time",
        # Fixed: the original string concatenation was missing a space,
        # producing "executed bythe validation framework".
        help="By default, running scripts (that is, steering files executed "
        "by the validation framework) are terminated after a "
        "certain time. Use this flag to change this setting by supplying "
        "the maximal run time in minutes. Value <=0 disables the run "
        "time upper limit entirely.",
        type=int,
        default=None,
    )

    return parser
445
446
def parse_cmd_line_arguments(
    modes: Optional[List[str]] = None,
) -> argparse.Namespace:
    """!
    Sets up a parser for command line arguments, parses them and returns the
    arguments.
    @return: An object containing the parsed command line arguments.
        Arguments are accessed like they are attributes of the object,
        i.e. [name_of_object].[desired_argument]
    """
    # Fall back to the default mode list when none is supplied, then
    # delegate to the shared parser factory.
    return get_argument_parser(modes if modes else ["local"]).parse_args()
463
464
def scripts_in_dir(dirpath: str, log: logging.Logger, ext="*") -> List[str]:
    """!
    Returns all the files in the given dir (and its subdirs) that have
    the extension 'ext', if an extension is given (default: all extensions)

    @param dirpath: The directory in which we are looking for files
    @param log: logging.Logger object
    @param ext: The extension of the files, which we are looking for.
       '*' is the wildcard-operator (=all extensions are accepted)
    @return: A sorted list of all files with the specified extension in the
        given directory.
    """

    # Write to log what we are collecting
    log.debug(f"Collecting *{ext} files from {dirpath}")

    # Some space where we store our results before returning them
    results = []

    # A list of all folder names that will be ignored (e.g. folders that are
    # important for SCons
    blacklist = [
        "tools",
        "scripts",
        "examples",
        validationpath.folder_name_html_static,
    ]

    # Loop over the given directory and its subdirectories and find all files
    for root, dirs, files in os.walk(dirpath):

        # Skip a directory if it is blacklisted
        if os.path.basename(root) in blacklist:
            continue

        # Loop over all files
        for current_file in files:
            # If the file has the requested extension, append its full path
            # to the results. '*' is documented as "all extensions", but
            # endswith("*") would match nothing — treat it explicitly as a
            # wildcard (this was broken before: the default ext returned an
            # empty list).
            if ext == "*" or current_file.endswith(ext):
                results.append(os.path.join(root, current_file))

    # Return our sorted results
    return sorted(results)
509
510
def strip_ext(path: str) -> str:
    """
    Return only the bare file name of ``path``: both the directory part
    and the file extension are stripped.
    """
    filename = os.path.basename(path)
    return os.path.splitext(filename)[0]
517
518
def get_style(index: Optional[int], overall_item_count=1):
    """
    Map a revision index to ROOT line attributes (LineColor, LineStyle,
    LineWidth) so that each revision is drawn consistently.
    """

    # Color palette, cycled through by index
    palette = [
        ROOT.kRed,
        ROOT.kOrange,
        ROOT.kPink + 9,
        ROOT.kOrange - 8,
        ROOT.kGreen + 2,
        ROOT.kCyan + 2,
        ROOT.kBlue + 1,
        ROOT.kRed + 2,
        ROOT.kOrange + 3,
        ROOT.kYellow + 2,
        ROOT.kSpring,
    ]

    # ROOT line style codes
    line_styles = {
        "dashed": 2,  # Dashed: - - - - -
        "solid": 1,  # Solid: ----------
        "dashdot": 10,  # Dash-dot: -?-?-?-
    }
    # Cycle order of line styles by index
    style_cycle = ("dashed", "solid", "dashdot")

    # Line width used for all plots
    width = 2

    # Treat a missing index as the first entry
    if not index:
        index = 0

    # Pick the color for the (index)th revision
    color = palette[index % len(palette)]

    # A single revision is always drawn solid (nothing to overlap with);
    # otherwise cycle the styles so the newest revision (drawn on top)
    # gets a dashed line.
    if overall_item_count == 1:
        style = line_styles["solid"]
    else:
        style = line_styles[style_cycle[index % len(style_cycle)]]

    return ROOT.TAttLine(color, style, width)
569
570
def index_from_revision(revision: str, work_folder: str) -> Optional[int]:
    """
    Takes the name of a revision and returns the corresponding index. Indices
    are used to ensure that the color and style of a revision in a plot are
    always the same, regardless of the displayed revisions.
    Example: release-X is always red, and no other release get drawn in red if
    release-X is not selected for display.
    :param revision: A string containing the name of a revision
    :param work_folder: The work folder containing the results and plots
    :return: The index of the requested revision, or None, if no index could
        be found for 'revision'
    """
    known_revisions = available_revisions(work_folder) + ["reference"]
    try:
        return known_revisions.index(revision)
    except ValueError:
        # Revision not present in the results folder
        return None
590
591
def get_log_file_paths(logger: logging.Logger) -> List[str]:
    """
    Collect the paths that the FileHandlers of ``logger`` write to.

    :param logger: logging.logger object.
    :return: List of paths
    """
    # Only FileHandler-like handlers expose 'baseFilename'; skip the rest.
    return [
        handler.baseFilename
        for handler in logger.handlers
        if hasattr(handler, "baseFilename")
    ]
605
606
def get_terminal_width() -> int:
    """
    Width of the terminal in characters (80 if it cannot be determined).

    Copied from basf2 utils. However, we only compile the validation package
    on b2master, so copy this here.
    """
    # shutil is already imported at module level; fall back to 80x24 when
    # the size cannot be queried.
    return shutil.get_terminal_size(fallback=(80, 24)).columns
617
618
def congratulator(
    success: Optional[Union[int, float]] = None,
    failure: Optional[Union[int, float]] = None,
    total: Optional[Union[int, float]] = None,
    just_comment=False,
    rate_name="Success rate",
) -> str:
    """ Keeping the morale up by commenting on success rates.

    Args:
        success: Number of successes
        failure: Number of failures
        total: success + failures (out of success, failure and total, exactly
            2 have to be specified. If you want to use your own figure of
            merit, just set total = 1. and set success to a number between 0.0
            (infernal) to 1.0 (stellar))
        just_comment: Do not add calculated percentage to return string.
        rate_name: How to refer to the calculated success rate.

    Returns:
        Comment on your success rate (str).
    """

    unset_count = [success, failure, total].count(None)

    # Exactly two of the three figures must be supplied; if all three are
    # given they must also be consistent.
    inconsistent = unset_count == 0 and total != success + failure
    if inconsistent or unset_count >= 2:
        print(
            "ERROR (congratulator): Specify 2 of the arguments 'success',"
            "'failure', 'total'.",
            file=sys.stderr,
        )
        return ""

    # Derive whichever figure is missing from the other two (at most one
    # can be None at this point).
    if total is None:
        total = success + failure
    elif failure is None:
        failure = total - success
    elif success is None:
        success = total - failure

    # Beware of zero division errors.
    if total == 0:
        return "That wasn't really exciting, was it?"

    success_rate = 100 * success / total

    comments = {
        00.0: "You're grounded!",
        10.0: "Infernal...",
        20.0: "That's terrible!",
        40.0: "You can do better than that.",
        50.0: "That still requires some work.",
        75.0: "Three quarters! Almost there!",
        80.0: "Way to go ;)",
        90.0: "Gold medal!",
        95.0: "Legendary!",
        99.0: "Nobel price!",
        99.9: "Godlike!",
    }

    # Pick the comment for the highest threshold we reached; if the rate is
    # below every threshold (negative success?), keep the bottom comment.
    comment = comments[0]
    for threshold in sorted(comments, reverse=True):
        if success_rate >= threshold:
            comment = comments[threshold]
            break

    if just_comment:
        return comment
    return f"{rate_name} {int(success_rate)}%. {comment}"
698
699
def terminal_title_line(title="", subtitle="", level=0) -> str:
    """ Format a title line for terminal output.

    Args:
        title (str): The title. If no title is given, only a separating line
            is returned.
        subtitle (str): Subtitle.
        level (int): The lower, the more dominantly the line will be styled.
    """
    width = get_terminal_width()

    # Markdown-style underline characters for lack of better alternatives,
    # most dominant first.
    underline_chars = {0: "=", 1: "-", 2: "~"}

    # Pick the character for the highest key not exceeding 'level';
    # fall back to the most dominant one for out-of-range levels.
    char = underline_chars[0]
    for key in sorted(underline_chars, reverse=True):
        if level >= key:
            char = underline_chars[key]
            break

    separator = char * width
    if not title:
        return separator

    # guess we could make a bit more effort with indenting/handling long
    # titles, capitalization etc., but for now:
    pieces = [separator, title.capitalize()]
    if subtitle:
        pieces.append(subtitle)
    pieces.append(separator)
    return "\n".join(pieces)
735
736
def get_file_metadata(filename: str) -> Optional[str]:
    """
    Retrieve the metadata for a file using ``b2file-metadata-show -a``.

    Args:
        filename (str): File to get the metadata from.

    Returns:
        (str): Metadata of the file, or None if the tool failed.

    Raises:
        FileNotFoundError: If ``filename`` does not exist.
    """
    if not Path(filename).exists():
        # Fixed: the original f-string had no placeholder and always said
        # "(unknown)" instead of naming the missing file.
        raise FileNotFoundError(f"Could not find file {filename}")

    metadata = None

    try:
        proc = subprocess.run(
            ["b2file-metadata-show", "-a", str(filename)],
            stdout=subprocess.PIPE,
            # Capture stderr so that e.stderr below is actually populated
            # (it was always None before).
            stderr=subprocess.PIPE,
            check=True,
        )
        metadata = proc.stdout.decode("utf-8")
    except subprocess.CalledProcessError as e:
        print(e.stderr)

    return metadata
# NOTE(review): stray fragment "get_results_folder(output_base_dir)" removed —
# it sat at module level (would raise NameError at import) and appears to be
# extraction residue rather than intentional code.