# Belle II Software development
# validationfunctions.py
1#!/usr/bin/env python3
2
3
10
11# Import timeit module and start a timer. Allows to get the runtime of the
12# program at any given point
13import timeit
14
15g_start_time = timeit.default_timer() # noqa
16
17# std
18import argparse
19import glob
20import os
21import subprocess
22import sys
23import time
24from typing import Dict, Optional, List, Union
25import logging
26from pathlib import Path
27import json
28import shutil
29
30# 3rd party
31import ROOT
32
33# ours
34import validationpath
35
36
39
40
def get_timezone() -> str:
    """
    Return the local timezone abbreviation as a short string.

    ``time.tzname`` is a pair (standard name, DST name); the DST entry is
    used whenever the local timezone defines daylight saving time.
    """
    standard_name, dst_name = time.tzname

    # time.daylight is nonzero iff a DST timezone is defined locally
    if time.daylight != 0:
        return dst_name
    return standard_name
53
54
def get_compact_git_hash(repo_folder: str) -> Optional[str]:
    """
    Returns the compact git hash of the HEAD commit of the repository that
    contains ``repo_folder``.

    @param repo_folder: A folder inside a git repository.
    @return: The abbreviated commit hash, or None if the folder is not
        inside a git repository, the folder does not exist, or the ``git``
        executable is not available.
    """
    try:
        cmd_output = (
            subprocess.check_output(
                ["git", "show", "--oneline", "-s"], cwd=repo_folder
            )
            .decode()
            .rstrip()
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        # Bug fix: a missing git executable (or nonexistent cwd) raises
        # FileNotFoundError, which previously propagated to the caller
        # instead of yielding None like the CalledProcessError path.
        return None

    # The first word of git's one-line summary is the abbreviated hash;
    # a summary with no commit message would leave only one word, which
    # is treated as an unexpected format.
    words = cmd_output.split(" ")
    if len(words) > 1:
        return words[0]
    return None
76
77
def basf2_command_builder(
    steering_file: str, parameters: List[str], use_multi_processing=False
) -> List[str]:
    """
    Assemble the OS command that runs ``basf2`` on the given steering file,
    returned as an argument list suitable for e.g. subprocess.Popen.

    If ``use_multi_processing`` is True, the steering file is executed in
    multi-processing mode with a single parallel process, which allows
    verifying that the code also behaves correctly in that mode.
    """
    command = ["basf2"]
    if use_multi_processing:
        command.append("-p1")
    command.append(steering_file)
    command.extend(parameters)
    return command
96
97
def available_revisions(work_folder: str) -> List[str]:
    """
    Loops over the results folder and looks for revisions, returning them
    sorted by the 'Last-modified' timestamp of the corresponding folder in
    ascending order, i.e. the oldest revision first and the most recently
    modified revision as the last element.

    NOTE(review): the original docstring claimed the *most recent* revision
    comes first, but the sort below is ascending in mtime (oldest first) —
    confirm which order callers such as index_from_revision rely on.

    :param work_folder: The validation work folder that contains the
        results directory.
    :return: A list of all revisions available for plotting
    """

    # Get all folders in ./results/ sorted by modification time
    # (oldest first, newest last)
    search_folder = validationpath.get_results_folder(work_folder)
    subfolders = [p for p in os.scandir(search_folder) if p.is_dir()]
    revisions = [
        p.name for p in sorted(subfolders, key=lambda p: p.stat().st_mtime)
    ]
    return revisions
116
117
def get_latest_nightly(work_folder: str) -> str:
    """
    Searches the available revisions for nightly builds and returns the most
    recent nightly tag (the names embed a date, so lexicographic order is
    chronological). Falls back to the default 'current' tag if no nightly
    results are available.

    :param work_folder: The validation work folder.
    :return: the most recent nightly build or current
    """
    nightlies = [
        rev for rev in available_revisions(work_folder)
        if rev.startswith("nightly")
    ]
    if not nightlies:
        return 'current'
    # Lexicographic maximum == newest date in the tag name
    return max(nightlies)
134
135
def get_popular_revision_combinations(work_folder: str) -> List[str]:
    """
    Returns several combinations of available revisions that we might
    want to pre-build on the server.

    Returns:
        List[List of revisions (str)]
    """
    all_revisions = sorted(
        available_revisions(work_folder),
        reverse=True
    )
    releases = [
        rev for rev in all_revisions
        if rev.startswith("release") or rev.startswith("prerelease")
    ]
    nightlies = [
        rev for rev in all_revisions
        if rev.startswith("nightly")
    ]

    def _first_or_none(lst):
        """ Returns the first item of lst, or None if lst is empty """
        return lst[0] if lst else None

    def _unique_in_order(combinations):
        """ Removes duplicate lists while preserving their order.
        Order matters here: the first revision of each combination is
        taken as the reference revision. """
        seen = []
        for combination in combinations:
            if combination not in seen:
                seen.append(combination)
        return seen

    latest_release = _first_or_none(releases)
    latest_nightly = _first_or_none(nightlies)

    candidates = [
        # All revisions
        ["reference"] + sorted(all_revisions),

        # Latest X + reference
        ["reference", latest_release],
        ["reference", latest_nightly],

        # All latest + reference
        ["reference"]
        + sorted(rev for rev in [latest_release, latest_nightly] if rev),

        # All nightlies + reference
        ["reference"] + sorted(nightlies),
    ]

    # Drop the Nones inside each combination ...
    candidates = [[rev for rev in comb if rev] for comb in candidates]
    # ... and any combination that ended up empty
    candidates = [comb for comb in candidates if comb]

    candidates = _unique_in_order(candidates)

    if not candidates:
        sys.exit("No revisions seem to be available. Exit.")

    return candidates
213
214
def clear_plots(work_folder: str, keep_revisions: List[List[str]]):
    """
    Clear the plots folder: delete the plot subfolders (and their entries in
    the rainbow file) for every revision combination except those listed in
    ``keep_revisions``.

    @param work_folder: Folder containing the plot subfolders and the
        ``rainbow.json`` bookkeeping file.
    @param keep_revisions: Revision combinations (each a list of revision
        names) whose plots should be retained; order within a combination
        does not matter.
    """

    rainbow_file = os.path.join(work_folder, 'rainbow.json')
    cleaned_rainbow = {}

    # Sort so combinations compare order-insensitively below
    keep_revisions = [sorted(revs) for revs in keep_revisions]

    with open(rainbow_file) as rainbow:
        entries = json.load(rainbow)

    # Renamed from `hash` to avoid shadowing the builtin
    for plot_hash, revisions in entries.items():

        if sorted(revisions) in keep_revisions:
            print(f'Retaining {plot_hash}')
            cleaned_rainbow[plot_hash] = revisions
            continue

        print(f'Removing {plot_hash}:{revisions}')
        plot_folder = Path(work_folder, plot_hash)
        # is_dir() is False for nonexistent paths, so no exists() needed
        if plot_folder.is_dir():
            shutil.rmtree(plot_folder)

    # Write back the pruned bookkeeping file
    with open(rainbow_file, 'w') as rainbow:
        json.dump(cleaned_rainbow, rainbow, indent=4)
241
242
def get_start_time() -> float:
    """!
    Returns the module-level timestamp ``g_start_time`` that was captured
    near the top of this module, right when it was first imported.

    @return: Time since the validation has been started
    """
    return g_start_time
251
252
def get_validation_folders(
    location: str, basepaths: Dict[str, str], log: logging.Logger
) -> Dict[str, str]:
    """!
    Collects the validation folders for all packages from the stated release
    directory (either local or central). Returns a dict with the following
    form:
    {'name of package':'absolute path to validation folder of package'}

    @param location: The location where we want to search for validation
        folders (either 'local' or 'central')
    @param basepaths: Maps location name to its base path (value may be None)
    @param log: Logger used for debug output
    @return: Dict mapping package name to validation folder; empty if the
        location is unknown or no base path is configured for it
    """

    # Make sure we only look in existing, configured locations.
    # Using .get avoids a KeyError for a missing location key.
    if location not in ["local", "central"]:
        return {}
    if basepaths.get(location) is None:
        return {}

    base = basepaths[location]

    # Write to log what we are collecting
    log.debug(f"Collecting {location} folders")

    # Reserve some memory for our results
    results = {}

    # First, collect the general validation folder, because it needs special
    # treatment (does not belong to any other package but may include
    # steering files). Paths are built with os.path.join instead of string
    # concatenation.
    validation_dir = os.path.join(base, "validation")
    if os.path.isdir(validation_dir):
        results["validation"] = validation_dir

    # get the special folder containing the validation tests
    validation_test_dir = os.path.join(validation_dir, "validation-test")
    if os.path.isdir(validation_test_dir):
        results["validation-test"] = validation_test_dir

    # Now get a list of all folders with name 'validation' which are
    # subfolders of a folder (=package) in the release directory
    package_dirs = glob.glob(os.path.join(base, "*", "validation"))

    # Find the name of the package each folder belongs to and record it
    for package_dir in package_dirs:
        package_name = os.path.basename(os.path.dirname(package_dir))
        results[package_name] = package_dir

    return results
305
306
def get_argument_parser(
    modes: Optional[List[str]] = None,
) -> argparse.ArgumentParser:
    """
    Build the argparse parser that understands all command line options of
    the validation.

    @param modes: Accepted values for the ``--mode`` option; defaults to
        ["local"].
    @return: The configured argparse.ArgumentParser.
    """

    if not modes:
        modes = ["local"]

    # Set up the command line parser
    parser = argparse.ArgumentParser()

    # Define the accepted command line flags and read them in
    parser.add_argument(
        "-d",
        "--dry",
        help="Perform a dry run, i.e. run the validation module without "
        "actually executing the steering files (for debugging purposes).",
        action="store_true",
    )
    parser.add_argument(
        "-m",
        "--mode",
        help="The mode which will be used for running the validation. "
        "Possible values: " + ", ".join(modes) + ". Default is 'local'",
        choices=modes,
        type=str,
        default="local",
    )
    parser.add_argument(
        "-i",
        "--intervals",
        help="Comma separated list of intervals for which to execute the "
        "validation scripts. Default is 'nightly'",
        type=str,
        default="nightly",
    )
    parser.add_argument(
        "-o",
        "--options",
        help="One or more strings that will be passed to basf2 as arguments. "
        "Example: '-n 100'. Quotes are necessary!",
        type=str,
        nargs="+",
    )
    parser.add_argument(
        "-p",
        "--parallel",
        help="The maximum number of parallel processes to run the "
        "validation. Only used for local execution. Default is number "
        "of CPU cores.",
        type=int,
        default=None,
    )
    parser.add_argument(
        "-pkg",
        "--packages",
        help="The name(s) of one or multiple packages. Validation will be "
        "run only on these packages! E.g. -pkg analysis arich",
        type=str,
        nargs="+",
    )
    parser.add_argument(
        "-s",
        "--select",
        help="The file name(s) of one or more space separated validation "
        "scripts that should be executed exclusively. All dependent "
        "scripts will also be executed. E.g. -s ECL2D.C",
        type=str,
        nargs="+",
    )
    parser.add_argument(
        "-si",
        "--select-ignore-dependencies",
        help="The file name of one or more space separated validation "
        "scripts that should be executed exclusively. This will ignore "
        "all dependencies. This is useful if you modified a script that "
        "produces plots based on the output of its dependencies.",
        type=str,
        nargs="+",
    )
    parser.add_argument(
        "--send-mails",
        help="Send email to the contact persons who have failed comparison "
        "plots. Mail is sent from b2soft@mail.desy.de via "
        "/usr/sbin/sendmail.",
        action="store_true",
    )
    parser.add_argument(
        "--send-mails-mode",
        help="How to send mails: Full report, incremental report (new/changed "
        "warnings/failures only) or automatic (default; follow hard coded "
        "rule, e.g. full reports every Monday).",
        choices=["full", "incremental", "automatic"],
        default="automatic",
    )
    parser.add_argument(
        "-q", "--quiet", help="Suppress the progress bar", action="store_true"
    )
    parser.add_argument(
        "-t",
        "--tag",
        help="The name that will be used for the current revision in the "
        "results folder. Default is 'current'.",
        type=str,
        default="current",
    )
    parser.add_argument(
        "--test",
        help="Execute validation in testing mode where only the validation "
        "scripts contained in the validation package are executed. "
        "During regular validation, these scripts are ignored.",
        action="store_true",
    )
    parser.add_argument(
        "--use-cache",
        help="If validation scripts are marked as cacheable and their output "
        "files already exist, don't execute these scripts again",
        action="store_true",
    )
    parser.add_argument(
        "--view",
        help="Once the validation is finished, start the local web server and "
        "display the validation results in the system's default browser.",
        action="store_true",
    )
    parser.add_argument(
        "--max-run-time",
        # Bug fix: there was a missing space in the implicitly concatenated
        # help string ("executed bythe validation framework").
        help="By default, running scripts (that is, steering files executed by "
        "the validation framework) are terminated after a "
        "certain time. Use this flag to change this setting by supplying "
        "the maximal run time in minutes. Value <=0 disables the run "
        "time upper limit entirely.",
        type=int,
        default=None,
    )

    return parser
443
444
def parse_cmd_line_arguments(
    modes: Optional[List[str]] = None,
) -> argparse.Namespace:
    """!
    Builds the command line parser and parses ``sys.argv`` with it.

    @param modes: Accepted values for the ``--mode`` option (defaults to
        ["local"]).
    @return: An object containing the parsed command line arguments.
        Arguments are accessed like they are attributes of the object,
        i.e. [name_of_object].[desired_argument]
    """
    parser = get_argument_parser(modes or ["local"])
    return parser.parse_args()
461
462
def scripts_in_dir(dirpath: str, log: logging.Logger, ext="*") -> List[str]:
    """!
    Returns all the files in the given dir (and its subdirs) that have
    the extension 'ext', if an extension is given (default: all extensions)

    @param dirpath: The directory in which we are looking for files
    @param log: logging.Logger object
    @param ext: The extension of the files, which we are looking for.
        '*' is the wildcard-operator (=all extensions are accepted)
    @return: A sorted list of all files with the specified extension in the
        given directory.
    """

    # Write to log what we are collecting
    log.debug(f"Collecting *{ext} files from {dirpath}")

    # Some space where we store our results before returning them
    results = []

    # A list of all folder names that will be ignored (e.g. folders that are
    # important for SCons)
    blacklist = [
        "tools",
        "scripts",
        "examples",
        validationpath.folder_name_html_static,
    ]

    # Loop over the given directory and its subdirectories and find all files
    for root, dirs, files in os.walk(dirpath):

        # Skip a directory if it is blacklisted
        if os.path.basename(root) in blacklist:
            continue

        # Loop over all files
        for current_file in files:
            # Bug fix: the '*' wildcard used to be compared literally via
            # str.endswith, so the documented "all extensions" default never
            # matched any file. Treat '*' as "accept everything".
            if ext == "*" or current_file.endswith(ext):
                results.append(os.path.join(root, current_file))

    # Return our sorted results
    return sorted(results)
507
508
def strip_ext(path: str) -> str:
    """
    Takes a path and returns only the name of the file, without the
    extension on the file name
    """
    filename = os.path.split(path)[1]
    basename, _extension = os.path.splitext(filename)
    return basename
515
516
def get_style(index: Optional[int], overall_item_count=1):
    """
    Takes an index and returns the corresponding line attributes,
    i.e. LineColor, LineWidth and LineStyle.
    """

    # Palette of colors for the plots, cycled through by index
    colors = [
        ROOT.kRed,
        ROOT.kOrange,
        ROOT.kPink + 9,
        ROOT.kOrange - 8,
        ROOT.kGreen + 2,
        ROOT.kCyan + 2,
        ROOT.kBlue + 1,
        ROOT.kRed + 2,
        ROOT.kOrange + 3,
        ROOT.kYellow + 2,
        ROOT.kSpring,
    ]

    # ROOT line style codes, cycled through by index:
    # 2 = dashed (- - - -), 1 = solid (-----), 10 = dash-dot (-.-.-)
    linestyle_cycle = [2, 1, 10]

    # Common line width for all plots
    linewidth = 2

    # Treat a missing index like the first entry
    if not index:
        index = 0

    # Color for the (index)th revision
    color = colors[index % len(colors)]

    if overall_item_count == 1:
        # A single revision cannot overlap any other line, so draw it solid
        linestyle = 1
    else:
        # Cycle the styles so the newest revision (drawn on top) gets a
        # dashed linestyle
        linestyle = linestyle_cycle[index % len(linestyle_cycle)]

    return ROOT.TAttLine(color, linestyle, linewidth)
567
568
def index_from_revision(revision: str, work_folder: str) -> Optional[int]:
    """
    Takes the name of a revision and returns the corresponding index. Indices
    are used to ensure that the color and style of a revision in a plot are
    always the same, regardless of the displayed revisions.
    Example: release-X is always red, and no other release get drawn in red if
    release-X is not selected for display.
    :param revision: A string containing the name of a revision
    :param work_folder: The work folder containing the results and plots
    :return: The index of the requested revision, or None, if no index could
        be found for 'revision'
    """
    known_revisions = available_revisions(work_folder) + ["reference"]
    try:
        return known_revisions.index(revision)
    except ValueError:
        # Revision is not among the known ones
        return None
588
589
def get_log_file_paths(logger: logging.Logger) -> List[str]:
    """
    Returns list of paths that the FileHandlers of logger write to.
    :param logger: logging.logger object.
    :return: List of paths
    """
    # Only file-based handlers carry a baseFilename attribute
    return [
        handler.baseFilename
        for handler in logger.handlers
        if hasattr(handler, "baseFilename")
    ]
603
604
def get_terminal_width() -> int:
    """
    Returns width of terminal in characters, or 80 if unknown.

    Copied from basf2 utils. However, we only compile the validation package
    on b2master, so copy this here.
    """
    import shutil

    size = shutil.get_terminal_size(fallback=(80, 24))
    return size.columns
615
616
def congratulator(
    success: Optional[Union[int, float]] = None,
    failure: Optional[Union[int, float]] = None,
    total: Optional[Union[int, float]] = None,
    just_comment=False,
    rate_name="Success rate",
) -> str:
    """ Keeping the morale up by commenting on success rates.

    Args:
        success: Number of successes
        failure: Number of failures
        total: success + failures (out of success, failure and total, exactly
            2 have to be specified. If you want to use your own figure of
            merit, just set total = 1. and set success to a number between 0.0
            (infernal) to 1.0 (stellar))
        just_comment: Do not add calculated percentage to return string.
        rate_name: How to refer to the calculated success rate.

    Returns:
        Comment on your success rate (str). An empty string on invalid
        input (an error is printed to stderr).
    """

    n_nones = [success, failure, total].count(None)

    if n_nones == 0 and total != success + failure:
        # All three were given, but they contradict each other.
        # Bug fix: this branch used to print the same "specify 2 of the
        # arguments" message as the branch below, which was misleading.
        print(
            "ERROR (congratulator): The arguments 'success', 'failure' and "
            "'total' are inconsistent: total != success + failure.",
            file=sys.stderr,
        )
        return ""

    if n_nones >= 2:
        print(
            "ERROR (congratulator): Specify 2 of the arguments 'success',"
            "'failure', 'total'.",
            file=sys.stderr,
        )
        return ""

    # At most one of the three is None here; derive the missing quantity.
    if total is None:
        total = success + failure
    if failure is None:
        failure = total - success
    if success is None:
        success = total - failure

    # Beware of zero division errors.
    if total == 0:
        return "That wasn't really exciting, was it?"

    success_rate = 100 * success / total

    # Thresholds (in percent) mapped to the comment used at or above them
    comments = {
        00.0: "You're grounded!",
        10.0: "Infernal...",
        20.0: "That's terrible!",
        40.0: "You can do better than that.",
        50.0: "That still requires some work.",
        75.0: "Three quarters! Almost there!",
        80.0: "Way to go ;)",
        90.0: "Gold medal!",
        95.0: "Legendary!",
        99.0: "Nobel price!",
        99.9: "Godlike!",
    }

    # Pick the comment of the highest threshold that was reached
    for value in sorted(comments.keys(), reverse=True):
        if success_rate >= value:
            comment = comments[value]
            break
    else:
        # below minimum (only possible for negative rates)
        comment = comments[0]

    if just_comment:
        return comment
    return f"{rate_name} {int(success_rate)}%. {comment}"
696
697
def terminal_title_line(title="", subtitle="", level=0) -> str:
    """ Print a title line in the terminal.

    Args:
        title (str): The title. If no title is given, only a separating line
            is printed.
        subtitle (str): Subtitle.
        level (int): The lower, the more dominantly the line will be styled.

    Returns:
        The formatted title block as a string.
    """
    width = get_terminal_width()

    # Markdown-style underline characters, most dominant first (for lack
    # of better alternatives)
    char_dict = {0: "=", 1: "-", 2: "~"}

    # Pick the character of the highest key not exceeding level; fall back
    # to the most dominant one for out-of-range levels
    char = char_dict[0]
    for key in sorted(char_dict.keys(), reverse=True):
        if level >= key:
            char = char_dict[key]
            break

    separator = char * width
    if not title:
        return separator

    # guess we could make a bit more effort with indenting/handling long
    # titles, capitalization etc., but for now:
    pieces = [separator, title.capitalize()]
    if subtitle:
        pieces.append(subtitle)
    pieces.append(separator)
    return "\n".join(pieces)
733
734
def get_file_metadata(filename: str) -> Optional[str]:
    """
    Retrieve the metadata for a file using ``b2file-metadata-show -a``.

    Args:
        filename (str): File to get the metadata from.

    Returns:
        (str): Metadata of file, or None if ``b2file-metadata-show`` failed.

    Raises:
        FileNotFoundError: If ``filename`` does not exist.
    """
    if not Path(filename).exists():
        # Bug fix: the f-string previously contained the literal text
        # "(unknown)" instead of interpolating the offending file name.
        raise FileNotFoundError(f"Could not find file {filename}")

    metadata = None

    try:
        proc = subprocess.run(
            ["b2file-metadata-show", "-a", str(filename)],
            stdout=subprocess.PIPE,
            # Capture stderr so that e.stderr below is not None
            stderr=subprocess.PIPE,
            check=True,
        )
        metadata = proc.stdout.decode("utf-8")
    except subprocess.CalledProcessError as e:
        print(e.stderr)

    return metadata
761
def get_results_folder(output_base_dir)