# Belle II Software development
# validationfunctions.py
1#!/usr/bin/env python3
2
3
10
11# Import timeit module and start a timer. Allows to get the runtime of the
12# program at any given point
13import timeit
14
15g_start_time = timeit.default_timer() # noqa
16
17# std
18import argparse
19import glob
20import os
21import subprocess
22import sys
23import time
24from typing import Dict, Optional, List, Union
25import logging
26from pathlib import Path
27import json
28import shutil
29
30# 3rd party
31import ROOT
32
33# ours
34import validationpath
35
36
39
40
def get_timezone() -> str:
    """
    Returns the correct timezone as short string
    """
    # time.daylight is non-zero when a DST timezone is defined; in that
    # case the DST name is the second entry of time.tzname.
    dst_defined = time.daylight != 0
    return time.tzname[1] if dst_defined else time.tzname[0]
53
54
def get_compact_git_hash(repo_folder: str) -> Optional[str]:
    """
    Returns the compact git hash from a folder inside of a git repository
    """
    try:
        raw = (
            subprocess.check_output(
                ["git", "show", "--oneline", "-s"], cwd=repo_folder
            )
            .decode()
            .rstrip()
        )
    except subprocess.CalledProcessError:
        return None
    # "git show --oneline -s" prints "<short hash> <subject line>";
    # the hash is the first whitespace-separated token.
    tokens = raw.split(" ")
    if len(tokens) <= 1:
        # something went wrong
        return None
    return tokens[0]
76
77
def basf2_command_builder(
    steering_file: str, parameters: List[str], use_multi_processing=False
) -> List[str]:
    """
    This utility function takes the steering file name and other basf2
    parameters and returns a list which can be executed via the OS shell for
    example to subprocess.Popen(params ...) If use_multi_processing is True,
    the script will be executed in multi-processing mode with only 1
    parallel process in order to test if the code also performs as expected
    in multi-processing mode
    """
    # Insert "-p1" between the executable and the steering file when
    # multi-processing mode is requested.
    mp_flags = ["-p1"] if use_multi_processing else []
    return ["basf2", *mp_flags, steering_file, *parameters]
96
97
def available_revisions(work_folder: str) -> List[str]:
    """
    Loops over the results folder and looks for revisions. It then returns an
    ordered list of revisions, sorted by the 'Last-modified'-timestamp of the
    corresponding results folder: the OLDEST revision is the first element
    and the most recent revision is the last element (``sorted`` with the
    ``st_mtime`` key sorts ascending).
    :param work_folder: The work folder containing the results folder
    :return: A list of all revisions available for plotting
    """

    # Get all folders in ./results/ sorted ascending by their modification
    # time (i.e. oldest folder first)
    search_folder = validationpath.get_results_folder(work_folder)
    subfolders = [p for p in os.scandir(search_folder) if p.is_dir()]
    revisions = [
        p.name for p in sorted(subfolders, key=lambda p: p.stat().st_mtime)
    ]
    return revisions
116
117
def get_latest_nightly(work_folder: str) -> str:
    """
    Loops over the results folder and looks for nightly builds. It then returns
    the most recent nightly tag sorted by date in the name. If no
    nightly results are available then it returns the default 'current' tag.
    :return: the most recent nightly build or current
    """
    nightlies = [
        revision
        for revision in available_revisions(work_folder)
        if revision.startswith("nightly")
    ]
    if not nightlies:
        return 'current'
    # Nightly tags embed the date, so the lexicographically largest tag is
    # the most recent build.
    return max(nightlies)
134
135
def get_popular_revision_combinations(work_folder: str) -> List[str]:
    """
    Returns several combinations of available revisions that we might
    want to pre-build on the server.

    Returns:
        List[List of revisions (str)]
    """
    available = sorted(available_revisions(work_folder), reverse=True)
    available_releases = [
        rev for rev in available
        if rev.startswith(("release", "prerelease"))
    ]
    available_nightlies = [
        rev for rev in available if rev.startswith("nightly")
    ]

    def _first_or_none(lst):
        """ Returns the first item of lst, or None if lst is empty """
        return lst[0] if lst else None

    latest_release = _first_or_none(available_releases)
    latest_nightly = _first_or_none(available_nightlies)

    # Candidate combinations. None entries mean "no such revision
    # available" and are stripped out below.
    candidates = [
        # All revisions
        ["reference"] + sorted(available),

        # Latest X + reference
        ["reference", latest_release],
        ["reference", latest_nightly],

        # All latest + reference
        ["reference"] + sorted(
            rev for rev in [latest_release, latest_nightly] if rev
        ),

        # All nightlies + reference
        ["reference"] + sorted(available_nightlies),
    ]

    # Strip the None placeholders from each combination
    candidates = [[rev for rev in comb if rev] for comb in candidates]
    # Drop combinations that ended up empty
    candidates = [comb for comb in candidates if comb]

    # Remove duplicates while preserving order. Order matters here: the
    # first revision of a combination gets taken as the reference, so we
    # cannot simply go through a set of tuples.
    ret = []
    for comb in candidates:
        if comb not in ret:
            ret.append(comb)

    if not ret:
        sys.exit("No revisions seem to be available. Exit.")

    return ret
213
214
def clear_plots(work_folder: str, keep_revisions: List[str]):
    """
    This function will clear the plots folder to get rid of all but the
    skipped revisions' associated plot files.

    @param work_folder: Folder containing the plot folders and the
        'rainbow.json' table mapping folder hashes to revision combinations
    @param keep_revisions: List of revision combinations (lists of revision
        names) whose plot folders should be retained
    """

    rainbow_file = os.path.join(work_folder, 'rainbow.json')
    cleaned_rainbow = {}

    # Sort each combination so the comparison below is order-insensitive
    keep_revisions = [sorted(revs) for revs in keep_revisions]

    with open(rainbow_file) as rainbow:
        entries = json.loads(rainbow.read())
        # NB: renamed from 'hash', which shadowed the builtin
        for rev_hash, revisions in entries.items():

            if sorted(revisions) in keep_revisions:
                print(f'Retaining {rev_hash}')
                cleaned_rainbow[rev_hash] = revisions
                continue

            # Not retained: drop the entry and delete its plot folder
            print(f'Removing {rev_hash}:{revisions}')
            work_folder_path = Path(os.path.join(work_folder, rev_hash))
            if work_folder_path.exists() and work_folder_path.is_dir():
                shutil.rmtree(work_folder_path)

    # Write back the rainbow table containing only the retained entries
    with open(rainbow_file, 'w') as rainbow:
        rainbow.write(json.dumps(cleaned_rainbow, indent=4))
242
243
def get_start_time() -> float:
    """!
    The function returns the value g_start_time which contains the start time
    of the validation; it is captured via ``timeit.default_timer()`` at the
    top of this module, i.e. at import time.

    @return: Timer value (``timeit.default_timer()``) recorded when the
        validation was started; subtract it from a current timer reading to
        get the elapsed runtime
    """
    return g_start_time
252
253
def get_validation_folders(
    location: str, basepaths: Dict[str, str], log: logging.Logger
) -> Dict[str, str]:
    """!
    Collects the validation folders for all packages from the stated release
    directory (either local or central). Returns a dict with the following
    form:
    {'name of package':'absolute path to validation folder of package'}

    @param location: The location where we want to search for validation
        folders (either 'local' or 'central')
    @param basepaths: The dictionary with base paths of local and release
        directory
    @param log: A logging.Logger object used for debug output
    @return: Dict mapping package name to the path of its validation folder
    """

    # Make sure we only look in existing locations:
    if location not in ["local", "central"]:
        return {}
    if basepaths[location] is None:
        return {}

    # Write to log what we are collecting
    log.debug(f"Collecting {location} folders")

    # Reserve some memory for our results
    results = {}

    # Now start collecting the folders.
    # First, collect the general validation folder, because it needs special
    # treatment (does not belong to any other package but may include
    # steering files). Use os.path.join instead of string concatenation so
    # the returned paths are well-formed regardless of trailing slashes:
    validation_folder = os.path.join(basepaths[location], "validation")
    if os.path.isdir(validation_folder):
        results["validation"] = validation_folder

    # get the special folder containing the validation tests
    validation_test_folder = os.path.join(validation_folder, "validation-test")
    if os.path.isdir(validation_test_folder):
        results["validation-test"] = validation_test_folder

    # Now get a list of all folders with name 'validation' which are
    # subfolders of a folder (=package) in the release directory
    package_dirs = glob.glob(
        os.path.join(basepaths[location], "*", "validation")
    )

    # Now loop over all these folders, find the name of the package they belong
    # to and append them to our results dictionary
    for package_dir in package_dirs:
        package_name = os.path.basename(os.path.dirname(package_dir))
        results[package_name] = package_dir

    # Return our results
    return results
308
309
def get_argument_parser(
    modes: Optional[List[str]] = None,
) -> argparse.ArgumentParser:
    """!
    Build the argparse.ArgumentParser for the validation command line
    interface.

    @param modes: Accepted values for the ``--mode`` option. Defaults to
        ``["local"]``.
    @return: Configured argparse.ArgumentParser instance.
    """

    if not modes:
        modes = ["local"]

    # Set up the command line parser
    parser = argparse.ArgumentParser()

    # Define the accepted command line flags and read them in
    parser.add_argument(
        "-d",
        "--dry",
        help="Perform a dry run, i.e. run the validation module without "
        "actually executing the steering files (for debugging purposes).",
        action="store_true",
    )
    parser.add_argument(
        "-m",
        "--mode",
        help="The mode which will be used for running the validation. "
        "Possible values: " + ", ".join(modes) + ". Default is 'local'",
        choices=modes,
        type=str,
        default="local",
    )
    parser.add_argument(
        "-i",
        "--intervals",
        help="Comma separated list of intervals for which to execute the "
        "validation scripts. Default is 'nightly'",
        type=str,
        default="nightly",
    )
    parser.add_argument(
        "-o",
        "--options",
        help="One or more strings that will be passed to basf2 as arguments. "
        "Example: '-n 100'. Quotes are necessary!",
        type=str,
        nargs="+",
    )
    parser.add_argument(
        "-p",
        "--parallel",
        help="The maximum number of parallel processes to run the "
        "validation. Only used for local execution. Default is number "
        "of CPU cores.",
        type=int,
        default=None,
    )
    parser.add_argument(
        "-pkg",
        "--packages",
        help="The name(s) of one or multiple packages. Validation will be "
        "run only on these packages! E.g. -pkg analysis arich",
        type=str,
        nargs="+",
    )
    parser.add_argument(
        "-s",
        "--select",
        help="The file name(s) of one or more space separated validation "
        "scripts that should be executed exclusively. All dependent "
        "scripts will also be executed. E.g. -s ECL2D.C "
        "(use -si instead to execute script(s) ignoring dependencies)",
        type=str,
        nargs="+",
    )
    parser.add_argument(
        "-si",
        "--select-ignore-dependencies",
        help="The file name of one or more space separated validation "
        "scripts that should be executed exclusively. This will ignore "
        "all dependencies. This is useful if you modified a script that "
        "produces plots based on the output of its dependencies.",
        type=str,
        nargs="+",
    )
    parser.add_argument(
        "--send-mails",
        help="Send email to the contact persons who have failed comparison "
        "plots. Mail is sent from b2soft@mail.desy.de via "
        "/usr/sbin/sendmail.",
        action="store_true",
    )
    parser.add_argument(
        "--send-mails-mode",
        help="How to send mails: Full report, incremental report (new/changed "
        "warnings/failures only) or automatic (default; follow hard coded "
        "rule, e.g. full reports every Monday).",
        choices=["full", "incremental", "automatic"],
        default="automatic",
    )
    parser.add_argument(
        "-q", "--quiet", help="Suppress the progress bar", action="store_true"
    )
    parser.add_argument(
        "-t",
        "--tag",
        help="The name that will be used for the current revision in the "
        "results folder. Default is 'current'.",
        type=str,
        default="current",
    )
    parser.add_argument(
        "--test",
        help="Execute validation in testing mode where only the validation "
        "scripts contained in the validation package are executed. "
        "During regular validation, these scripts are ignored.",
        action="store_true",
    )
    parser.add_argument(
        "--use-cache",
        help="If validation scripts are marked as cacheable and their output "
        "files already exist, don't execute these scripts again",
        action="store_true",
    )
    parser.add_argument(
        "--view",
        help="Once the validation is finished, start the local web server and "
        "display the validation results in the system's default browser.",
        action="store_true",
    )
    parser.add_argument(
        "--max-run-time",
        # fixed: the two help fragments used to concatenate without a space
        # ("executed bythe validation framework")
        help="By default, running scripts (that is, steering files executed "
        "by the validation framework) are terminated after a "
        "certain time. Use this flag to change this setting by supplying "
        "the maximal run time in minutes. Value <=0 disables the run "
        "time upper limit entirely.",
        type=int,
        default=None,
    )

    return parser
447
448
def parse_cmd_line_arguments(
    modes: Optional[List[str]] = None,
) -> argparse.Namespace:
    """!
    Sets up a parser for command line arguments, parses them and returns the
    arguments.
    @return: An object containing the parsed command line arguments.
        Arguments are accessed like they are attributes of the object,
        i.e. [name_of_object].[desired_argument]
    """
    # Fall back to the default mode list and delegate all parsing to the
    # parser built by get_argument_parser()
    return get_argument_parser(modes or ["local"]).parse_args()
465
466
def scripts_in_dir(dirpath: str, log: logging.Logger, ext="*") -> List[str]:
    """!
    Returns all the files in the given dir (and its subdirs) that have
    the extension 'ext', if an extension is given (default: all extensions)

    @param dirpath: The directory in which we are looking for files
    @param log: logging.Logger object
    @param ext: The extension of the files, which we are looking for.
        '*' is the wildcard-operator (=all extensions are accepted)
    @return: A sorted list of all files with the specified extension in the
        given directory.
    """

    # Write to log what we are collecting
    log.debug(f"Collecting *{ext} files from {dirpath}")

    # Folder names whose direct contents are skipped (e.g. folders that are
    # important for SCons)
    ignored_folders = {
        "tools",
        "scripts",
        "examples",
        validationpath.folder_name_html_static,
    }

    collected = []

    # Walk the directory tree and pick up every matching file
    for root, _, files in os.walk(dirpath):

        # Skip the files directly inside a blacklisted folder (note: its
        # subfolders are still visited, matching the original behavior)
        if os.path.basename(root) in ignored_folders:
            continue

        collected.extend(
            os.path.join(root, current_file)
            for current_file in files
            if current_file.endswith(ext)
        )

    # Return our sorted results
    return sorted(collected)
511
512
def strip_ext(path: str) -> str:
    """
    Takes a path and returns only the name of the file, without the
    extension on the file name
    """
    filename = os.path.basename(path)
    stem, _ = os.path.splitext(filename)
    return stem
519
520
def get_style(index: Optional[int], overall_item_count=1):
    """
    Takes an index and returns the corresponding line attributes,
    i.e. LineColor, LineWidth and LineStyle.
    """

    # Color palette cycled through by revision index
    colors = [
        ROOT.kRed,
        ROOT.kOrange,
        ROOT.kPink + 9,
        ROOT.kOrange - 8,
        ROOT.kGreen + 2,
        ROOT.kCyan + 2,
        ROOT.kBlue + 1,
        ROOT.kRed + 2,
        ROOT.kOrange + 3,
        ROOT.kYellow + 2,
        ROOT.kSpring,
    ]

    # ROOT line style codes, cycled by index:
    # 2 = dashed (- - -), 1 = solid (-----), 10 = dash-dot (-.-.-)
    linestyle_cycle = [2, 1, 10]

    # Line width used for every revision
    linewidth = 2

    # Treat a missing index like the first entry
    if not index:
        index = 0

    # Pick the color for the (index)th revision
    color = colors[index % len(colors)]

    if overall_item_count == 1:
        # A single revision cannot overlap any other line, so draw it solid
        linestyle = 1
    else:
        # Cycle the styles so that the newest revision (which is drawn on
        # top) gets a dashed linestyle
        linestyle = linestyle_cycle[index % len(linestyle_cycle)]

    return ROOT.TAttLine(color, linestyle, linewidth)
571
572
def index_from_revision(revision: str, work_folder: str) -> Optional[int]:
    """
    Takes the name of a revision and returns the corresponding index. Indices
    are used to ensure that the color and style of a revision in a plot are
    always the same, regardless of the displayed revisions.
    Example: release-X is always red, and no other release get drawn in red if
    release-X is not selected for display.
    :param revision: A string containing the name of a revision
    :param work_folder: The work folder containing the results and plots
    :return: The index of the requested revision, or None, if no index could
        be found for 'revision'
    """
    known_revisions = available_revisions(work_folder) + ["reference"]
    try:
        return known_revisions.index(revision)
    except ValueError:
        # Unknown revision
        return None
592
593
def get_log_file_paths(logger: logging.Logger) -> List[str]:
    """
    Returns list of paths that the FileHandlers of logger write to.
    :param logger: logging.Logger object.
    :return: List of paths
    """
    # Only FileHandler-like handlers carry a 'baseFilename' attribute;
    # every other handler type is silently skipped.
    return [
        handler.baseFilename
        for handler in logger.handlers
        if hasattr(handler, "baseFilename")
    ]
607
608
def get_terminal_width() -> int:
    """
    Returns width of terminal in characters, or 80 if unknown.

    Copied from basf2 utils. However, we only compile the validation package
    on b2master, so copy this here.
    """
    import shutil

    return shutil.get_terminal_size(fallback=(80, 24)).columns
619
620
def congratulator(
    success: Optional[Union[int, float]] = None,
    failure: Optional[Union[int, float]] = None,
    total: Optional[Union[int, float]] = None,
    just_comment=False,
    rate_name="Success rate",
) -> str:
    """ Keeping the morale up by commenting on success rates.

    Args:
        success: Number of successes
        failure: Number of failures
        total: success + failures (out of success, failure and total, exactly
            2 have to be specified. If you want to use your own figure of
            merit, just set total = 1. and set success to a number between 0.0
            (infernal) to 1.0 (stellar))
        just_comment: Do not add calculated percentage to return string.
        rate_name: How to refer to the calculated success rate.

    Returns:
        Comment on your success rate (str).
    """

    n_nones = [success, failure, total].count(None)

    # Exactly two of the three figures must be given; all three are only
    # acceptable if they are mutually consistent. (Previously these two
    # error cases were separate branches printing the identical message.)
    if (n_nones == 0 and total != success + failure) or n_nones >= 2:
        print(
            "ERROR (congratulator): Specify 2 of the arguments 'success',"
            "'failure', 'total'.",
            file=sys.stderr,
        )
        return ""

    # Derive whichever quantity was left out
    if total is None:
        total = success + failure
    if failure is None:
        failure = total - success
    if success is None:
        success = total - failure

    # Beware of zero division errors.
    if total == 0:
        return "That wasn't really exciting, was it?"

    success_rate = 100 * success / total

    # Thresholds (in percent) mapped to the comment used at or above them
    comments = {
        00.0: "You're grounded!",
        10.0: "Infernal...",
        20.0: "That's terrible!",
        40.0: "You can do better than that.",
        50.0: "That still requires some work.",
        75.0: "Three quarters! Almost there!",
        80.0: "Way to go ;)",
        90.0: "Gold medal!",
        95.0: "Legendary!",
        99.0: "Nobel price!",
        99.9: "Godlike!",
    }

    # Pick the comment for the highest threshold we cleared
    for value in sorted(comments.keys(), reverse=True):
        if success_rate >= value:
            comment = comments[value]
            break
    else:
        # below minimum (negative rate)?
        comment = comments[0]

    if just_comment:
        return comment
    return f"{rate_name} {int(success_rate)}%. {comment}"
700
701
def terminal_title_line(title="", subtitle="", level=0) -> str:
    """ Print a title line in the terminal.

    Args:
        title (str): The title. If no title is given, only a separating line
            is printed.
        subtitle (str): Subtitle.
        level (int): The lower, the more dominantly the line will be styled.
    """
    width = get_terminal_width()

    # using the markdown title underlining chars for lack of better
    # alternatives
    char_dict = {0: "=", 1: "-", 2: "~"}

    # Default to the most dominant char (covers level < 0, which shouldn't
    # happen but anyway), then pick the char for the highest key <= level
    char = char_dict[0]
    for key in sorted(char_dict.keys(), reverse=True):
        if level >= key:
            char = char_dict[key]
            break

    line = char * width
    if not title:
        return line

    # guess we could make a bit more effort with indenting/handling long titles
    # capitalization etc., but for now:
    parts = [line, title.capitalize()]
    if subtitle:
        parts.append(subtitle)
    parts.append(line)
    return "\n".join(parts)
737
738
def get_file_metadata(filename: str) -> str:
    """
    Retrieve the metadata for a file using ``b2file-metadata-show -a``.

    Args:
        filename (str): File to get metadata from.

    Returns:
        (str): Metadata of file, or None if ``b2file-metadata-show`` exited
        with an error.

    Raises:
        FileNotFoundError: If ``filename`` does not exist.
    """
    if not Path(filename).exists():
        # include the offending path in the message (the previous f-string
        # had no placeholder at all)
        raise FileNotFoundError(f"Could not find file {filename}")

    metadata = None

    try:
        proc = subprocess.run(
            ["b2file-metadata-show", "-a", str(filename)],
            stdout=subprocess.PIPE,
            # capture stderr so the error report below actually contains it
            # (previously e.stderr was always None)
            stderr=subprocess.PIPE,
            check=True,
        )
        metadata = proc.stdout.decode("utf-8")
    except subprocess.CalledProcessError as e:
        print(e.stderr.decode("utf-8"))

    return metadata
# NOTE(review): truncated stray fragment from extraction — not valid at module
# level (undefined names). Original call preserved here for reference:
# get_results_folder(output_base_dir)