Belle II Software  release-06-01-15
validationserver.py
1 
9 from typing import Dict, Any, List, Tuple
10 from glob import glob
11 import json
12 import functools
13 import time
14 import datetime
15 from multiprocessing import Process, Queue
16 import os.path
17 import argparse
18 import logging
19 import sys
20 import queue
21 import webbrowser
22 
23 # 3rd
24 import cherrypy
25 
26 # ours
27 import json_objects
28 from validationplots import create_plots
29 import validationfunctions
30 import validationpath
31 
# Registry of plotting requests, keyed by the revision key produced by
# create_revision_key(). Each value is a tuple of
#   (worker process, queue the status messages are read from, last status).
# The last element is stored as None when a request is registered and is
# replaced by the newest queue message in check_plotting_status().
g_plottingProcesses: Dict[str, Tuple[Process, Queue, Dict[str, Any]]] = ({})
33 
34 
def get_revision_label_from_json_filename(json_filename: str) -> str:
    """
    Derive the label of a revision from the path to its revision.json file.

    The label is the name of the folder that directly contains the file,
    e.g. ``results/r121/revision.json`` gives ``r121``. Taking the label
    from the folder name keeps it meaningful even if the user has moved the
    results folder.
    """
    containing_folder = os.path.dirname(json_filename)
    return os.path.split(containing_folder)[1]
46 
47 
def get_json_object_list(results_folder: str, json_file_name: str) -> List[str]:
    """
    Find the revisions below a results folder that provide a given json file.

    Every direct sub-folder of ``results_folder`` is searched for a file
    named ``json_file_name``. Each file found is parsed once; a file that
    cannot be read or decoded raises the corresponding exception from
    ``open`` / ``json.load``.

    Args:
        results_folder: Folder whose direct sub-folders are searched.
        json_file_name: Name of the json file expected in each sub-folder.

    Returns:
        The labels (sub-folder names) of all revisions that contain the
        file, in the order reported by ``glob``. The parsed file content
        itself is not returned.
    """

    # os.path.join rather than string concatenation: with an empty
    # results_folder the search then stays relative to the current
    # directory instead of starting at the file system root.
    search_string = os.path.join(results_folder, "*", json_file_name)

    found_rev_labels = []

    for r_file in glob(search_string):
        # load the file only to make sure that it is readable, valid json
        with open(r_file) as json_file:
            json.load(json_file)

        # always use the folder name as label
        found_rev_labels.append(
            get_revision_label_from_json_filename(r_file)
        )

    return found_rev_labels
71 
72 
def deliver_json(file_name: str):
    """
    Open the json file at ``file_name``, decode it and hand back the
    resulting python objects.
    """
    with open(file_name) as handle:
        return json.load(handle)
82 
83 
def create_revision_key(revision_names: List[str]) -> str:
    """
    Build the string key for a list of revisions; this key is handed to the
    browser as the progress key.

    Each revision name is prefixed with "-" and the pieces are concatenated,
    so ``["a", "b"]`` becomes ``"-a-b"`` and an empty list becomes ``""``.
    """
    return "".join("-" + name for name in revision_names)
90 
91 
def check_plotting_status(progress_key: str):
    """
    Return the latest status reported for a plotting request.

    All messages currently waiting in the request's queue are consumed; the
    newest one is stored in ``g_plottingProcesses`` and returned.

    Args:
        progress_key: Key under which the request is registered in
            ``g_plottingProcesses``.

    Returns:
        The newest status message, the previously stored status if no new
        message was waiting, or None if the key is not registered (or no
        status has been received yet).
    """

    if progress_key not in g_plottingProcesses:
        return None

    process, qu, last_status = g_plottingProcesses[progress_key]

    # Read as many entries from the queue as possible; only the newest one
    # is of interest. queue.Empty is used as the "nothing left" signal
    # instead of polling empty() first.
    while True:
        try:
            last_status = qu.get_nowait()
        except queue.Empty:
            break

    # Update the stored status outside of the try block, so that whatever
    # was read is remembered even though the loop ended with queue.Empty.
    g_plottingProcesses[progress_key] = (process, qu, last_status)

    return last_status
115 
116 
117 # todo: remove this, once we're certain that the bug was fixed!
def warn_wrong_directory():
    """
    Print an error message if the current working directory does not end in
    "html". Nothing is raised; the function only reports.
    """
    if os.getcwd().endswith("html"):
        return

    message = (
        f"ERROR: Expected to be in HTML directory, but my current "
        f"working directory is {os.getcwd()}; abspath: {os.getcwd()}."
    )
    print(message)
124 
125 
126 # todo: limit the number of running plotting requests and terminate hanging ones
def start_plotting_request(
    revision_names: List[str], results_folder: str
) -> str:
    """
    Start a new comparison between the supplied revisions in a separate
    process.

    If a request for the same combination of revisions is already
    registered in ``g_plottingProcesses``, no new process is started.

    Args:
        revision_names: Names of the revisions to compare.
        results_folder: Path of the results folder; its parent directory is
            what gets passed on to ``create_plots``.

    Returns:
        revision key
    """

    rev_key = create_revision_key(revision_names)

    already_registered = rev_key in g_plottingProcesses
    if already_registered:
        # a plotting for this combination is still registered
        logging.info(f"Plotting request for {rev_key} still running")
        return rev_key

    # queue handed to the plotting process; the status messages are read
    # from it in check_plotting_status()
    progress_queue = Queue()

    # create_plots expects the work dir, i.e. the folder which contains the
    # results folder, so go one folder up
    work_dir = os.path.dirname(results_folder)

    # NOTE(review): the meaning of the second positional argument (False)
    # is defined by create_plots and not visible here.
    plot_args = (revision_names, False, progress_queue, work_dir)

    worker = Process(target=create_plots, args=plot_args)
    worker.start()
    g_plottingProcesses[rev_key] = (worker, progress_queue, None)

    logging.info(f"Started process for plotting request {rev_key}")

    return rev_key
167 
168 
class ValidationRoot:

    """
    Root Validation class to handle non-static HTTP requests into the
    validation server. The two main functions are to hand out compiled json
    objects of revisions and comparisons and to start and monitor the
    creation of comparison plots.

    """

    def __init__(self, working_folder):
        """
        class initializer, which takes the path to the folders containing the
        validation run results and plots (aka comparison)
        """

        # html folder that contains plots etc.
        self.working_folder = working_folder

        # Date when this object was instantiated.
        self.last_restart = datetime.datetime.now()

        # Git hash at the time of instantiation; reported as
        # "version_restart" by system_info().
        # NOTE(review): the assignment target was missing from the listing
        # and is restored from its use in system_info() -- confirm.
        self.version = validationfunctions.get_compact_git_hash(
            os.environ["BELLE2_LOCAL_DIR"]
        )

    @cherrypy.expose
    @cherrypy.tools.json_in()
    @cherrypy.tools.json_out()
    def create_comparison(self):
        """
        Triggers the start of a new comparison between the revisions supplied
        in revision_list

        Returns:
            dictionary with the key "progress_key", which can be used to
            poll the status of the plot creation
        """
        rev_list = cherrypy.request.json["revision_list"]
        logging.debug("Creating plots for revisions: " + str(rev_list))
        progress_key = start_plotting_request(
            rev_list,
            validationpath.get_results_folder(self.working_folder),
        )
        return {"progress_key": progress_key}

    @cherrypy.expose
    def index(self):
        """
        forward to the static landing page if
        the default url is used (like http://localhost:8080/)
        """
        raise cherrypy.HTTPRedirect("/static/validation.html")

    @cherrypy.expose
    def plots(self, *args):
        """
        Serve file from the html/plot directory.
        :param args: For the request /plots/a/b/c, these will be the strings
            "a", "b", "c"
        """

        warn_wrong_directory()

        if len(args) < 3:
            raise cherrypy.HTTPError(404)

        # NOTE(review): the two validationpath calls were missing from the
        # listing and are restored from the parallel code in comparisons()
        # -- confirm against the upstream file.
        # NOTE(review): args come straight from the request URL; components
        # such as ".." are not filtered here.
        tag_folder = os.path.relpath(
            validationpath.get_html_plots_tag_comparison_folder(
                self.working_folder, args[:-2]
            ),
            validationpath.get_html_folder(self.working_folder),
        )
        path = os.path.join(tag_folder, *args[-2:])
        # serve_file refuses relative paths (raises ValueError), so resolve
        # against the current directory, which is expected to be the html
        # folder (see warn_wrong_directory above).
        return cherrypy.lib.static.serve_file(os.path.abspath(path))

    # NOTE(review): the "def" line of this method was missing from the
    # listing; the name check_comparison_status is an assumption and must be
    # confirmed, since it determines the URL of this endpoint.
    @cherrypy.expose
    @cherrypy.tools.json_in()
    @cherrypy.tools.json_out()
    def check_comparison_status(self):
        """
        Checks on the status of a comparison creation
        """
        progress_key = cherrypy.request.json["input"]
        logging.debug("Checking status for plot creation: " + str(progress_key))
        status = check_plotting_status(progress_key)
        return status

    @cherrypy.expose
    @cherrypy.tools.json_out()
    def revisions(self, revision_label=None):
        """
        Return a combined json object with all revisions and
        mark the newest one with the field most_recent=true
        """

        # NOTE(review): this argument was missing from the listing (both in
        # the get_json_object_list call and in the path join below); it is
        # restored from the identical call in create_comparison() -- confirm.
        results_folder = validationpath.get_results_folder(
            self.working_folder
        )

        # get list of available revision
        rev_list = get_json_object_list(
            results_folder,
            validationpath.file_name_results_json,
        )

        # always add the reference revision
        combined_list = []
        reference_revision = json.loads(
            json_objects.dumps(json_objects.Revision(label="reference"))
        )

        # load and combine
        for r in rev_list:
            full_path = os.path.join(
                results_folder,
                r,
                validationpath.file_name_results_json,
            )

            # update label, if dir has been moved
            lbl_folder = get_revision_label_from_json_filename(full_path)
            j = deliver_json(full_path)
            j["label"] = lbl_folder
            combined_list.append(j)

        # Sorting

        # Order by categories (nightly, build, etc.) first, then by date
        # A pure chronological order doesn't make sense, because we do not
        # have a linear history ((pre)releases branch off) and for the builds
        # the date corresponds to the build date, not to the date of the
        # actual commit.
        def sort_key(label: str):
            if "-" not in label:
                logging.warning(
                    f"Misformatted label encountered: '{label}' "
                    f"(doesn't seem to include date?)"
                )
                return label
            category, datetag = label.split("-", maxsplit=1)
            # Will later reverse order to bring items in the same category
            # in reverse chronological order, so the following list will have
            # the items in reverse order as well:
            order = ["release", "prerelease", "build", "nightly"]
            try:
                index = order.index(category)
            except ValueError:
                index = 9
                logging.warning(
                    f"Misformatted label encountered: '{label}' (doesn't seem "
                    f"to belong to any known category?)"
                )
            return f"{index}-{datetag}"

        combined_list.sort(key=lambda rev: sort_key(rev["label"]), reverse=True)

        # reference always on top
        combined_list = [reference_revision] + combined_list

        # Set the most recent one ...
        newest_date = None
        newest_rev = None
        for r in combined_list:
            rdate_str = r["creation_date"]
            # entries without a usable creation date cannot be the newest
            if not isinstance(rdate_str, str) or not rdate_str:
                continue
            try:
                rdate = time.strptime(rdate_str, "%Y-%m-%d %H:%M")
            except ValueError:
                # some old validation results might still contain
                # seconds and therefore cannot properly be converted
                continue

            if newest_date is None or rdate > newest_date:
                newest_date = rdate
                newest_rev = r

        for c in combined_list:
            if c["most_recent"] is not None:
                c["most_recent"] = False

        # if there are no revisions at all, this might also be just None
        if newest_rev:
            newest_rev["most_recent"] = True

        # topmost item must be dictionary for the ractive.os template to match
        return {"revisions": combined_list}

    @cherrypy.expose
    @cherrypy.tools.json_out()
    def comparisons(self, comparison_label=None):
        """
        return the json file of the comparison results of one specific
        comparison

        :param comparison_label: comma separated list of the compared
            revisions; a request without it is answered with 404
        """

        warn_wrong_directory()

        # without a label there is nothing to look up; answer with a clean
        # 404 instead of failing on None.split below
        if comparison_label is None:
            raise cherrypy.HTTPError(404, "No comparison label supplied")

        # NOTE(review): the two validationpath calls were missing from the
        # listing and are restored from the parallel code in plots() --
        # confirm against the upstream file.
        # todo: Make this independent of our working directory!
        path = os.path.join(
            os.path.relpath(
                validationpath.get_html_plots_tag_comparison_folder(
                    self.working_folder, comparison_label.split(",")
                ),
                validationpath.get_html_folder(self.working_folder),
            ),
            "comparison.json",
        )

        # check if this comparison actually exists
        if not os.path.isfile(path):
            raise cherrypy.HTTPError(
                404, f"Json Comparison file {path} does not exist"
            )

        return deliver_json(path)

    @cherrypy.expose
    @cherrypy.tools.json_out()
    def system_info(self):
        """
        Returns:
            JSON file containing git versions and time of last restart
        """

        warn_wrong_directory()

        # last_restart is a naive local time, for which %Z yields nothing.
        # astimezone() attaches the local time zone that applies to that
        # moment, so %Z gives the right name in summer and in winter
        # (time.tzname[1] would always be the daylight saving name).
        restart_time = self.last_restart.astimezone()

        return {
            "last_restart": restart_time.strftime("%-d %b %H:%M %Z"),
            "version_restart": self.version,
            "version_current": validationfunctions.get_compact_git_hash(
                os.environ["BELLE2_LOCAL_DIR"]
            ),
        }
406 
407 
def setup_gzip_compression(path, cherry_config):
    """
    Switch on GZip compression for all text-based content the web-server
    delivers below ``path``.

    The settings are added to the already existing ``cherry_config[path]``
    section; a missing section raises KeyError.
    """

    compressed_mime_types = [
        "text/html",
        "text/plain",
        "text/css",
        "application/javascript",
        "application/json",
    ]

    section = cherry_config[path]
    section["tools.gzip.on"] = True
    section["tools.gzip.mime_types"] = compressed_mime_types
426 
427 
def get_argument_parser():
    """
    Prepare a parser for all the known command line arguments

    Returns:
        argparse.ArgumentParser which knows the options --ip, --port,
        --view and --production
    """

    # Set up the command line parser
    parser = argparse.ArgumentParser()

    # Define the accepted command line flags
    parser.add_argument(
        "-ip",
        "--ip",
        # mind the space at the end of the first fragment, the two
        # literals are glued together without any separator
        help="The IP address on which the "
        "server starts. Default is '127.0.0.1'.",
        type=str,
        default="127.0.0.1",
    )
    parser.add_argument(
        "-p",
        "--port",
        help="The port number on which"
        " the server starts. Default is '8000'.",
        # parse as int, so that the value has the same type as the default
        # and a non-numeric port is rejected by the parser itself
        type=int,
        default=8000,
    )
    parser.add_argument(
        "-v",
        "--view",
        help="Open validation website in the system's default browser.",
        action="store_true",
    )
    parser.add_argument(
        "--production",
        help="Run in production environment: "
        "no log/error output via website and no auto-reload",
        action="store_true",
    )

    return parser
465 
466 
def parse_cmd_line_arguments():
    """!
    Builds the command line parser via get_argument_parser() and lets it
    parse the arguments of the current process.
    @return: An object containing the parsed command line arguments.
        The individual arguments are available as attributes of that
        object, i.e. [name_of_object].[desired_argument]
    """
    return get_argument_parser().parse_args()
478 
479 
def run_server(
    ip="127.0.0.1",
    port=8000,
    parse_command_line=False,
    open_site=False,
    dry_run=False,
):
    """
    Configure and start the validation web server.

    The current working directory is taken as the working folder. The
    function checks that a basf2 release is set up, that static HTML
    content is available and that the results folder contains at least one
    folder; if any of these checks fails, the process is terminated via
    sys.exit. It then creates (if necessary) the folders html and
    html/plots, changes into html and starts CherryPy.

    Args:
        ip: Address the server listens on.
        port: Port the server listens on.
        parse_command_line: If True, ip, port, open_site and the production
            switch are taken from the command line instead.
        open_site: If True, open the site in the system's default browser.
        dry_run: If True, do all of the setup but do not start the server.
    """

    # Setup options for logging
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s %(levelname)-8s %(message)s",
        datefmt="%H:%M:%S",
    )

    basepath = validationpath.get_basepath()
    cwd_folder = os.getcwd()

    # Only execute the program if a basf2 release is set up!
    if (
        os.environ.get("BELLE2_RELEASE_DIR", None) is None
        and os.environ.get("BELLE2_LOCAL_DIR", None) is None
    ):
        sys.exit("Error: No basf2 release set up!")

    cherry_config = dict()
    # just empty, will be filled below
    cherry_config["/"] = {}
    # will ensure also the json requests are gzipped
    setup_gzip_compression("/", cherry_config)

    # check if static files are provided via central release
    static_folder_list = ["validation", "html_static"]
    static_folder = None

    if basepath["central"] is not None:
        static_folder_central = os.path.join(
            basepath["central"], *static_folder_list
        )
        if os.path.isdir(static_folder_central):
            static_folder = static_folder_central

    # check if there is also a collection of static files in the local release
    # this overwrites the usage of the central release
    if basepath["local"] is not None:
        static_folder_local = os.path.join(
            basepath["local"], *static_folder_list
        )
        if os.path.isdir(static_folder_local):
            static_folder = static_folder_local

    if static_folder is None:
        sys.exit(
            "Either BELLE2_RELEASE_DIR or BELLE2_LOCAL_DIR has to be set "
            "to provide static HTML content. Did you run b2setup ?"
        )

    # join the paths of the various result folders
    results_folder = validationpath.get_results_folder(cwd_folder)
    comparison_folder = validationpath.get_html_plots_folder(cwd_folder)

    logging.info(f"Serving static content from {static_folder}")
    logging.info(f"Serving result content and plots from {cwd_folder}")

    # check if the results folder exists and has at least one folder
    if not os.path.isdir(results_folder):
        sys.exit(
            f"Result folder {results_folder} does not exist, run "
            f"validate_basf2 first to create validation output"
        )

    has_result_folders = any(
        os.path.isdir(os.path.join(results_folder, f))
        for f in os.listdir(results_folder)
    )
    if not has_result_folders:
        sys.exit(
            f"Result folder {results_folder} contains no folders, run "
            f"validate_basf2 first to create validation output"
        )

    # Go to the html directory (created on first use)
    os.makedirs("html", exist_ok=True)
    os.chdir("html")

    os.makedirs("plots", exist_ok=True)

    # export js, css and html templates
    cherry_config["/static"] = {
        "tools.staticdir.on": True,
        # only serve js, css, html, png and js.map files
        "tools.staticdir.match": r"^.*\.(js|css|html|png|js\.map)$",
        "tools.staticdir.dir": static_folder,
    }
    setup_gzip_compression("/static", cherry_config)

    # export generated plots
    cherry_config["/plots"] = {
        "tools.staticdir.on": True,
        # only serve png, json and pdf files
        "tools.staticdir.match": r"^.*\.(png|json|pdf)$",
        "tools.staticdir.dir": comparison_folder,
    }
    setup_gzip_compression("/plots", cherry_config)

    # export generated results and raw root files
    cherry_config["/results"] = {
        "tools.staticdir.on": True,
        "tools.staticdir.dir": results_folder,
        # only serve log and root files
        "tools.staticdir.match": r"^.*\.(log|root)$",
        # server the log files as plain text files, and make sure to use
        # utf-8 encoding. Firefox might decide different, if the files
        # are located on a .jp domain and use Shift_JIS
        "tools.staticdir.content_types": {
            "log": "text/plain; charset=utf-8",
            "root": "application/octet-stream",
        },
    }

    setup_gzip_compression("/results", cherry_config)

    # Define the server address and port
    # only if we got some specific
    production_env = False
    if parse_command_line:
        # Parse command line arguments
        cmd_arguments = parse_cmd_line_arguments()

        ip = cmd_arguments.ip
        port = int(cmd_arguments.port)
        open_site = cmd_arguments.view
        production_env = cmd_arguments.production

    cherrypy.config.update(
        {"server.socket_host": ip, "server.socket_port": port, }
    )
    if production_env:
        cherrypy.config.update({"environment": "production"})

    logging.info(f"Server: Starting HTTP server on {ip}:{port}")

    if open_site:
        webbrowser.open("http://" + ip + ":" + str(port))

    if not dry_run:
        cherrypy.quickstart(
            ValidationRoot(working_folder=cwd_folder), "/", cherry_config
        )
633 
634 
# Start the server when this file is executed directly. run_server() is
# called with its defaults, so command line arguments are not parsed here
# (parse_command_line defaults to False).
if __name__ == "__main__":
    run_server()
def __init__(self, working_folder)
last_restart
Date when this object was instantiated.
def comparisons(self, comparison_label=None)
def revisions(self, revision_label=None)
working_folder
html folder that contains plots etc.
def dumps(obj)
Optional[str] get_compact_git_hash(str repo_folder)
def get_html_plots_tag_comparison_folder(output_base_dir, tags)
def get_html_folder(output_base_dir)
def get_html_plots_folder(output_base_dir)
def get_results_folder(output_base_dir)