import os
import re
import enum
import logging

from typing import Optional, List
from pathlib import Path

# JSON result objects used by the validation framework
import json_objects

# Import the XML parser. xml.etree.cElementTree is a deprecated alias that was
# removed in Python 3.9, so fall back to xml.etree.ElementTree if it is missing.
try:
    import xml.etree.cElementTree as XMLTree
except ImportError:
    import xml.etree.ElementTree as XMLTree


class ScriptStatus(enum.Enum):
    """
    Enumeration of the states a script can be during its execution cycle.
    """

    # The member names mirror the status strings reported by Script.to_json()
    waiting = enum.auto()
    running = enum.auto()
    finished = enum.auto()
    failed = enum.auto()
    skipped = enum.auto()
    cached = enum.auto()


class Script:
    """
    The object representation of a steering file.

    @var path: The path to the steering file
    @var name: The name of the file, but without special chars or spaces
    @var package: The package to which the steering file belongs
    @var header: The contents of the XML file header
    @var dependencies: On which scripts the steering file depends
    @var status: The current status, e.g. 'running' or 'finished'
    @var control: Execute locally or on the cluster?
    @var returncode: The return code of the steering file
    @var _object: Pointer to the object itself. Is this even necessary?
    """

    def __init__(
        self, path: str, package: str, log: Optional[logging.Logger] = None
    ):
        """
        The default constructor.
        """

        # The path to the steering file
        self.path = path

        # Use the logger that was handed in or create a fresh one
        if log is None:
            log = logging.Logger("script")
        self.log: logging.Logger = log

        # Runtime and start time of the script; filled once it has been run
        self.runtime: Optional[int] = None
        self.start_time: Optional[int] = None

        # File name with special characters replaced by underscores, plus the
        # original, unmodified file name
        self.name = self.sanitize_file_name(str(os.path.basename(self.path)))
        self.name_not_sanitized = str(os.path.basename(self.path))

        # The package to which the steering file belongs
        self.package = package

        # The parsed contents of the XML header and the parsing bookkeeping
        self._header = dict()
        self.header_parsing_errors = False
        self._header_parsing_attempted = False

        # The scripts on which this steering file depends
        self.dependencies = []

        # The current status of the script; scripts start out waiting
        self.status = ScriptStatus.waiting

        # The return code of the steering file; None until it has finished
        self.returncode: Optional[int] = None

        # The ID of the batch/cluster job executing this script, if any
        self.job_id: Optional[str] = None

    @staticmethod
    def sanitize_file_name(file_name):
        """
        Replaces the '.' between the file name and extension (and any other
        special characters) with an underscore '_'
        """
        return re.sub(r"[\W_]+", "_", file_name)

    def to_json(self, current_tag):
        """
        Convert this script's information into a JSON-serializable object.
        """

        # Map the ScriptStatus enum onto a plain string
        string_status = ""
        if self.status == ScriptStatus.failed:
            string_status = "failed"
        elif self.status == ScriptStatus.finished:
            string_status = "finished"
        elif self.status == ScriptStatus.running:
            string_status = "running"
        elif self.status == ScriptStatus.skipped:
            string_status = "skipped"
        elif self.status == ScriptStatus.waiting:
            string_status = "waiting"
        elif self.status == ScriptStatus.cached:
            string_status = "cached"

        # Build the JSON result object of the validation framework
        return json_objects.Script(
            self.name_not_sanitized,
            self.package,
            string_status,
            log_url=os.path.join(self.package, self.name_not_sanitized),
            return_code=self.returncode,
        )

    def get_recursive_dependencies(self, scripts, level=0):
        """
        Loops over all dependencies of this script and recursively retrieves
        their sub-dependencies
        """

        # Guard against dependency chains that never terminate; the depth
        # limit of 50 is an assumed value
        if level > 50:
            self.log.error(
                f"Recursive dependency lookup reached level {level} and will "
                f"quit now. Possibly circular dependencies in the validation "
                "scripts."
            )
            return set()

        all_deps = set()
        for dep in self.dependencies:
            all_deps.add(dep.name)
            next_level = level + 1

            # Find the Script object that corresponds to this dependency
            dep_script = [x for x in scripts if x.name == dep.name]
            if len(dep_script) == 1:
                rec_deps = dep_script[0].get_recursive_dependencies(
                    scripts, next_level
                )
                all_deps |= set(rec_deps)
            else:
                self.log.error(
                    f"Depending script with the name {dep.name} could not be "
                    "found in the list of registered scripts."
                )

        return all_deps

    def unique_name(self):
        """
        Generates a unique name from the package and name of the script
        which only occurs once in the whole validation suite
        """
        return f"script_unique_name_{self.package}_{self.name}"

    def compute_dependencies(self, scripts):
        """
        Loops over the input files given in the header and tries to find the
        corresponding Script objects, which will then be stored in the
        script.dependencies-list
        """

        # Loop over all input files this script reads
        for root_file in self.input_files:

            # Find the script(s) that create this file
            creator = find_creator(root_file, self.package, scripts, self.log)

            if creator is None:
                self.log.error(
                    f"Unmatched dependency for {self.path}: {root_file} "
                    f"has no creator! This means that we will have to skip "
                    "this script."
                )
            else:
                self.dependencies += creator

        # Remove duplicate entries
        self.dependencies = list(set(self.dependencies))

    def load_header(self):
        """
        This method opens the file given in self.path, tries to extract its
        XML header and then parses it.
        It then fills the self.header variable with a dict containing the
        values that were read from the XML header.
        """

        # Only try to parse the header once
        if self._header_parsing_attempted:
            return
        self._header_parsing_attempted = True

        # Read the whole steering file
        with open(self.path, encoding="utf-8", errors="replace") as data:
            steering_file_content = data.read()

        # Regular expression that matches everything between <header> tags
        pat = re.compile("(<header>.*?</header>)", re.DOTALL | re.M)

        # Extract the header block from the steering file
        try:
            xml = pat.findall(steering_file_content)[0].strip()
        except IndexError:
            self.log.error("No file header found: " + self.path)
            self.header_parsing_errors = True
            return

        # Create an XML tree from the plain XML code
        try:
            xml_tree = XMLTree.ElementTree(XMLTree.fromstring(xml)).getroot()
        except XMLTree.ParseError:
            self.log.error("Invalid XML in header: " + self.path)
            self.header_parsing_errors = True
            return

        # Loop over all branches (tags) of the XML tree
        for branch in xml_tree:

            # Tags that may contain comma-separated lists of values
            list_tags = ["input", "output", "contact"]

            branch_text = branch.text or ""

            # List-valued tags are split on commas; all other tags are stored
            # as whitespace-normalized strings
            if branch.tag.strip() in list_tags:
                branch_value = [__.strip() for __ in branch_text.split(",")]
                if branch_value == [""]:
                    branch_value = []
            else:
                branch_value = re.sub(" +", " ", branch_text.replace("\n", ""))
                branch_value = branch_value.strip()

            # If the tag occurs several times, append the values; otherwise
            # create a new entry
            if branch.tag.strip() in self._header:
                self._header[branch.tag.strip()] += branch_value
            else:
                self._header[branch.tag.strip()] = branch_value
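
    # A (made-up) steering file header such as
    #
    #   <header>
    #     <input>EvtGenSim.root</input>
    #     <output>TrackingValidation.root</output>
    #     <contact>someone@example.com</contact>
    #     <description>Validates the tracking reconstruction.</description>
    #   </header>
    #
    # would therefore be parsed into
    #
    #   {"input": ["EvtGenSim.root"],
    #    "output": ["TrackingValidation.root"],
    #    "contact": ["someone@example.com"],
    #    "description": "Validates the tracking reconstruction."}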

    @property
    def input_files(self):
        """
        Return a list of input files which this script will read.
        This information is only available if load_header has been called.
        """
        return self._header.get("input", [])

    @property
    def output_files(self):
        """
        Return a list of output files this script will create.
        This information is only available if load_header has been called.
        """
        return self._header.get("output", [])

    @property
    def is_cacheable(self):
        """
        Returns true if the script must not be executed when its output
        files are already present.
        This information is only available if load_header has been called.
        """
        return "cacheable" in self._header
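
    # Because load_header() stores every header tag as a key of self._header,
    # a bare <cacheable/> tag in the steering file header is enough to make
    # this property return True.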

    @property
    def noexecute(self) -> bool:
        """ A flag set in the header that tells us to simply ignore this
        script for the purpose of running the validation.
        """
        return "noexecute" in self._header

    @property
    def description(self) -> str:
        """ Description of the script as set in the header """
        return self._header.get("description", "")

    @property
    def contact(self) -> str:
        """ Contact of the script as set in the header """
        return self._header.get("contact", "")

    @property
    def interval(self) -> str:
        """ Interval of script execution as set in the header """
        return self._header.get("interval", "nightly")

    def remove_output_files(self) -> None:
        """Remove all output files. This is used to clean up files after a
        script is marked as failed. Leaving the output files in a possibly
        corrupted state and risking that the validation framework picks them
        up later isn't sensible.
        """
        for f in map(Path, self.output_files):
            self.log.warning(
                f"Removing output file {f} (if exists) because script failed"
            )
            f.unlink(missing_ok=True)
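
    # Note: Path.unlink(missing_ok=True) silently ignores files that no longer
    # exist; the missing_ok parameter requires Python 3.8 or newer.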


def find_creator(
    outputfile: str, package: str, scripts: List[Script], log: logging.Logger
) -> Optional[List[Script]]:
    """
    This function receives the name of a file and tries to find the file
    in the given package which produces this file, i.e. find the file in
    whose header 'outputfile' is listed under <output></output>.
    It then returns a list of all Scripts which claim to create 'outputfile'.

    @param outputfile: The file for which we want to know the creating script
    @param package: The package in which we want to search for the creator
    @param scripts: List of all script objects/candidates
    @param log: Logger used to report problems
    """

    # Get a list of all Script objects for scripts in the given package as
    # well as in the 'validation' package itself
    candidates = [
        script
        for script in scripts
        if script.package in [package, "validation"]
    ]

    # Loop over all candidates and check if they claim to create 'outputfile'
    results = []
    for candidate in candidates:
        if outputfile in candidate.output_files:
            results.append(candidate)

    # Return None if there is no creator at all and warn if there are several
    if len(results) == 0:
        return None
    if len(results) > 1:
        log.warning("Found multiple creators for " + outputfile)
    return results