13 from typing
import Optional, List
15 from pathlib
import Path
19 import xml.etree.cElementTree
as XMLTree
21 import xml.etree.ElementTree
as XMLTree
30 Enumeration of the states a script can be in during its execution
62 The object representation of a steering file.
64 @var path: The path to the steering file
65 @var name: The name of the file, but without special chars or spaces
66 @var package: The package to which the steering file belongs
67 @var header: The contents of the XML file header
68 @var dependencies: On which scripts does the steering file depend
69 @var status: The current status, e.g. 'running' or 'finished'
70 @var control: Execute locally or on the cluster?
71 @var returncode: The returncode of the steering file
72 @var _object: Pointer to the object itself. Is this even necessary?
76 self, path: str, package: str, log: Optional[logging.Logger] =
None
79 The default constructor.
89 log = logging.Logger(
"script")
90 self.log: logging.Logger = log
96 self.runtime: Optional[int] =
None
97 self.start_time: Optional[int] =
None
102 self.name = self.sanitize_file_name(str(os.path.basename(self.path)))
104 self.name_not_sanitized = str(os.path.basename(self.path))
107 self.package = package
110 self._header = dict()
112 self.header_parsing_errors =
False
114 self._header_parsing_attempted =
False
117 self.dependencies = []
130 self.returncode: Optional[int] =
None
135 self.job_id: Optional[str] =
None
def sanitize_file_name(file_name):
    """
    Replace every run of characters that are not letters or digits
    (including dots, spaces, dashes and underscores) with one underscore.

    This does more than swap the dot before the extension: any sequence of
    non-alphanumeric characters anywhere in the name is collapsed, e.g.
    "my file-v2.py" becomes "my_file_v2_py".

    NOTE(review): the function takes no ``self`` but is called as
    ``self.sanitize_file_name(...)`` in the constructor, so it presumably
    carries a ``@staticmethod`` decorator on the line above - confirm.

    @param file_name: The file name (without directory part) to sanitize
    @return: The sanitized name
    """
    return re.sub(r"[\W_]+", "_", file_name)
144 def to_json(self, current_tag):
149 string_status =
"failed"
151 string_status =
"finished"
153 string_status =
"running"
155 string_status =
"skipped"
157 string_status =
"waiting"
159 string_status =
"cached"
162 input_file_names = [ip.split(
'/')[-1]
for ip
in self.input_files
if '../' in ip]
163 output_file_names = [op.split(
'/')[-1]
for op
in self.output_files
if '../' in op]
166 self.name_not_sanitized,
169 log_url=os.path.join(self.package, self.name_not_sanitized)
171 return_code=self.returncode,
172 input=input_file_names,
173 output=output_file_names,
176 def get_recursive_dependencies(self, scripts, level=0):
178 Loops over all dependencies of this script and recursively retrieves
179 their sub-dependencies
184 f
"Recurisve dependency lookup reached level {level} and will "
185 f
"quit now. Possibly circular dependcencies in the validation "
190 for dep
in self.dependencies:
192 all_deps.add(dep.name)
194 next_level = level + 1
197 dep_script = [x
for x
in scripts
if x.name == dep.name]
199 if len(dep_script) == 1:
200 rec_deps = dep_script[0].get_recursive_dependencies(
205 f
"Depending script with the name {dep.name} could not be "
206 f
"found in the list of registered scripts. "
def unique_name(self):
    """
    Generates a name from the package and the (sanitized) name of the
    script. It is meant to occur only once in the whole validation suite,
    which holds as long as no two scripts share both package and name.

    @return: A string of the form "script_unique_name_<package>_<name>"
    """
    return f"script_unique_name_{self.package}_{self.name}"
222 def compute_dependencies(self, scripts):
224 Loops over the input files given in the header and tries to find the
225 corresponding Script objects, which will then be stored in the
226 script.dependencies-list
230 for root_file
in self.input_files:
234 creator = find_creator(root_file, self.package, scripts, self.log)
239 f
"Unmatched dependency for {self.path}: {root_file} "
240 f
"has no creator! This means that we will have to skip "
248 self.dependencies += creator
251 self.dependencies = list(set(self.dependencies))
253 def load_header(self):
255 This method opens the file given in self.path, tries to extract the
256 XML-header of it and then parse it.
257 It then fills the self.header variable with a dict containing the
258 values that were read from the XML header.
261 if self._header_parsing_attempted:
264 self._header_parsing_attempted =
True
269 with open(self.path, encoding=
"utf-8", errors=
"replace")
as data:
270 steering_file_content = data.read()
273 pat = re.compile(
"(<header>.*?</header>)", re.DOTALL | re.M)
278 xml = pat.findall(steering_file_content)[0].strip()
280 self.log.error(
"No file header found: " + self.path)
281 self.header_parsing_errors =
True
286 xml_tree = XMLTree.ElementTree(XMLTree.fromstring(xml)).getroot()
287 except XMLTree.ParseError:
288 self.log.error(
"Invalid XML in header: " + self.path)
289 self.header_parsing_errors =
True
296 for branch
in xml_tree:
299 list_tags = [
"input",
"output",
"contact"]
303 branch_text = branch.text
or ""
306 if branch.tag.strip()
in list_tags:
307 branch_value = [__.strip()
for __
in branch_text.split(
",")]
308 if branch_value == [
""]:
311 branch_value = re.sub(
" +",
" ", branch_text.replace(
"\n",
""))
312 branch_value = branch_value.strip()
318 if branch.tag.strip()
in self._header:
319 self._header[branch.tag.strip()] += branch_value
321 self._header[branch.tag.strip()] = branch_value
def input_files(self):
    """
    Return the list of input files which this script will read.

    This information is only available if load_header has been called.
    If the header holds no "input" entry, an empty list is returned.
    """
    return self._header.get("input", [])
def output_files(self):
    """
    Return the list of output files which this script will create.

    This information is only available if load_header has been called.
    If the header holds no "output" entry, an empty list is returned.
    """
    return self._header.get("output", [])
def is_cacheable(self):
    """
    Returns True if the script must not be executed when its output
    files are already present.

    Only the presence of a "cacheable" entry in the parsed header is
    checked; the value stored for it is ignored.
    This information is only available if load_header has been called.
    """
    return "cacheable" in self._header
def noexecute(self) -> bool:
    """ A flag set in the header that tells us to simply ignore this
    script for the purpose of running the validation.

    Only the presence of a "noexecute" entry in the parsed header is
    checked; the value stored for it is ignored.
    """
    return "noexecute" in self._header
def description(self) -> str:
    """ Description of script as set in header.

    Returns the empty string if the header holds no "description" entry.
    """
    return self._header.get("description", "")
def contact(self) -> str:
    """ Contact of script as set in header.

    Returns the empty string if the header holds no "contact" entry.

    NOTE(review): load_header treats "contact" as a list tag (it is split
    at commas like "input" and "output"), so when the entry is present the
    stored value looks like a list of strings rather than a str. Only the
    default matches the annotation - confirm what callers expect.
    """
    return self._header.get("contact", "")
def interval(self) -> str:
    """ Interval of script execution as set in header.

    Falls back to "nightly" if the header holds no "interval" entry.
    """
    return self._header.get("interval", "nightly")
380 def remove_output_files(self) -> None:
381 """Remove all output files. This is used to clean up files after a
382 script is marked as failed. Leaving the output files in a possibly
383 corrupted state and risking having them found by the validation framework
384 later for crashes isn't sensible.
386 for f
in map(Path, self.output_files):
388 f
"Removing output file {f} (if exists) because script failed"
390 f.unlink(missing_ok=
True)
394 outputfile: str, package: str, scripts: List[Script], log: logging.Logger
395 ) -> Optional[List[Script]]:
397 This function receives the name of a file and tries to find the file
398 in the given package which produces this file, i.e. find the file in
399 whose header 'outputfile' is listed under <output></output>.
400 It then returns a list of all Scripts who claim to be creating 'outputfile'
402 @param outputfile: The file of which we want to know by which script is
404 @param package: The package in which we want to search for the creator
405 @param scripts: List of all script objects/candidates
413 for script
in scripts
414 if script.package
in [package,
"validation"]
422 for candidate
in candidates:
423 if outputfile
in candidate.output_files:
424 results.append(candidate)
427 if len(results) == 0:
430 log.warning(
"Found multiple creators for" + outputfile)
Enumeration of the states a script can be in during its execution cycle.