13from typing
import Optional, List
15from pathlib
import Path
19 import xml.etree.cElementTree
as XMLTree
21 import xml.etree.ElementTree
as XMLTree
30 Enumeration of the states a script can be in during its execution cycle
62 The object representation of a steering file.
64 @var path: The path to the steering file
65 @var name: The name of the file, but without special chars
or spaces
66 @var package: The package to which the steering file belongs
67 @var _header: The contents of the XML file header
68 @var dependencies: On which scripts does the steering file depend
69 @var status: The current status, e.g.
'running' or 'finished'
70 @var control: Execute locally
or on the cluster?
71 @var returncode: The returncode of the steering file
72 @type returncode: Optional[int]
73 @var _object: Pointer to the object itself. Is this even necessary?
77 self, path: str, package: str, log: Optional[logging.Logger] =
None
80 The default constructor.
90 log = logging.Logger(
"script")
91 self.log: logging.Logger = log
97 self.runtime: Optional[int] =
None
98 self.start_time: Optional[int] =
None
103 self.name = self.sanitize_file_name(str(os.path.basename(self.path)))
105 self.name_not_sanitized = str(os.path.basename(self.path))
108 self.package = package
111 self._header = dict()
113 self.header_parsing_errors =
False
115 self._header_parsing_attempted =
False
118 self.dependencies = []
131 self.returncode =
None
136 self.job_id: Optional[str] =
None
def sanitize_file_name(file_name):
    """
    Replaces every run of characters that are not letters or digits with a
    single underscore _

    This covers more than the . between the file name and the extension:
    spaces, dashes, repeated dots and already existing underscores are
    collapsed in the same way, e.g. "my file.py" becomes "my_file_py".

    NOTE(review): the constructor calls this as self.sanitize_file_name(...)
    although there is no self parameter, so it is presumably declared as a
    static method outside of this span - confirm.

    @param file_name: The file name that should be sanitized
    @return: The file name with all non-alphanumeric runs replaced by _
    """
    return re.sub(r"[\W_]+", "_", file_name)
145 def to_json(self, current_tag):
150 string_status =
"failed"
152 string_status =
"finished"
154 string_status =
"running"
156 string_status =
"skipped"
158 string_status =
"waiting"
160 string_status =
"cached"
163 input_file_names = [ip.split(
'/')[-1]
for ip
in self.input_files
if '../' in ip]
164 output_file_names = [op.split(
'/')[-1]
for op
in self.output_files
if '../' in op]
167 self.name_not_sanitized,
170 log_url=os.path.join(self.package, self.name_not_sanitized)
172 return_code=self.returncode,
173 input=input_file_names,
174 output=output_file_names,
177 def get_recursive_dependencies(self, scripts, level=0):
179 Loops over all dependencies of this script and recursively retrieves
180 their sub-dependencies
185 f
"Recursive dependency lookup reached level {level} and will "
186 f
"quit now. Possibly circular dependencies in the validation "
191 for dep
in self.dependencies:
193 all_deps.add(dep.name)
195 next_level = level + 1
198 dep_script = [x
for x
in scripts
if x.name == dep.name]
200 if len(dep_script) == 1:
201 rec_deps = dep_script[0].get_recursive_dependencies(
206 f
"Depending script with the name {dep.name} could not be "
207 f
"found in the list of registered scripts. "
def unique_name(self):
    """
    Generates a unique name from the package and name of the script
    which only occurs once in the whole validation suite

    The result is built from self.package and the sanitized self.name.

    @return: The string "script_unique_name_<package>_<name>"
    """
    return f"script_unique_name_{self.package}_{self.name}"
223 def compute_dependencies(self, scripts):
225 Loops over the input files given in the header
and tries to find the
226 corresponding Script objects, which will then be stored
in the
227 script.dependencies-list
231 for root_file
in self.input_files:
235 creator = find_creator(root_file, self.package, scripts, self.log)
240 f
"Unmatched dependency for {self.path}: {root_file} "
241 f
"has no creator! This means that we will have to skip "
249 self.dependencies += creator
252 self.dependencies = list(set(self.dependencies))
254 def load_header(self):
256 This method opens the file given in self.path, tries to extract the
257 XML-header of it
and then parse it.
258 It then fills the self.header variable
with a dict containing the
259 values that were read
from the XML header.
262 if self._header_parsing_attempted:
265 self._header_parsing_attempted =
True
270 with open(self.path, encoding=
"utf-8", errors=
"replace")
as data:
271 steering_file_content = data.read()
274 pat = re.compile(
"(<header>.*?</header>)", re.DOTALL | re.M)
279 xml = pat.findall(steering_file_content)[0].strip()
281 self.log.error(
"No file header found: " + self.path)
282 self.header_parsing_errors =
True
287 xml_tree = XMLTree.ElementTree(XMLTree.fromstring(xml)).getroot()
288 except XMLTree.ParseError:
289 self.log.error(
"Invalid XML in header: " + self.path)
290 self.header_parsing_errors =
True
297 for branch
in xml_tree:
300 list_tags = [
"input",
"output",
"contact"]
304 branch_text = branch.text
or ""
307 if branch.tag.strip()
in list_tags:
308 branch_value = [__.strip()
for __
in branch_text.split(
",")]
309 if branch_value == [
""]:
312 branch_value = re.sub(
" +",
" ", branch_text.replace(
"\n",
""))
313 branch_value = branch_value.strip()
319 if branch.tag.strip()
in self._header:
320 self._header[branch.tag.strip()] += branch_value
322 self._header[branch.tag.strip()] = branch_value
328 def input_files(self):
330 return a list of input files which this script will read.
331 This information
is only available,
if load_header has been called
334 return self._header.get(
"input", [])
337 def output_files(self):
339 return a list of output files this script will create.
340 This information
is only available,
if load_header has been called
343 return self._header.get(
"output", [])
346 def is_cacheable(self):
348 Returns true, if the script must
not be executed
if its output
349 files are already present.
350 This information
is only available,
if load_header has been called
353 return "cacheable" in self._header
356 def noexecute(self) -> bool:
357 """ A flag set in the header that tells us to simply ignore this
358 script for the purpose of running the validation.
361 return "noexecute" in self._header
364 def description(self) -> str:
365 """ Description of script as set in header """
367 return self._header.get(
"description",
"")
370 def contact(self) -> str:
371 """ Contact of script as set in header """
373 return self._header.get(
"contact",
"")
376 def interval(self) -> str:
377 """ Interval of script execution as set in header """
379 return self._header.get(
"interval",
"nightly")
381 def remove_output_files(self) -> None:
382 """Remove all output files. This is used to clean up files after a
383 script is marked as failed. Leaving the output files in a possibly
384 corrupted state and risking having them found by the validation framework
385 later for crashes isn't sensible. """
386 for f
in map(Path, self.output_files):
388 f
"Removing output file {f} (if exists) because script failed"
390 f.unlink(missing_ok=
True)
394 outputfile: str, package: str, scripts: List[Script], log: logging.Logger
395) -> Optional[List[Script]]:
397 This function receives the name of a file and tries to find the file
398 in the given package which produces this file, i.e. find the file
in
399 whose header
'outputfile' is listed under <output></output>.
400 It then returns a list of all Scripts who claim to be creating
'outputfile'
402 @param outputfile: The file of which we want to know by which script
is
404 @param package: The package
in which we want to search
for the creator
405 @param scripts: List of all script objects/candidates
413 for script
in scripts
414 if script.package
in [package,
"validation"]
422 for candidate
in candidates:
423 if outputfile
in candidate.output_files:
424 results.append(candidate)
427 if len(results) == 0:
430 log.warning(
"Found multiple creators for" + outputfile)
Enumeration of the states a script can be in during its execution cycle.