6 from typing
import Optional, Dict, Any, List
14 import xml.etree.cElementTree
as XMLTree
16 import xml.etree.ElementTree
as XMLTree
21 pp = pprint.PrettyPrinter(depth=6, indent=1, width=80)
27 Enumeration of the states a script can be in during its execution
59 The object representation of a steering file.
61 @var path: The path to the steering file
62 @var name: The name of the file, but without special chars or spaces
63 @var package: The package to which the steering file belongs
64 @var header: The contents of the XML file header
65 @var dependencies: The scripts on which the steering file depends
66 @var status: The current status, e.g. 'running' or 'finished'
67 @var control: Execute locally or on the cluster?
68 @var returncode: The returncode of the steering file
69 @var _object: Pointer to the object itself. Is this even necessary?
72 def __init__(self, path: str, package: str, log: Optional[logging.Logger]):
74 The default constructor.
90 self.start_time =
None
95 self.name = Script.sanitize_file_name(str(os.path.basename(self.path)))
97 self.name_not_sanitized = str(os.path.basename(self.path))
100 self.package = package
106 self.dependencies = []
119 self.returncode =
None
def sanitize_file_name(file_name):
    """!
    Turn a file name into a string that contains only letters, digits and
    underscores.

    Every run of non-alphanumeric characters (the dot in front of the
    extension, spaces, dashes, already existing underscores, ...) is
    collapsed into one single underscore.

    @param file_name: The file name to sanitize
    @return: The sanitized file name
    """
    # [\W_] matches everything that is neither a letter nor a digit; the
    # '+' makes sure that consecutive special chars yield only one '_'.
    return re.sub(r'[\W_]+', '_', file_name)
135 Print out all properties = attributes of a script.
139 pp.pprint(vars(self))
141 def to_json(self, current_tag):
146 string_status =
"failed"
148 string_status =
"finished"
150 string_status =
"running"
152 string_status =
"skipped"
154 string_status =
"waiting"
156 string_status =
"cached"
159 self.name_not_sanitized,
162 log_url=os.path.join(self.package, self.name_not_sanitized) +
164 return_code=self.returncode
167 def get_recursive_dependencies(self, scripts, level=0):
169 Loops over all dependencies of this script and recursively retrieves
170 their sub-dependencies
175 f
'Recurisve dependency lookup reached level {level} and will '
176 f
'quit now. Possibly circular dependcencies in the validation '
181 for dep
in self.dependencies:
183 all_deps.add(dep.name)
185 next_level = level + 1
188 dep_script = [x
for x
in scripts
if x.name == dep.name]
190 if len(dep_script) == 1:
191 rec_deps = dep_script[0].get_recursive_dependencies(
195 f
'Depending script with the name {dep.name} could not be '
196 f
'found in the list of registered scripts. '
def unique_name(self):
    """!
    Generates a unique name from the package and name of the script
    which only occurs once in the whole validation suite

    @return: String of the form 'script_unique_name_<package>_<name>'
    """
    return f"script_unique_name_{self.package}_{self.name}"
212 def compute_dependencies(self, scripts):
214 Loops over the input files given in the header and tries to find the
215 corresponding Script objects, which will then be stored in the
216 script.dependencies-list
220 if self.header
is not None:
223 for root_file
in self.header.get(
'input', []):
227 creator = find_creator(
237 f
'Unmatched dependency for {self.path}:{root_file} '
244 self.dependencies += creator
247 self.dependencies = list(set(self.dependencies))
255 in_same_pkg = [script
for script
in scripts
256 if script.package == self.package]
260 py_files = [_
for _
in in_same_pkg
if _.path.endswith(
'py')]
261 c_files = [_
for _
in in_same_pkg
if _.path.endswith(
'C')]
264 py_files.sort(key=
lambda x: x.path)
265 c_files.sort(key=
lambda x: x.path)
268 in_same_pkg = py_files + c_files
270 if in_same_pkg.index(self) - 1 >= 0:
271 predecessor = in_same_pkg[in_same_pkg.index(self) - 1]
272 self.dependencies.append(predecessor)
def get_input_files(self):
    """
    Return a list of input files which this script will read.
    This information is only available, if load_header has been called

    @return: The 'input' entry of the header, or an empty list if there
        is no such entry or no header
    """
    # NOTE(review): the body of this guard is not visible in the reviewed
    # extract; returning an empty list mirrors the default used for a
    # missing 'input' entry below - confirm against the full file.
    if self.header is None:
        return []

    return self.header.get('input', [])
def get_output_files(self):
    """
    Return a list of output files this script will create.
    This information is only available, if load_header has been called

    @return: The 'output' entry of the header, or an empty list if there
        is no such entry or no header
    """
    # NOTE(review): the body of this guard is not visible in the reviewed
    # extract; returning an empty list mirrors the default used for a
    # missing 'output' entry below - confirm against the full file.
    if self.header is None:
        return []

    return self.header.get('output', [])
def is_cacheable(self):
    """
    Returns true, if the script must not be executed if its output
    files are already present.
    This information is only available, if load_header has been called

    @return: True if the header contains a 'cacheable' entry, else False
    """
    # NOTE(review): the body of this guard is not visible in the reviewed
    # extract; without a header there is no 'cacheable' entry, hence
    # False - confirm against the full file.
    if self.header is None:
        return False

    # Only the presence of the key matters, its value is not inspected
    return 'cacheable' in self.header
305 def load_header(self):
307 This method opens the file given in self.path, tries to extract the
308 XML-header of it and then parse it.
309 It then fills the self.header variable with a dict containing the
310 values that were read from the XML header.
315 with open(self.path,
"r")
as data:
316 steering_file_content = data.read()
319 pat = re.compile(
'(<header>.*?</header>)', re.DOTALL | re.M)
324 xml = pat.findall(steering_file_content)[0].strip()
326 self.log.error(
'No file header found: ' + self.path)
331 xml_tree = XMLTree.ElementTree(XMLTree.fromstring(xml)).getroot()
332 except XMLTree.ParseError:
333 self.log.error(
'Invalid XML in header: ' + self.path)
340 for branch
in xml_tree:
343 list_tags = [
'input',
'output',
'contact']
347 branch_text = branch.text
or ""
350 if branch.tag.strip()
in list_tags:
351 branch_value = [__.strip()
for __
in branch_text.split(
',')]
352 if branch_value == [
'']:
355 branch_value = re.sub(
' +',
' ', branch_text.replace(
'\n',
''))
356 branch_value = branch_value.strip()
362 if branch.tag.strip()
in self.header:
363 self.header[branch.tag.strip()] += branch_value
365 self.header[branch.tag.strip()] = branch_value
371 scripts: List[Script],
373 ) -> Optional[List[Script]]:
375 This function receives the name of a file and tries to find the file
376 in the given package which produces this file, i.e. find the file in
377 whose header 'outputfile' is listed under <output></output>.
378 It then returns a list of all Scripts who claim to be creating 'outputfile'
380 @param outputfile: The file of which we want to know by which script is
382 @param package: The package in which we want to search for the creator
387 candidates = [script
for script
in scripts
388 if script.package
in [package,
'validation']]
395 for candidate
in candidates:
396 if candidate.header
and \
397 outputfile
in candidate.header.get(
'output', []):
398 results.append(candidate)
401 if len(results) == 0:
404 log.warning(
'Found multiple creators for' + outputfile)