Belle II Software  release-06-02-00
create_file_to_iov_map.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 
11 
12 
26 
27 import fnmatch
28 from pprint import PrettyPrinter
# Pretty printer used to display the resulting file -> IoV mapping
pp = PrettyPrinter(indent=2)

# Let's take some file patterns. We could have put wildcards in more places but this is enough for testing.
# Note that a glob character class lists its characters without separators: "[56]" matches "5" or "6",
# whereas "[5,6]" would additionally match a literal comma.
file_path_patterns = ["/hsm/belle2/bdata/Data/Raw/e0003/r0495[56]*/**/*.root"]
33 
34 
def bad_run_finder(filepath):
    """
    Tell whether a file path belongs to a run directory that was marked as bad.

    A path counts as bad when it contains an 'r' followed by five characters,
    then '.bad' and a '/' (for example '.../r04955.bad/...').

    Returns True for such a path and False otherwise.
    """
    bad_run_pattern = "*r?????.bad/*"
    is_bad_run = fnmatch.fnmatch(filepath, bad_run_pattern)
    return is_bad_run
40 
41 # Here's how to do 2.
42 
43 
def from_raw_data_file_paths(file_path_patterns):
    """
    Build the file path -> IoV mapping using only the paths of the raw data files.

    The patterns are expanded with caf.utils.find_absolute_file_paths, every path flagged
    by bad_run_finder is dropped, and each remaining path is handed to
    caf.utils.parse_raw_data_iov, whose result becomes the value stored for that path.

    Returns a dictionary keyed by file path.
    """
    from caf.utils import find_absolute_file_paths, parse_raw_data_iov

    # Expand the patterns, skip the bad runs, and parse each IoV out of the directory/file name
    return {
        path: parse_raw_data_iov(path)
        for path in find_absolute_file_paths(file_path_patterns)
        if not bad_run_finder(path)
    }
57 
58 
59 # Here's how to do 3.
60 
def from_metadata_of_files(file_path_patterns, max_processes=None):
    """
    Create the file path -> IoV mapping with caf.utils.make_file_to_iov_dictionary.

    Runs marked as bad (see bad_run_finder) are filtered out of the glob patterns
    in both modes of operation.

    Parameters:
        file_path_patterns (list[str]): The file path patterns to create the mapping for.
        max_processes (int, optional): If None (the default) the dictionary is created one
            file at a time. Otherwise a ThreadPool with this many workers is passed to
            make_file_to_iov_dictionary.

    Returns:
        The mapping returned by make_file_to_iov_dictionary.
    """
    from caf.utils import make_file_to_iov_dictionary

    def run_in_one_process():
        """
        Creates the file_to_iov dictionary but only one file at a time.
        Uses bad_run_finder to filter out runs marked as bad from our glob pattern.
        """
        return make_file_to_iov_dictionary(file_path_patterns, filterfalse=bad_run_finder)

    def run_with_multiprocessing(max_processes):
        """
        Creates the file_to_iov dictionary but using a Pool object to control the number of subprocesses.
        Note that even though we're using a ThreadPool, we aren't bound by the GIL because we are subprocessing to
        run b2file-metadata-show in each Thread.

        Uses bad_run_finder to filter out runs marked as bad from our glob pattern.
        """
        from multiprocessing.pool import ThreadPool
        tp = ThreadPool(processes=max_processes)
        try:
            mapping = make_file_to_iov_dictionary(file_path_patterns, polling_time=5, pool=tp, filterfalse=bad_run_finder)
        except BaseException:
            # Something went wrong (or we were interrupted): drop the work that is still queued
            # instead of waiting for it, then let the error propagate.
            tp.terminate()
            raise
        else:
            tp.close()
        finally:
            # Always wait for the worker threads so that the pool is never leaked
            tp.join()
        return mapping

    if max_processes is None:
        return run_in_one_process()
    return run_with_multiprocessing(max_processes=max_processes)
89 
90 
# Dispatch table: each allowed value of the 'method' command line argument -> the function implementing it
function_map = {
    "filepath": from_raw_data_file_paths,
    "metadata": from_metadata_of_files,
}
94 
95 
def get_argparser():
    """
    Build the command line parser of this script.

    It takes a single positional argument, 'method', whose allowed values are the keys of function_map.

    Returns the configured argparse.ArgumentParser.
    """
    from argparse import ArgumentParser

    method_help = "The method by which you want to create the mapping of file paths -> IoVs."
    argparser = ArgumentParser()
    argparser.add_argument("method", choices=function_map.keys(), help=method_help)
    return argparser
102 
103 
def main():
    """
    Entry point: parse the command line, create the file -> IoV map with the requested method,
    print it, and pickle it to 'file_iov_map.pkl' in the current working directory.
    """
    import pickle

    # Command line handling
    args = get_argparser().parse_args()

    # Look up the creation function chosen on the command line and run it on our patterns
    create_mapping = function_map[args.method]
    file_to_iov = create_mapping(file_path_patterns)

    print("Created the file to IoV map:")
    pp.pprint(file_to_iov)

    # Store the mapping on disk so that it can be reused later
    filename = "file_iov_map.pkl"
    with open(filename, 'bw') as iov_map_file:
        pickle.dump(file_to_iov, iov_map_file)
    print("Saved dictionary to the file '{}' for later use.".format(filename))

    # To read it back in, usually in a separate process/steering file:
    #     with open("file_iov_map.pkl", 'rb') as iov_map_file:
    #         file_to_iov = pickle.load(iov_map_file)
124 
125 
# Only run when executed as a script; the return value of main() becomes the exit status
if __name__ == "__main__":
    import sys

    exit_status = main()
    sys.exit(exit_status)
int main(int argc, char **argv)
Run all tests.
Definition: test_main.cc:75