Belle II Software  release-08-01-10
create_file_to_iov_map.py
1 #!/usr/bin/env python3
2 
3 
10 
11 
25 
26 import fnmatch
27 from pprint import PrettyPrinter
# Module-level pretty-printer (two-space indent); main() uses it to display the resulting mapping
pp = PrettyPrinter(indent=2)
29 
# Let's take some file patterns. We could have put wildcards in more places but this is enough for testing.
# Note that a glob character class lists its members without separators: "[56]" matches '5' or '6',
# whereas "[5,6]" would additionally match a literal ','.
file_path_patterns = ["/hsm/belle2/bdata/Data/Raw/e0003/r0495[56]*/**/*.root"]
32 
33 
def bad_run_finder(filepath):
    """
    Tell whether a file path points into a run directory flagged as bad.

    A path is considered bad when it matches the glob '*r?????.bad/*', i.e. it contains
    an 'r' followed by five characters, then '.bad' and a '/'.

    Returns True for such paths and False otherwise.
    """
    bad_run_pattern = "*r?????.bad/*"
    is_bad = fnmatch.fnmatch(filepath, bad_run_pattern)
    return is_bad
39 
40 # Here's how to do 2.
41 
42 
def from_raw_data_file_paths(file_path_patterns):
    """
    Create the file path -> IoV mapping by parsing each file's path.

    The patterns are expanded with caf.utils.find_absolute_file_paths, paths flagged by
    bad_run_finder are dropped, and caf.utils.parse_raw_data_iov is called on every
    remaining path.

    Returns a dictionary keyed by file path whose values are whatever parse_raw_data_iov
    returned for that path.
    """
    from caf.utils import find_absolute_file_paths, parse_raw_data_iov

    # Expand the patterns into absolute paths, keeping only those not marked as bad runs
    good_paths = [path for path in find_absolute_file_paths(file_path_patterns)
                  if not bad_run_finder(path)]

    # The IoV of each file is parsed out of its directory/file name
    return {path: parse_raw_data_iov(path) for path in good_paths}
56 
57 
58 # Here's how to do 3.
59 
def from_metadata_of_files(file_path_patterns):
    """
    Create the file path -> IoV mapping with caf.utils.make_file_to_iov_dictionary.

    Two strategies are defined below; the single-process one is the one actually run.
    Both pass bad_run_finder as the filter so that runs marked as bad are left out.

    Returns whatever make_file_to_iov_dictionary returns for the given patterns.
    """
    from caf.utils import make_file_to_iov_dictionary

    def run_in_one_process():
        """
        Creates the file_to_iov dictionary but only one file at a time.
        Uses bad_run_finder to filter out runs marked as bad from our glob pattern.
        """
        return make_file_to_iov_dictionary(file_path_patterns, filterfalse=bad_run_finder)

    def run_with_multiprocessing(max_processes):
        """
        Creates the file_to_iov dictionary but using a Pool object to control the number of subprocesses.
        Note that even though we're using a ThreadPool, we aren't bound by the GIL because we are subprocessing to
        run b2file-metadata-show in each Thread.

        Uses bad_run_finder to filter out runs marked as bad from our glob pattern.
        """
        from multiprocessing.pool import ThreadPool
        tp = ThreadPool(processes=max_processes)
        try:
            mapping = make_file_to_iov_dictionary(file_path_patterns, polling_time=5, pool=tp,
                                                  filterfalse=bad_run_finder)
        finally:
            # Always release the worker threads, even if building the mapping raised
            tp.close()
            tp.join()
        return mapping

    # To use the thread pool instead, replace the return below with:
    #     return run_with_multiprocessing(max_processes=4)
    return run_in_one_process()
88 
89 
# Dispatch table: each allowed value of the command line 'method' argument -> the function that implements it
function_map = {
    "filepath": from_raw_data_file_paths,
    "metadata": from_metadata_of_files,
}
93 
94 
def get_argparser():
    """
    Build the command line parser of this script.

    The parser takes a single positional argument, 'method', whose allowed values are
    the keys of function_map.
    """
    from argparse import ArgumentParser

    method_help = "The method by which you want to create the mapping of file paths -> IoVs."
    cli = ArgumentParser()
    cli.add_argument("method", choices=function_map.keys(), help=method_help)
    return cli
101 
102 
def main():
    """
    Script entry point: build the requested file -> IoV map, print it and pickle it.

    The method is read from the command line, applied to the module-level
    file_path_patterns, and the result is written to 'file_iov_map.pkl'.
    """
    import pickle

    # Find out from the command line which method should be used
    method = get_argparser().parse_args().method

    # Look up the matching function and run it on our patterns
    file_to_iov = function_map[method](file_path_patterns)

    print("Created the file to IoV map:")
    pp.pprint(file_to_iov)

    # Keep the result on disk so that it can be reused later
    filename = "file_iov_map.pkl"
    with open(filename, 'bw') as iov_map_file:
        pickle.dump(file_to_iov, iov_map_file)
    print("Saved dictionary to the file '{}' for later use.".format(filename))

    # To read it back in, usually in a separate process/steering file:
    # with open("file_iov_map.pkl", 'rb') as iov_map_file:
    #     file_to_iov = pickle.load(iov_map_file)
123 
124 
if __name__ == "__main__":
    # Only run when executed as a script; whatever main() returns is handed to sys.exit
    import sys
    exit_status = main()
    sys.exit(exit_status)
Definition: main.py:1
int main(int argc, char **argv)
Run all tests.
Definition: test_main.cc:91