Belle II Software  release-05-01-25
create_file_to_iov_map.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 
18 
from pprint import PrettyPrinter

# Module-level pretty-printer, used by main() to display the resulting mapping.
pp = PrettyPrinter(indent=2)

# Let's take some file patterns. We could have put wildcards in more places, but this is enough for testing.
# NOTE: inside a glob character class every character is taken literally, so a comma would be
# matched as well. The class is therefore written "[56]" to select only run directories whose
# names start with r04955 or r04956.
file_path_patterns = ["/hsm/belle2/bdata/Data/Raw/e0003/r0495[56]*/**/*.root"]
24 
25 
26 import fnmatch
27 
28 
def bad_run_finder(filepath):
    """
    Tell whether a file path lies inside a run directory that is marked as bad.

    A path is considered bad when it matches the glob pattern '*r?????.bad/*', i.e. somewhere in
    the path there is an 'r' followed by exactly five characters, then '.bad' and a '/'.

    Returns True for a bad path, False otherwise.
    """
    bad_run_pattern = "*r?????.bad/*"
    is_bad = fnmatch.fnmatch(filepath, bad_run_pattern)
    return is_bad
34 
35 # Here's how to do 2.
36 
37 
def from_raw_data_file_paths(file_path_patterns):
    """
    Build the file path -> IoV mapping from the file paths alone.

    The patterns are expanded with caf.utils.find_absolute_file_paths, every path for which
    bad_run_finder returns True is dropped, and each remaining path is mapped to whatever
    caf.utils.parse_raw_data_iov returns for it (the IoV is parsed out of the directory/file names).

    Returns a dictionary keyed by file path.
    """
    from caf.utils import find_absolute_file_paths, parse_raw_data_iov

    # Expand the patterns first, then throw away everything that belongs to a bad run
    candidate_paths = find_absolute_file_paths(file_path_patterns)
    good_paths = [path for path in candidate_paths if not bad_run_finder(path)]

    # One IoV per surviving file
    return {path: parse_raw_data_iov(path) for path in good_paths}
51 
52 
53 # Here's how to do 3.
54 
def from_metadata_of_files(file_path_patterns):
    """
    Build the file path -> IoV mapping by calling caf.utils.make_file_to_iov_dictionary.

    Two strategies are defined as local helpers: one that works in a single process and one that
    hands a ThreadPool to make_file_to_iov_dictionary. Only the single-process variant is
    actually executed; the pooled one is kept as a ready-made alternative.

    In both cases bad_run_finder is passed as the 'filterfalse' argument so that runs marked as
    bad are excluded from the result.

    Returns whatever make_file_to_iov_dictionary returns for the given patterns.
    """
    from caf.utils import make_file_to_iov_dictionary

    def run_in_one_process():
        """
        Creates the file_to_iov dictionary but only one file at a time.
        Uses bad_run_finder to filter out runs marked as bad from our glob pattern.
        """
        return make_file_to_iov_dictionary(file_path_patterns, filterfalse=bad_run_finder)

    def run_with_multiprocessing(max_processes):
        """
        Creates the file_to_iov dictionary but using a Pool object to control the number of subprocesses.
        Note that even though we're using a ThreadPool, we aren't bound by the GIL because we are subprocessing to
        run b2file-metadata-show in each Thread.

        Uses bad_run_finder to filter out runs marked as bad from our glob pattern.
        """
        from multiprocessing.pool import ThreadPool
        tp = ThreadPool(processes=max_processes)
        try:
            mapping = make_file_to_iov_dictionary(file_path_patterns, polling_time=5, pool=tp, filterfalse=bad_run_finder)
        finally:
            # Always shut the pool down, even if building the mapping raised, so that no worker
            # threads are left behind. close() + join() (rather than the pool's context manager,
            # which calls terminate()) keeps the shutdown identical to the successful path.
            tp.close()
            tp.join()
        return mapping

    # To use the thread pool instead, swap the return statement below for:
    #     return run_with_multiprocessing(max_processes=4)
    return run_in_one_process()
83 
84 
# Dispatch table: every value accepted for the command line argument 'method' -> the function that implements it
function_map = dict(
    filepath=from_raw_data_file_paths,
    metadata=from_metadata_of_files,
)
88 
89 
def get_argparser():
    """
    Create the command line parser of this script.

    The parser has a single positional argument, 'method', whose allowed values are the keys of
    function_map.

    Returns the configured argparse.ArgumentParser.
    """
    from argparse import ArgumentParser

    arg_parser = ArgumentParser()
    arg_parser.add_argument(
        "method",
        choices=function_map.keys(),
        help="The method by which you want to create the mapping of file paths -> IoVs.",
    )
    return arg_parser
96 
97 
def main():
    """
    Entry point: create the file path -> IoV mapping, print it and pickle it.

    The creation method is chosen by the positional command line argument 'method' and looked up
    in function_map. The resulting dictionary is pretty-printed and then written to
    'file_iov_map.pkl' in the current working directory.
    """
    import pickle

    # Find out from the command line which method was requested ...
    args = get_argparser().parse_args()
    create_mapping = function_map[args.method]

    # ... and run it on the module-level file patterns
    file_to_iov = create_mapping(file_path_patterns)

    print("Created the file to IoV map:")
    pp.pprint(file_to_iov)

    # Keep the mapping on disk so that it can be reused later
    filename = "file_iov_map.pkl"
    with open(filename, 'bw') as iov_map_file:
        pickle.dump(file_to_iov, iov_map_file)
    print("Saved dictionary to the file '{}' for later use.".format(filename))

    # To read it back in (usually in a separate process/steering file):
    #     with open("file_iov_map.pkl", 'rb') as iov_map_file:
    #         file_to_iov = pickle.load(iov_map_file)
118 
119 
if __name__ == "__main__":
    import sys

    # Only run when executed as a script; the return value of main() is used as the exit status.
    exit_status = main()
    sys.exit(exit_status)
main
int main(int argc, char **argv)
Run all tests.
Definition: test_main.cc:77