Belle II Software development
create_file_to_iov_map.py
1#!/usr/bin/env python3
2
3
10
11
25
26import fnmatch
27from pprint import PrettyPrinter
# Module-wide pretty printer; main() uses it to display the finished file -> IoV mapping.
pp = PrettyPrinter(indent=2)
29
# Let's take some file patterns. We could have put wildcards in more places but this is enough for testing.
# NOTE: a glob character class lists its members without separators, so "[56]" means "5 or 6".
# Writing "[5,6]" would additionally match a literal comma.
file_path_patterns = ["/hsm/belle2/bdata/Data/Raw/e0003/r0495[56]*/**/*.root"]
32
33
def bad_run_finder(filepath):
    """
    Tell whether a file path points inside a run directory tagged as bad.

    A path is considered bad when it matches the glob ``*r?????.bad/*``, i.e.
    somewhere in it there is an ``r`` followed by any five characters and then
    ``.bad/``.

    Returns True for such paths and False otherwise.
    """
    bad_run_pattern = "*r?????.bad/*"
    return fnmatch.fnmatch(filepath, bad_run_pattern)
39
# Here's how to do 2: build the map by parsing the IoVs out of the raw data file paths.
41
42
def from_raw_data_file_paths(file_path_patterns):
    """
    Build the file path -> IoV mapping by parsing each raw data file path.

    The patterns are expanded with ``caf.utils.find_absolute_file_paths``, paths
    flagged by ``bad_run_finder`` are dropped, and the IoV of every remaining
    path is obtained from ``caf.utils.parse_raw_data_iov``.

    Returns a dictionary keyed by file path with the parsed IoV as value.
    """
    from caf.utils import find_absolute_file_paths, parse_raw_data_iov

    # Expand the patterns into absolute file paths
    candidate_paths = find_absolute_file_paths(file_path_patterns)
    # Keep only the paths that our filter function does not flag as a bad run
    good_paths = [path for path in candidate_paths if not bad_run_finder(path)]
    # Parse the IoV of each remaining file from its directory/file name
    return {path: parse_raw_data_iov(path) for path in good_paths}
56
57
# Here's how to do 3: build the map from the metadata of the files.
59
def from_metadata_of_files(file_path_patterns):
    """
    Build the file path -> IoV mapping with ``caf.utils.make_file_to_iov_dictionary``.

    Two ways of running are shown as nested helpers: one file at a time, or
    through a ThreadPool. The single-process variant is the one actually used;
    swap the final return statement to try the pooled variant.

    In both cases ``bad_run_finder`` is passed as the ``filterfalse`` predicate
    so that runs marked as bad are left out of the mapping.

    Returns whatever ``make_file_to_iov_dictionary`` returns (the mapping).
    """
    from caf.utils import make_file_to_iov_dictionary

    def run_in_one_process():
        """
        Creates the file_to_iov dictionary but only one file at a time.
        Uses bad_run_finder to filter out runs marked as bad from our glob pattern.
        """
        return make_file_to_iov_dictionary(file_path_patterns, filterfalse=bad_run_finder)

    def run_with_multiprocessing(max_processes):
        """
        Creates the file_to_iov dictionary but using a Pool object to control the number of subprocesses.
        Note that even though we're using a ThreadPool, we aren't bound by the GIL because we are subprocessing to
        run b2file-metadata-show in each Thread.

        Uses bad_run_finder to filter out runs marked as bad from our glob pattern.
        """
        from multiprocessing.pool import ThreadPool
        tp = ThreadPool(processes=max_processes)
        try:
            return make_file_to_iov_dictionary(file_path_patterns, polling_time=5, pool=tp, filterfalse=bad_run_finder)
        finally:
            # Shut the pool down even when building the mapping raised, so that
            # its worker threads are never left behind.
            tp.close()
            tp.join()

    # Alternative: return run_with_multiprocessing(max_processes=4)
    return run_in_one_process()
88
89
# Lookup table from each allowed command line 'method' value to the function implementing it
function_map = {
    "filepath": from_raw_data_file_paths,
    "metadata": from_metadata_of_files,
}
93
94
def get_argparser():
    """
    Build the command line parser of this script.

    The parser has a single positional argument, ``method``, whose allowed
    values are the keys of ``function_map``.

    Returns the configured ``argparse.ArgumentParser``.
    """
    import argparse

    method_help = "The method by which you want to create the mapping of file paths -> IoVs."
    argument_parser = argparse.ArgumentParser()
    argument_parser.add_argument("method", choices=function_map.keys(), help=method_help)
    return argument_parser
101
102
def main():
    """
    Create the file path -> IoV mapping with the method chosen on the command line.

    The mapping is printed and then pickled to 'file_iov_map.pkl' in the
    current working directory.
    """
    # Read the requested method from the command line
    cli_args = get_argparser().parse_args()

    # Look up the function for that method and run it on our file patterns
    build_mapping = function_map[cli_args.method]
    file_to_iov = build_mapping(file_path_patterns)

    print("Created the file to IoV map:")
    pp.pprint(file_to_iov)

    # Store the result on disk so that it can be reused later
    import pickle
    filename = "file_iov_map.pkl"
    with open(filename, 'bw') as iov_map_file:
        pickle.dump(file_to_iov, iov_map_file)
    print(f"Saved dictionary to the file '{filename}' for later use.")

    # To read it back in, usually in a separate process/steering file:
    #     with open("file_iov_map.pkl", 'rb') as iov_map_file:
    #         file_to_iov = pickle.load(iov_map_file)
123
124
if __name__ == "__main__":
    # Only when executed as a script: run main() and use its return value as the exit status
    raise SystemExit(main())
Definition: main.py:1