Belle II Software development
create_file_to_iov_map.py
1#!/usr/bin/env python3
2
3
10
11
26
27
28import pickle
29from pprint import PrettyPrinter
30import sys
31import argparse
# Command line interface. All three arguments are optional and fall back to
# the defaults given below.
parser = argparse.ArgumentParser(
    description="Make a mapping file of file paths -> IoV")
parser.add_argument(
    '--file_path_patterns',
    default='/hsm/belle2/bdata/Data/Raw/e0003/r*/**/*.root',
    type=str,
    help='Lets take some file patterns. We could have put wildcards in more places but this is enough for testing.')
parser.add_argument('--output', default='file_iov_map.pkl', type=str,
                    help='Name of the output mapping file.')
parser.add_argument(
    '--option',
    default='filepath',
    type=str,
    # Restricting the values lets argparse list them in --help and reject
    # anything else before the script does any work.
    choices=['filepath', 'metadata'],
    help='Either take IoV from FileMetaData (option=metadata) or parse it from filepath (option=filepath).')
args = parser.parse_args()

# The mapping functions below take a list of patterns, so wrap the single
# pattern given on the command line.
file_path_patterns = [args.file_path_patterns, ]
print(file_path_patterns)
50
# Option 1 (--option=filepath): parse the IoV out of each file's path.
52
53
def from_raw_data_file_paths(file_path_patterns):
    """
    Build the file path -> IoV dictionary by parsing the paths themselves.

    The patterns are first expanded to absolute file paths, then the IoV of
    each file is parsed out of its directory/file name.

    Parameters:
        file_path_patterns (list): File path patterns, passed on to
            `caf.utils.find_absolute_file_paths`.

    Returns:
        dict: Absolute file path -> result of `caf.utils.parse_raw_data_iov`
        for that path.
    """
    from caf.utils import find_absolute_file_paths, parse_raw_data_iov

    return {
        absolute_path: parse_raw_data_iov(absolute_path)
        for absolute_path in find_absolute_file_paths(file_path_patterns)
    }
64
65
# Option 2 (--option=metadata): take the IoV from each file's FileMetaData.
67
def from_metadata_of_files(file_path_patterns, max_processes=6):
    """
    Build the file path -> IoV dictionary from the metadata of the files.

    Parameters:
        file_path_patterns (list): File path patterns, passed on to
            `caf.utils.make_file_to_iov_dictionary`.
        max_processes (int): Number of worker threads used to query files
            concurrently. A falsy value (``None`` or ``0``) disables the pool
            and processes one file at a time.

    Returns:
        The mapping returned by `caf.utils.make_file_to_iov_dictionary`.
    """
    from caf.utils import make_file_to_iov_dictionary

    def run_in_one_process():
        """
        Creates the file_to_iov dictionary but only one file at a time.
        """
        return make_file_to_iov_dictionary(file_path_patterns)

    def run_with_multiprocessing(max_processes):
        """
        Creates the file_to_iov dictionary but using a Pool object to control the number of subprocesses.
        Note that even though we're using a ThreadPool, we aren't bound by the GIL because we are subprocessing to
        run b2file-metadata-show in each Thread.
        """
        from multiprocessing.pool import ThreadPool
        tp = ThreadPool(processes=max_processes)
        try:
            mapping = make_file_to_iov_dictionary(
                file_path_patterns, polling_time=5, pool=tp)
        except BaseException:
            # On failure or interruption drop the queued work instead of
            # waiting for every remaining file to be processed.
            tp.terminate()
            raise
        else:
            tp.close()
        finally:
            # Always wait for the worker threads so none are left behind.
            tp.join()
        return mapping

    if not max_processes:
        return run_in_one_process()
    return run_with_multiprocessing(max_processes=max_processes)
94
95
# Build the mapping with the method selected on the command line.
if args.option == "metadata":
    file_to_iov = from_metadata_of_files(file_path_patterns)
elif args.option == "filepath":
    file_to_iov = from_raw_data_file_paths(file_path_patterns)
else:
    # Running with no arguments just uses the defaults; --help is what
    # actually lists the options.
    print("That wasn't one of the available options for this script. Run it again with --help to see the options.",
          file=sys.stderr)
    sys.exit(1)

# Show the mapping on stdout so it can be checked by eye.
pp = PrettyPrinter(indent=2)
pp.pprint(file_to_iov)

# Save for later use
with open(args.output, 'bw') as iov_map_file:
    pickle.dump(file_to_iov, iov_map_file)
# Only report success once the file has been closed.
print("Saved dictionary to a file for later use.")
111
# To read it in, usually in a separate process/steering file:
# file_to_iov = pickle.load(open("file_iov_map.pkl", 'rb'))