Belle II Software  release-05-01-25
create_file_to_iov_map.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 
18 
19 
import argparse

# Command-line interface: which files to map, where to store the result and
# how the IoV of each file is determined.
parser = argparse.ArgumentParser(description="Make a mapping file of file paths -> IoV")
parser.add_argument(
    '--file_path_patterns',
    default='/hsm/belle2/bdata/Data/Raw/e0003/r*/**/*.root',
    type=str,
    help='Lets take some file patterns. We could have put wildcards in more places but this is enough for testing.')
parser.add_argument(
    '--output',
    default='file_iov_map.pkl',
    type=str,
    help='Name of the output mapping file.')
# 'choices' lets argparse reject an unknown option up front (usage message,
# exit status 2) and lists the valid values in --help.
parser.add_argument(
    '--option',
    default='filepath',
    type=str,
    choices=('metadata', 'filepath'),
    help='Either take IoV from FileMetaData (option=metadata) or parse it from filepath (option=filepath).')
args = parser.parse_args()

# The single pattern given on the command line is wrapped in a list before it
# is handed to the functions below.
file_path_patterns = [args.file_path_patterns, ]
print(file_path_patterns)
35 
# Option "filepath": parse the IoV of each file from its path.
37 
38 
def from_raw_data_file_paths(file_path_patterns):
    """
    Build a dictionary of absolute file path -> IoV from the file paths alone.

    The patterns are expanded with ``caf.utils.find_absolute_file_paths`` and
    the IoV of every resulting file is obtained by passing its path to
    ``caf.utils.parse_raw_data_iov``, i.e. it is parsed out of the
    directory/file names.

    Parameters:
        file_path_patterns: File path patterns, passed unchanged to
            ``find_absolute_file_paths``.

    Returns:
        dict: Maps each absolute file path to the value returned by
        ``parse_raw_data_iov`` for that path.
    """
    from caf.utils import find_absolute_file_paths, parse_raw_data_iov

    # Expand the patterns and key each parsed IoV by the path it came from.
    return {
        absolute_path: parse_raw_data_iov(absolute_path)
        for absolute_path in find_absolute_file_paths(file_path_patterns)
    }
48 
49 
# Option "metadata": take the IoV of each file from its FileMetaData.
51 
def from_metadata_of_files(file_path_patterns, max_processes=6):
    """
    Build the file path -> IoV dictionary with ``caf.utils.make_file_to_iov_dictionary``.

    Parameters:
        file_path_patterns: File path patterns, passed unchanged to
            ``make_file_to_iov_dictionary``.
        max_processes (int or None): Number of worker threads in the
            ThreadPool given to ``make_file_to_iov_dictionary``. Defaults to
            6, the value that used to be hard-coded. Pass ``None`` to call
            ``make_file_to_iov_dictionary`` without a pool instead.

    Returns:
        The mapping returned by ``make_file_to_iov_dictionary``.
    """
    from caf.utils import make_file_to_iov_dictionary

    def run_in_one_process():
        """
        Creates the file_to_iov dictionary but only one file at a time.
        """
        return make_file_to_iov_dictionary(file_path_patterns)

    def run_with_multiprocessing(max_processes):
        """
        Creates the file_to_iov dictionary but using a Pool object to control the number of subprocesses.
        Note that even though we're using a ThreadPool, we aren't bound by the GIL because we are subprocessing to
        run b2file-metadata-show in each Thread.
        """
        from multiprocessing.pool import ThreadPool
        tp = ThreadPool(processes=max_processes)
        try:
            # NOTE(review): polling_time is presumably in seconds - confirm
            # against caf.utils.make_file_to_iov_dictionary.
            mapping = make_file_to_iov_dictionary(file_path_patterns, polling_time=5, pool=tp)
            tp.close()
        except BaseException:
            # Do not leave worker threads behind when the lookup fails or is
            # interrupted; drop outstanding work instead of waiting for it.
            tp.terminate()
            raise
        finally:
            # join() is valid after either close() or terminate().
            tp.join()
        return mapping

    if max_processes is None:
        return run_in_one_process()
    return run_with_multiprocessing(max_processes=max_processes)
77 
78 
import pickle
import sys
from pprint import PrettyPrinter

# Build the mapping with the strategy selected on the command line.
if args.option == "metadata":
    file_to_iov = from_metadata_of_files(file_path_patterns)
elif args.option == "filepath":
    file_to_iov = from_raw_data_file_paths(file_path_patterns)
else:
    # sys.exit() with a string writes it to stderr and exits with status 1.
    # The hint points at --help because running without arguments just uses
    # the defaults and does not list the options.
    sys.exit("That wasn't one of the available options for this script. Run it again with --help to see the options.")

pp = PrettyPrinter(indent=2)
pp.pprint(file_to_iov)

# Save for later use
with open(args.output, 'wb') as iov_map_file:
    pickle.dump(file_to_iov, iov_map_file)
# Only report success once the file has been written and closed.
print("Saved dictionary to a file for later use.")

# To read it in, usually in a separate process/steering file:
# with open("file_iov_map.pkl", 'rb') as iov_map_file:
#     file_to_iov = pickle.load(iov_map_file)