Belle II Software  release-06-00-14
create_file_to_iov_map.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 
11 
12 
27 
28 
29 import pickle
30 from pprint import PrettyPrinter
31 import sys
32 import argparse
# Command-line interface. Every option is a plain string with a default, so
# the script can be run without any arguments.
parser = argparse.ArgumentParser(description="Make a mapping file of file paths -> IoV")

# Pattern used to find the input files.
parser.add_argument('--file_path_patterns',
                    type=str,
                    default='/hsm/belle2/bdata/Data/Raw/e0003/r*/**/*.root',
                    help='Lets take some file patterns. We could have put wildcards in more places but this is enough for testing.')

# Where the pickled mapping is written.
parser.add_argument('--output',
                    type=str,
                    default='file_iov_map.pkl',
                    help='Name of the output mapping file.')

# Which strategy is used to obtain the IoV of each file.
parser.add_argument('--option',
                    type=str,
                    default='filepath',
                    help='Either take IoV from FileMetaData (option=metadata) or parse it from filepath (option=filepath).')

args = parser.parse_args()

# The mapping functions below take a list of patterns, so wrap the single
# pattern string given on the command line.
file_path_patterns = [args.file_path_patterns]
print(file_path_patterns)
51 
# Option 1 (--option filepath): parse the IoV out of each raw data file's path.
53 
54 
def from_raw_data_file_paths(file_path_patterns):
    """
    Build the file path -> IoV dictionary from the file paths alone.

    The patterns are first expanded to absolute file paths with
    caf.utils.find_absolute_file_paths, then the IoV of each file is parsed
    out of its directory/filename with caf.utils.parse_raw_data_iov.

    Parameters:
        file_path_patterns (list[str]): File path patterns to expand.

    Returns:
        dict: Maps each absolute file path to the value returned by
        parse_raw_data_iov for that path.
    """
    from caf.utils import find_absolute_file_paths, parse_raw_data_iov

    return {
        absolute_path: parse_raw_data_iov(absolute_path)
        for absolute_path in find_absolute_file_paths(file_path_patterns)
    }
65 
66 
# Option 2 (--option metadata): take the IoV from each file's FileMetaData.
68 
def from_metadata_of_files(file_path_patterns, max_processes=6):
    """
    Build the file path -> IoV dictionary with caf.utils.make_file_to_iov_dictionary.

    Parameters:
        file_path_patterns (list[str]): File path patterns, passed unchanged to
            make_file_to_iov_dictionary.
        max_processes (int): Number of worker threads in the ThreadPool handed
            to make_file_to_iov_dictionary. Defaults to 6, the value that used
            to be hard-coded. If it is None or smaller than 2 no pool is
            created and make_file_to_iov_dictionary is called on its own.

    Returns:
        Whatever make_file_to_iov_dictionary returns for the given patterns.
    """
    from caf.utils import make_file_to_iov_dictionary

    def run_in_one_process():
        """
        Creates the file_to_iov dictionary but only one file at a time.
        """
        return make_file_to_iov_dictionary(file_path_patterns)

    def run_with_multiprocessing(pool_size):
        """
        Creates the file_to_iov dictionary but using a Pool object to control the number of subprocesses.
        Note that even though we're using a ThreadPool, we aren't bound by the GIL because we are subprocessing to
        run b2file-metadata-show in each Thread.
        """
        from multiprocessing.pool import ThreadPool
        # The context manager terminates the pool when the block is left, so
        # the worker threads are released even if building the dictionary
        # raises. On the normal path the pool is closed and joined first,
        # exactly as before, which makes the final terminate a no-op.
        with ThreadPool(processes=pool_size) as tp:
            mapping = make_file_to_iov_dictionary(
                file_path_patterns, polling_time=5, pool=tp)
            tp.close()
            tp.join()
        return mapping

    if max_processes is None or max_processes < 2:
        return run_in_one_process()
    return run_with_multiprocessing(max_processes)
95 
96 
# Build the mapping with the strategy selected by --option.
if args.option == "metadata":
    file_to_iov = from_metadata_of_files(file_path_patterns)
elif args.option == "filepath":
    file_to_iov = from_raw_data_file_paths(file_path_patterns)
else:
    # Running with no arguments just uses the defaults, so point the user to
    # --help instead. Errors go to stderr; the exit status stays 1.
    print("'{}' isn't one of the available options for this script. "
          "Run it again with --help to see the options.".format(args.option),
          file=sys.stderr)
    sys.exit(1)

# Show the full mapping on stdout.
pp = PrettyPrinter(indent=2)
pp.pprint(file_to_iov)

# Save for later use
with open(args.output, 'wb') as iov_map_file:
    pickle.dump(file_to_iov, iov_map_file)
# Only report success once the file has been written and closed.
print("Saved dictionary to a file for later use.")
112 
# To read it back in, usually in a separate process/steering file:
# with open("file_iov_map.pkl", 'rb') as iov_map_file:
#     file_to_iov = pickle.load(iov_map_file)