Belle II Software  release-08-01-10
FileHandler.cc
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 
9 #include <framework/gearbox/FileHandler.h>
10 #include <framework/gearbox/Gearbox.h>
11 #include <framework/logging/Logger.h>
12 #include <framework/utilities/FileSystem.h>
13 #include <framework/datastore/StoreObjPtr.h>
14 #include <framework/dataobjects/EventMetaData.h>
15 
16 #include <regex>
17 #include <filesystem>
18 
19 #include <boost/iostreams/device/file.hpp>
20 #include <boost/iostreams/filter/gzip.hpp>
21 
22 using namespace std;
23 namespace io = boost::iostreams;
24 namespace fs = std::filesystem;
25 
26 namespace Belle2::gearbox {
27 
28  FileContext::FileContext(const string& filename, bool compressed)
29  {
30  B2DEBUG(100, "Opening " << filename << (compressed ? " (gzip)" : ""));
31  if (compressed) m_stream.push(io::gzip_decompressor());
32  m_stream.push(io::file_source(filename));
33  }
34 
35  FileHandler::FileHandler(const std::string& uri): InputHandler(uri), m_path(uri)
36  {
37 
38  if (m_path.empty()) {
39  m_path = "/data/";
40  } else {
41  //Check if we have placeholder for experiment or run information
42  std::regex exp(R"(\{EXP(?::(\d+))?\})");
43  std::regex run(R"(\{RUN(?::(\d+))?\})");
44  m_rundependence = std::regex_search(m_path, exp) || std::regex_search(m_path, run);
45  if (m_rundependence) {
46  //Apparently we do have placeholders, replace them by something
47  //boost::format will understand. The placeholder is something like
48  //{EXP} or {EXP:<n>} where <n> is an integer to make the number
49  //zero-filled with <n> digits, so we replace it with %1$0d or
50  //%1$0<n>d to denote to the first argument in a boost format string.
51  //Same for {RUN} and {RUN:<n>} but as second argument
52  StoreObjPtr<EventMetaData> eventMetaDataPtr;
53  eventMetaDataPtr.isRequired();
54  std::string tmp = std::regex_replace(m_path, exp, std::string("%1$$0$1d"));
55  tmp = std::regex_replace(tmp, run, std::string("%2$$0$1d"));
56  B2DEBUG(300, "Found run-dependence in file path, resulting in " << tmp);
57  m_pathformat = boost::format(tmp);
58  m_pathformat.exceptions(boost::io::all_error_bits ^ boost::io::too_many_args_bit);
59  }
60  }
61  B2DEBUG(300, "Created FileHandler for directory " << m_path);
62  }
63 
64  InputContext* FileHandler::open(const std::string& path)
65  {
66  if (m_rundependence) {
67  StoreObjPtr<EventMetaData> eventMetaDataPtr;
68  if (!eventMetaDataPtr.isValid()) {
69  B2DEBUG(300, "No run info, probably initialize? Skipping backend");
70  return nullptr;
71  }
72  int exp = eventMetaDataPtr->getExperiment();
73  int run = eventMetaDataPtr->getRun();
74  m_path = (m_pathformat % exp % run).str();
75  }
76  fs::path basedir(m_path);
77  std::string filename = (basedir / path).string();
78  bool repeat(false);
79  do {
80  //Ok, let's try to find the file
81  B2DEBUG(350, "Trying to find " << filename);
82 
83  std::string fullpath = FileSystem::findFile(filename, true);
84  if (!fullpath.empty()) {
85  if (m_rundependence) B2INFO("gearbox::FileHandler: Reading '" << fullpath << "'");
86  return new FileContext(fullpath, false);
87  }
88  fullpath = FileSystem::findFile(filename + ".gz", true);
89  if (!fullpath.empty()) {
90  if (m_rundependence) B2INFO("gearbox::FileHandler: Reading '" << fullpath << "' (gzip)");
91  return new FileContext(fullpath, true);
92  }
93 
94  //did not work, replace last slash by a - and try again if such a
95  //replacement was successful. This allows flattening the directory
96  //structure from e.g. data/pxd/PXD-Alignment.xml to
97  //data-pxd-PXD-Alignment.xml to more easily override single files
98  size_t last_slash = filename.find_last_of('/');
99  //If the last slash we found is after the primary path we try again.
100  //This means we try to replace all slashes in the requested filename
101  //but not in the given base path, for example:
102  // - if the base path is "/somedir/data/" and the file to be opened is
103  // "foo/bar/baz.xml", we will look only for files up to
104  // "/somedir/data/foo-bar-baz.xml"
105  // - if the base path does not end in "/", e.g. "/somedir/data" we also
106  // look for "/somedir/data-foo-bar-baz.xml"
107  repeat = (last_slash >= m_path.size()) && (last_slash != std::string::npos);
108  if (repeat) {
109  filename[last_slash] = '-';
110  }
111  } while (repeat);
112  return nullptr;
113  }
114 
116 }
static std::string findFile(const std::string &path, bool silent=false)
Search for the given file or directory in the local or central release directory and return its absolute path if it is found.
Definition: FileSystem.cc:148
bool isRequired(const std::string &name="")
Ensure this array/object has been registered previously.
Type-safe access to single objects in the data store.
Definition: StoreObjPtr.h:96
bool isValid() const
Check whether the object was created.
Definition: StoreObjPtr.h:111
InputContext which serves the content of a plain file.
Definition: FileHandler.h:23
InputHandler which will read XML from plain files, optionally gzip compressed.
Definition: FileHandler.h:67
std::string m_path
Search path to look for files.
Definition: FileHandler.h:83
virtual InputContext * open(const std::string &path) override
create a new FileContext by searching the file system for a file named like path.
Definition: FileHandler.cc:64
boost::format m_pathformat
format object in case of run-dependent data path
Definition: FileHandler.h:85
bool m_rundependence
bool indicating whether the data path is run-dependent
Definition: FileHandler.h:87
Class representing a resource context for gearbox.
Definition: InputHandler.h:25
Class to provide an InputContext for a given XML resource name.
Definition: InputHandler.h:47
#define B2_GEARBOX_REGISTER_INPUTHANDLER(classname, prefix)
Helper macro to easily register new input handlers.
Definition: Gearbox.h:256