Belle II Software  release-06-00-14
FileHandler.cc
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 
9 #include <framework/gearbox/FileHandler.h>
10 #include <framework/gearbox/Gearbox.h>
11 #include <framework/logging/Logger.h>
12 #include <framework/utilities/FileSystem.h>
13 #include <framework/datastore/StoreObjPtr.h>
14 #include <framework/dataobjects/EventMetaData.h>
15 
16 #include <regex>
17 #include <boost/filesystem.hpp>
18 #include <boost/iostreams/device/file.hpp>
19 #include <boost/iostreams/filter/gzip.hpp>
20 
21 using namespace std;
22 namespace io = boost::iostreams;
23 namespace fs = boost::filesystem;
24 
25 namespace Belle2::gearbox {
26 
27  FileContext::FileContext(const string& filename, bool compressed)
28  {
29  B2DEBUG(100, "Opening " << filename << (compressed ? " (gzip)" : ""));
30  if (compressed) m_stream.push(io::gzip_decompressor());
31  m_stream.push(io::file_source(filename));
32  }
33 
34  FileHandler::FileHandler(const std::string& uri): InputHandler(uri), m_path(uri)
35  {
36 
37  if (m_path.empty()) {
38  m_path = "/data/";
39  } else {
40  //Check if we have placeholder for experiment or run information
41  std::regex exp(R"(\{EXP(?::(\d+))?\})");
42  std::regex run(R"(\{RUN(?::(\d+))?\})");
43  m_rundependence = std::regex_search(m_path, exp) || std::regex_search(m_path, run);
44  if (m_rundependence) {
45  //Apparently we do have placeholders, replace them by something
46  //boost::format will understand. The placeholder is something like
47  //{EXP} or {EXP:<n>} where <n> is an integer to make the number
48  //zero-filled with <n> digits, so we replace it with %1$0d or
49  //%1$0<n>d to denote to the first argument in a boost format string.
50  //Same for {RUN} and {RUN:<n>} but as second argument
51  StoreObjPtr<EventMetaData> eventMetaDataPtr;
52  eventMetaDataPtr.isRequired();
53  std::string tmp = std::regex_replace(m_path, exp, std::string("%1$$0$1d"));
54  tmp = std::regex_replace(tmp, run, std::string("%2$$0$1d"));
55  B2DEBUG(300, "Found run-dependence in file path, resulting in " << tmp);
56  m_pathformat = boost::format(tmp);
57  m_pathformat.exceptions(boost::io::all_error_bits ^ boost::io::too_many_args_bit);
58  }
59  }
60  B2DEBUG(300, "Created FileHandler for directory " << m_path);
61  }
62 
63  InputContext* FileHandler::open(const std::string& path)
64  {
65  if (m_rundependence) {
66  StoreObjPtr<EventMetaData> eventMetaDataPtr;
67  if (!eventMetaDataPtr.isValid()) {
68  B2DEBUG(300, "No run info, probably initialize? Skipping backend");
69  return nullptr;
70  }
71  int exp = eventMetaDataPtr->getExperiment();
72  int run = eventMetaDataPtr->getRun();
73  m_path = (m_pathformat % exp % run).str();
74  }
75  fs::path basedir(m_path);
76  std::string filename = (basedir / path).string();
77  bool repeat(false);
78  do {
79  //Ok, let's try to find the file
80  B2DEBUG(350, "Trying to find " << filename);
81 
82  std::string fullpath = FileSystem::findFile(filename, true);
83  if (!fullpath.empty()) {
84  if (m_rundependence) B2INFO("gearbox::FileHandler: Reading '" << fullpath << "'");
85  return new FileContext(fullpath, false);
86  }
87  fullpath = FileSystem::findFile(filename + ".gz", true);
88  if (!fullpath.empty()) {
89  if (m_rundependence) B2INFO("gearbox::FileHandler: Reading '" << fullpath << "' (gzip)");
90  return new FileContext(fullpath, true);
91  }
92 
93  //did not work, replace last slash by a - and try again if such a
94  //replacement was successful. This allows flattening the directory
95  //structure from e.g. data/pxd/PXD-Alignment.xml to
96  //data-pxd-PXD-Alignment.xml to more easily override single files
97  size_t last_slash = filename.find_last_of('/');
98  //If the last slash we found is after the primary path we try again.
99  //This means we try to replace all slashes in the requested filename
100  //but not in the given base path, for example:
101  // - if the base path is "/somedir/data/" and the file to be opened is
102  // "foo/bar/baz.xml", we will look only for files up to
103  // "/somedir/data/foo-bar-baz.xml"
104  // - if the base path does not end in "/", e.g. "/somedir/data" we also
105  // look for "/somedir/data-foo-bar-baz.xml"
106  repeat = (last_slash >= m_path.size()) && (last_slash != std::string::npos);
107  if (repeat) {
108  filename[last_slash] = '-';
109  }
110  } while (repeat);
111  return nullptr;
112  }
113 
115 }
static std::string findFile(const std::string &path, bool silent=false)
Search for given file or directory in local or central release directory, and return absolute path if found.
Definition: FileSystem.cc:145
bool isRequired(const std::string &name="")
Ensure this array/object has been registered previously.
Type-safe access to single objects in the data store.
Definition: StoreObjPtr.h:95
bool isValid() const
Check whether the object was created.
Definition: StoreObjPtr.h:110
InputContext which serves the content of a plain file.
Definition: FileHandler.h:23
InputHandler which will read XML from plain files, optionally gzip compressed.
Definition: FileHandler.h:67
std::string m_path
Search path to look for files.
Definition: FileHandler.h:83
virtual InputContext * open(const std::string &path) override
create a new FileContext by searching the file system for a file named like path.
Definition: FileHandler.cc:63
boost::format m_pathformat
format object in case of run-dependent data path
Definition: FileHandler.h:85
bool m_rundependence
bool indicating whether the data path is run-dependent
Definition: FileHandler.h:87
Class representing a resource context for gearbox.
Definition: InputHandler.h:25
Class to provide an InputContext for a given XML resource name.
Definition: InputHandler.h:47
#define B2_GEARBOX_REGISTER_INPUTHANDLER(classname, prefix)
Helper macro to easily register new input handlers.
Definition: Gearbox.h:256