Belle II Software  release-05-02-19
FileHandler.cc
1 /**************************************************************************
2  * BASF2 (Belle Analysis Framework 2) *
3  * Copyright(C) 2010 - Belle II Collaboration *
4  * *
5  * Author: The Belle II Collaboration *
6  * Contributors: Martin Ritter *
7  * *
8  * This software is provided "as is" without any warranty. *
9  **************************************************************************/
10 
11 #include <framework/gearbox/FileHandler.h>
12 #include <framework/gearbox/Gearbox.h>
13 #include <framework/logging/Logger.h>
14 #include <framework/utilities/FileSystem.h>
15 #include <framework/datastore/StoreObjPtr.h>
16 #include <framework/dataobjects/EventMetaData.h>
17 
18 #include <regex>
19 #include <boost/filesystem.hpp>
20 #include <boost/iostreams/device/file.hpp>
21 #include <boost/iostreams/filter/gzip.hpp>
22 
23 using namespace std;
24 namespace io = boost::iostreams;
25 namespace fs = boost::filesystem;
26 
27 namespace Belle2::gearbox {
28 
29  FileContext::FileContext(const string& filename, bool compressed)
30  {
31  B2DEBUG(100, "Opening " << filename << (compressed ? " (gzip)" : ""));
32  if (compressed) m_stream.push(io::gzip_decompressor());
33  m_stream.push(io::file_source(filename));
34  }
35 
36  FileHandler::FileHandler(const std::string& uri): InputHandler(uri), m_path(uri)
37  {
38 
39  if (m_path.empty()) {
40  m_path = "/data/";
41  } else {
42  //Check if we have placeholder for experiment or run information
43  std::regex exp(R"(\{EXP(?::(\d+))?\})");
44  std::regex run(R"(\{RUN(?::(\d+))?\})");
45  m_rundependence = std::regex_search(m_path, exp) || std::regex_search(m_path, run);
46  if (m_rundependence) {
47  //Apparently we do have placeholders, replace them by something
48  //boost::format will understand. The placeholder is something like
49  //{EXP} or {EXP:<n>} where <n> is an integer to make the number
50  //zero-filled with <n> digits, so we replace it with %1$0d or
51  //%1$0<n>d to denote to the first argument in a boost format string.
52  //Same for {RUN} and {RUN:<n>} but as second argument
53  StoreObjPtr<EventMetaData> eventMetaDataPtr;
54  eventMetaDataPtr.isRequired();
55  std::string tmp = std::regex_replace(m_path, exp, std::string("%1$$0$1d"));
56  tmp = std::regex_replace(tmp, run, std::string("%2$$0$1d"));
57  B2DEBUG(300, "Found run-dependence in file path, resulting in " << tmp);
58  m_pathformat = boost::format(tmp);
59  m_pathformat.exceptions(boost::io::all_error_bits ^ boost::io::too_many_args_bit);
60  }
61  }
62  B2DEBUG(300, "Created FileHandler for directory " << m_path);
63  }
64 
65  InputContext* FileHandler::open(const std::string& path)
66  {
67  if (m_rundependence) {
68  StoreObjPtr<EventMetaData> eventMetaDataPtr;
69  if (!eventMetaDataPtr.isValid()) {
70  B2DEBUG(300, "No run info, probably initialize? Skipping backend");
71  return nullptr;
72  }
73  int exp = eventMetaDataPtr->getExperiment();
74  int run = eventMetaDataPtr->getRun();
75  m_path = (m_pathformat % exp % run).str();
76  }
77  fs::path basedir(m_path);
78  std::string filename = (basedir / path).string();
79  bool repeat(false);
80  do {
81  //Ok, let's try to find the file
82  B2DEBUG(350, "Trying to find " << filename);
83 
84  std::string fullpath = FileSystem::findFile(filename, true);
85  if (!fullpath.empty()) {
86  if (m_rundependence) B2INFO("gearbox::FileHandler: Reading '" << fullpath << "'");
87  return new FileContext(fullpath, false);
88  }
89  fullpath = FileSystem::findFile(filename + ".gz", true);
90  if (!fullpath.empty()) {
91  if (m_rundependence) B2INFO("gearbox::FileHandler: Reading '" << fullpath << "' (gzip)");
92  return new FileContext(fullpath, true);
93  }
94 
95  //did not work, replace last slash by a - and try again if such a
96  //replacement was successful. This allows flattening the directory
97  //structure from e.g. data/pxd/PXD-Alignment.xml to
98  //data-pxd-PXD-Alignment.xml to more easily override single files
99  size_t last_slash = filename.find_last_of('/');
100  //If the last slash we found is after the primary path we try again.
101  //This means we try to replace all slashes in the requested filename
102  //but not in the given base path, for example:
103  // - if the base path is "/somedir/data/" and the file to be opened is
104  // "foo/bar/baz.xml", we will look only for files up to
105  // "/somedir/data/foo-bar-baz.xml"
106  // - if the base path does not end in "/", e.g. "/somedir/data" we also
107  // look for "/somedir/data-foo-bar-baz.xml"
108  repeat = (last_slash >= m_path.size()) && (last_slash != std::string::npos);
109  if (repeat) {
110  filename[last_slash] = '-';
111  }
112  } while (repeat);
113  return nullptr;
114  }
115 
117 }
B2_GEARBOX_REGISTER_INPUTHANDLER
#define B2_GEARBOX_REGISTER_INPUTHANDLER(classname, prefix)
Helper macro to easily register new input handlers.
Definition: Gearbox.h:266
Belle2::gearbox::FileHandler
InputHandler which will read XML from plain files, optionally gzip compressed.
Definition: FileHandler.h:77
Belle2::StoreObjPtr
Type-safe access to single objects in the data store.
Definition: ParticleList.h:33
Belle2::gearbox::FileHandler::m_path
std::string m_path
Search path to look for files.
Definition: FileHandler.h:93
Belle2::gearbox::InputContext
Class representing a resource context for gearbox.
Definition: InputHandler.h:35
Belle2::gearbox::InputHandler
Class to provide an InputContext for a given XML resource name.
Definition: InputHandler.h:57
Belle2::FileSystem::findFile
static std::string findFile(const std::string &path, bool silent=false)
Search for given file or directory in local or central release directory, and return absolute path if found.
Definition: FileSystem.cc:147
Belle2::gearbox::FileContext
InputContext which serves the content of a plain file.
Definition: FileHandler.h:33
Belle2::gearbox::FileHandler::m_pathformat
boost::format m_pathformat
format object in case of run-dependent data path
Definition: FileHandler.h:95
Belle2::gearbox::FileHandler::open
virtual InputContext * open(const std::string &path) override
create a new FileContext by searching the file system for a file named like path.
Definition: FileHandler.cc:65
Belle2::StoreObjPtr::isValid
bool isValid() const
Check whether the object was created.
Definition: StoreObjPtr.h:120
Belle2::gearbox::FileHandler::m_rundependence
bool m_rundependence
bool indicating whether the data path is run-dependent
Definition: FileHandler.h:97