Belle II Software development
FileHandler.cc
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8
9#include <framework/gearbox/FileHandler.h>
10#include <framework/gearbox/Gearbox.h>
11#include <framework/logging/Logger.h>
12#include <framework/utilities/FileSystem.h>
13#include <framework/datastore/StoreObjPtr.h>
14#include <framework/dataobjects/EventMetaData.h>
15
16#include <regex>
17#include <filesystem>
18
19#include <boost/iostreams/device/file.hpp>
20#include <boost/iostreams/filter/gzip.hpp>
21
22using namespace std;
23namespace io = boost::iostreams;
24namespace fs = std::filesystem;
25
26namespace Belle2::gearbox {
27
28 FileContext::FileContext(const string& filename, bool compressed)
29 {
30 B2DEBUG(100, "Opening " << filename << (compressed ? " (gzip)" : ""));
31 if (compressed) m_stream.push(io::gzip_decompressor());
32 m_stream.push(io::file_source(filename));
33 }
34
35 FileHandler::FileHandler(const std::string& uri): InputHandler(uri), m_path(uri)
36 {
37
38 if (m_path.empty()) {
39 m_path = "/data/";
40 } else {
41 //Check if we have placeholder for experiment or run information
42 std::regex exp(R"(\{EXP(?::(\d+))?\})");
43 std::regex run(R"(\{RUN(?::(\d+))?\})");
44 m_rundependence = std::regex_search(m_path, exp) || std::regex_search(m_path, run);
45 if (m_rundependence) {
46 //Apparently we do have placeholders, replace them by something
47 //boost::format will understand. The placeholder is something like
48 //{EXP} or {EXP:<n>} where <n> is an integer to make the number
49 //zero-filled with <n> digits, so we replace it with %1$0d or
50 //%1$0<n>d to denote to the first argument in a boost format string.
51 //Same for {RUN} and {RUN:<n>} but as second argument
52 StoreObjPtr<EventMetaData> eventMetaDataPtr;
53 eventMetaDataPtr.isRequired();
54 std::string tmp = std::regex_replace(m_path, exp, std::string("%1$$0$1d"));
55 tmp = std::regex_replace(tmp, run, std::string("%2$$0$1d"));
56 B2DEBUG(300, "Found run-dependence in file path, resulting in " << tmp);
57 m_pathformat = boost::format(tmp);
58 m_pathformat.exceptions(boost::io::all_error_bits ^ boost::io::too_many_args_bit);
59 }
60 }
61 B2DEBUG(300, "Created FileHandler for directory " << m_path);
62 }
63
64 InputContext* FileHandler::open(const std::string& path)
65 {
66 if (m_rundependence) {
67 StoreObjPtr<EventMetaData> eventMetaDataPtr;
68 if (!eventMetaDataPtr.isValid()) {
69 B2DEBUG(300, "No run info, probably initialize? Skipping backend");
70 return nullptr;
71 }
72 int exp = eventMetaDataPtr->getExperiment();
73 int run = eventMetaDataPtr->getRun();
74 m_path = (m_pathformat % exp % run).str();
75 }
76 fs::path basedir(m_path);
77 std::string filename = (basedir / path).string();
78 bool repeat(false);
79 do {
80 //Ok, let's try to find the file
81 B2DEBUG(350, "Trying to find " << filename);
82
83 std::string fullpath = FileSystem::findFile(filename, true);
84 if (!fullpath.empty()) {
85 if (m_rundependence) B2INFO("gearbox::FileHandler: Reading '" << fullpath << "'");
86 return new FileContext(fullpath, false);
87 }
88 fullpath = FileSystem::findFile(filename + ".gz", true);
89 if (!fullpath.empty()) {
90 if (m_rundependence) B2INFO("gearbox::FileHandler: Reading '" << fullpath << "' (gzip)");
91 return new FileContext(fullpath, true);
92 }
93
94 //did not work, replace last slash by a - and try again if such a
95 //replacement was successful. This allows flattening the directory
96 //structure from e.g. data/pxd/PXD-Alignment.xml to
97 //data-pxd-PXD-Alignment.xml to more easily override single files
98 size_t last_slash = filename.find_last_of('/');
99 //If the last slash we found is after the primary path we try again.
100 //This means we try to replace all slashes in the requested filename
101 //but not in the given base path, for example:
102 // - if the base path is "/somedir/data/" and the file to be opened is
103 // "foo/bar/baz.xml", we will look only for files up to
104 // "/somedir/data/foo-bar-baz.xml"
105 // - if the base path does not end in "/", e.g. "/somedir/data" we also
106 // look for "/somedir/data-foo-bar-baz.xml"
107 repeat = (last_slash >= m_path.size()) && (last_slash != std::string::npos);
108 if (repeat) {
109 filename[last_slash] = '-';
110 }
111 } while (repeat);
112 return nullptr;
113 }
114
116}
static std::string findFile(const std::string &path, bool silent=false)
Search for given file or directory in local or central release directory, and return absolute path if...
Definition: FileSystem.cc:151
bool isRequired(const std::string &name="")
Ensure this array/object has been registered previously.
Type-safe access to single objects in the data store.
Definition: StoreObjPtr.h:96
bool isValid() const
Check whether the object was created.
Definition: StoreObjPtr.h:111
InputContext which serves the content of a plain file.
Definition: FileHandler.h:23
boost::iostreams::filtering_istream m_stream
stream to read data from
Definition: FileHandler.h:40
FileContext(const std::string &filename, bool compressed)
Open the given filename.
Definition: FileHandler.cc:28
InputHandler which will read XML from plain files, optionally gzip compressed.
Definition: FileHandler.h:67
std::string m_path
Search path to look for files.
Definition: FileHandler.h:83
virtual InputContext * open(const std::string &path) override
create a new FileContext by searching the file system for a file named like path.
Definition: FileHandler.cc:64
FileHandler(const std::string &uri)
Instantiate a new file handler, using the uri as base search path.
Definition: FileHandler.cc:35
boost::format m_pathformat
format object in case of run-dependent data path
Definition: FileHandler.h:85
bool m_rundependence
bool indicating whether the data path is run-dependent
Definition: FileHandler.h:87
Class representing a resource context for gearbox.
Definition: InputHandler.h:25
Class to provide an InputContext for a given XML resource name.
Definition: InputHandler.h:47
#define B2_GEARBOX_REGISTER_INPUTHANDLER(classname, prefix)
Helper macro to easily register new input handlers.
Definition: Gearbox.h:256
STL namespace.