Belle II Software development
PayloadProvider.cc
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8
9#include <framework/database/PayloadProvider.h>
10#include <framework/logging/Logger.h>
11#include <framework/utilities/FileSystem.h>
12
13#include <boost/algorithm/string.hpp>
14
15#include <filesystem>
16#include <sys/stat.h>
17
18namespace fs = std::filesystem;
19
20namespace Belle2::Conditions {
21 PayloadProvider::PayloadProvider(const std::vector<std::string>& locations, const std::string& cacheDir, int timeout): m_timeout{timeout}
22 {
23 // check whether we have a cache directory ... otherwise use default
24 if (!cacheDir.empty()) {
25 m_cacheDir = {fs::absolute(cacheDir).string(), false};
26 } else {
27 m_cacheDir = {fs::absolute(fs::temp_directory_path() / "basf2-conditions").string(), false};
28 }
29 m_locations.reserve(locations.size() + 2); //cache location + all configured + central server
30 // always look in cache directory first
31 m_locations.emplace_back(m_cacheDir);
32 B2DEBUG(33, "Added payload cache location" << LogVar("path", m_cacheDir.base));
33 // and then in the other directories as specified
34 for (auto path : locations) {
35 boost::algorithm::trim(path);
36 if (path.empty()) {
37 B2FATAL("Found empty payload location in configuration. "
38 "Please make sure that the conditions database settings are correct");
39 }
40 bool remote = false;
41 if (auto pos = path.find("://"); pos != std::string::npos) {
42 //found a protocol: if file remove, otherwise keep as is and set as remote ...
43 auto protocol = path.substr(0, pos);
44 boost::algorithm::to_lower(protocol);
45 if (protocol == "file") {
46 path = path.substr(pos + 3);
47 } else if (protocol == "http" or protocol == "https") {
48 remote = true;
49 } else {
50 B2ERROR("Unknown protocol, only supported protocols for payload download are file, http, https" << LogVar("protocol", protocol));
51 continue;
52 }
53 }
54 // Also make sure files are absolute
55 if (!remote) path = fs::absolute(path).string();
56 // And then add it to the list
57 B2DEBUG(33, "Added payload search location" << LogVar(remote ? "url" : "path", path));
58 m_locations.emplace_back(PayloadLocation{path, remote});
59 }
60 // and as as last resort always go to the central server
61 m_locations.emplace_back(PayloadLocation{"", true});
62 }
63
// Look for the payload described by `metadata` in the configured locations (m_locations).
// Returns true if one of the locations could provide the payload, false otherwise.
// NOTE(review): the signature line of this definition is missing from this listing; the body
// matches the member `bool find(PayloadMetadata&)` with the parameter named `metadata` -- confirm.
65 {
66 // Check all locations for the file ... but dispatch to the correct member function
// Remote locations are handled by getRemoteFile(), all others by getLocalFile().
67 return std::any_of(m_locations.begin(), m_locations.end(), [this, &metadata](const auto & loc) {
68 return loc.isRemote ? getRemoteFile(loc, metadata) : getLocalFile(loc, metadata);
69 });
70 }
71
// Look for the payload below the local directory `loc.base`.
// A candidate file is accepted only if it exists and its MD5 checksum equals metadata.checksum;
// in that case metadata.filename is set to the full path and true is returned.
// Returns false if no candidate matched.
// NOTE(review): the signature line and the loop header (listing lines 72 and 75) are missing from
// this listing. `structure` and the `continue` below presumably belong to a loop over the
// EDirectoryLayout values -- confirm against the original file.
73 {
74 // look in both flat and hashed directory structures.
// Candidate path: location directory plus the layout-specific filename
76 auto fullPath = fs::path(loc.base) / getFilename(structure, metadata);
77 // No such file? nothing to do
78 if (!fs::exists(fullPath)) continue;
79 // Otherwise check the md5
80 B2DEBUG(36, "Checking checksum for payload file" << LogVar("name", metadata.name) << LogVar("local dir", loc.base)
81 << LogVar("revision", metadata.revision) << LogVar("filename", fullPath) << LogVar("checksum", metadata.checksum));
82 const auto actual = FileSystem::calculateMD5(fullPath.string());
83 if (actual == metadata.checksum) {
// Matching file found: report its location through the metadata
84 metadata.filename = fullPath.string();
85 B2DEBUG(37, "Found matching payload file");
86 return true;
87 }
88 B2DEBUG(37, "Checksum doesn't match, continue with next");
89 }
// None of the candidates existed with a matching checksum
90 return false;
91 }
92
// Obtain a payload from a remote location and store it in the local cache directory.
//
// The target file is the cache directory plus the hashed-layout filename of the payload. The
// download url is `loc.base` joined with metadata.payloadUrl; if `loc.base` is empty,
// metadata.baseUrl is used as base instead.
//
// Returns true with metadata.filename set to the cached file when the file in the cache already
// has the right checksum or the download succeeded. Returns false when the download reports that
// the payload does not exist. Any exception raised on the way (directory creation, locking,
// opening the file, download, checksum mismatch) results in one more attempt through
// getTemporaryFile(), whose result is then returned.
//
// NOTE(review): the signature line of this definition is missing from this listing; the body
// matches the member `bool getRemoteFile(const PayloadLocation&, PayloadMetadata&)` with the
// parameters named `loc` and `metadata` -- confirm.
94 {
95 // we want to download payloads in a hashed directory structure to keep the amount of payloads per directory at a manageable level
96 const auto local = fs::path(m_cacheDir.base) / getFilename(EDirectoryLayout::c_hashed, metadata);
97 // empty location: use the central server supplied baseUrl from payload metadata
98 const bool fallback = loc.base.empty();
99 const auto base = fallback ? metadata.baseUrl : loc.base;
100 // but we assume that on servers we have logical directory structure: value of payloadUrl is taken without modification
101 const auto url = m_downloader.joinWithSlash(base, metadata.payloadUrl);
102 // If anything fails we might want to go to a temporary file and this happens at multiple places so let's use exceptions for that
103 try {
104 // now we need to make the directories to the file
105 try {
106 // Make sure that we create directories writable for all users
// The previous umask is restored by the guard when this inner scope is left
107 auto oldUmask = umask(0);
108 ScopeGuard umaskGuard([oldUmask] {umask(oldUmask);});
109 fs::create_directories(local.parent_path());
110 } catch (fs::filesystem_error& e) {
111 B2WARNING("Cannot create local payload directory" << LogVar("directory", local.parent_path())
112 << LogVar("error", e.code().message()));
113 throw; //rethrow existing exception
114 }
115 // ok, directory exists, now we need a write lock on the file to avoid race conditions
116 Belle2::FileSystem::Lock writelock(local.string());
117 B2DEBUG(37, "Attempting to lock payload file for writing ..." << LogVar("filename", local));
118 // if we cannot get one the folder/file might be write protected or
119 // download by another process takes too long. So let's download into
120 // a temporary file.
// The second argument of lock() is its `ignoreErrors` flag
121 if (!writelock.lock(m_timeout, true)) {
122 throw std::runtime_error("write lock failed");
123 }
124 // Ok we have the write lock, check if we can open the file for writing which should be a guaranteed success but who knows
125 // so bail if that fails.
// Opened for reading and writing without truncation: the existing content is checked further down
126 std::fstream localStream(local.string().c_str(), std::ios::binary | std::ios::in | std::ios::out);
127 B2DEBUG(37, "Got write lock, check for file access ...");
128 if (!localStream.good()) {
129 B2ERROR("Cannot open file for writing" << LogVar("filename", local) << LogVar("error", strerror(errno)));
130 throw std::runtime_error("cannot open file for writing????");
131 }
132 // and make sure it's readable for all
// i.e. all permission bits except the execute bits for owner, group and others
133 fs::permissions(local, fs::perms::all &
134 ~(fs::perms::owner_exec | fs::perms::group_exec | fs::perms::others_exec));
135 // File is open. Someone might have downloaded the file
136 // while we waited, check md5sum again.
137 B2DEBUG(37, "Ok, check digest in case another process downloaded already...");
138 if (not m_downloader.verifyChecksum(localStream, metadata.checksum)) {
139 // we have the lock and the file content is not valid so download the file
140 B2DEBUG(37, "Still not good, download now ...");
141 try {
// Third argument is download()'s `silentOnMissing` flag: set for every location except the fallback
142 if (not m_downloader.download(url, localStream, not fallback)) {
143 // if this returns false we have a 404: no need to try again
144 B2DEBUG(37, "Payload not found ... trying next source");
145 return false;
146 }
147 // ok, download was fine, check checksum again
148 if (not m_downloader.verifyChecksum(localStream, metadata.checksum)) {
149 B2WARNING("Conditions Database: checksum mismatch after download. Trying once more in a temporary file"
150 << LogVar("name", metadata.name) << LogVar("revision", metadata.revision) << LogVar("filename", local.string()));
151 throw std::runtime_error("checksum mismatch");
152 }
153 } catch (std::exception& e) {
// Also reached for the checksum mismatch thrown just above
154 B2ERROR("Conditions Database: failure downloading url" << LogVar("url", url) << LogVar("error", e.what()));
155 throw; // rethrow existing exception
156 }
157 B2DEBUG(37, "Download of payload successful");
158 }
159 // found in cache or downloaded, fine
160 metadata.filename = local.string();
161 return true;
162 } catch (std::exception&) {
163 // errors are already shown, just try again
164 return getTemporaryFile(url, metadata, not fallback);
165 }
166 }
167
// Build the relative filename of a payload for the directory layout `structure`.
// Returns the assembled path as a string.
// NOTE(review): the first line of the signature and the two `case` labels (listing lines 168,
// 173 and 177) are missing from this listing. Presumably the first branch is
// EDirectoryLayout::c_hashed and the second EDirectoryLayout::c_flat -- confirm.
169 const PayloadMetadata& payload) const
170 {
171 fs::path path("");
172 switch (structure) {
// first branch: "<first two characters of the checksum>/<name>_r<revision>.root"
174 path /= payload.checksum.substr(0, 2);
175 path /= payload.name + "_r" + std::to_string(payload.revision) + ".root";
176 break;
// second branch: "dbstore_<name>_rev_<revision>.root" without a subdirectory
178 path /= "dbstore_" + payload.name + "_rev_" + std::to_string(payload.revision) + ".root";
179 break;
180 };
181 return path.string();
182 }
183
184 bool PayloadProvider::getTemporaryFile(const std::string& url, PayloadMetadata& metadata, bool silentOnMissing)
185 {
186 if (auto&& it = m_temporaryFiles.find(metadata.checksum); it != m_temporaryFiles.end()) {
187 metadata.filename = it->second->getName();
188 return true;
189 }
190 const auto openmode = std::ios_base::binary | std::ios_base::in | std::ios_base::out | std::ios_base::trunc;
191 std::unique_ptr<FileSystem::TemporaryFile> tmpfile(new FileSystem::TemporaryFile(openmode));
192 B2DEBUG(37, "Trying to download into temporary file " << tmpfile->getName());
193 try {
194 if (not m_downloader.download(url, *tmpfile, silentOnMissing)) return false;
195 if (not m_downloader.verifyChecksum(*tmpfile, metadata.checksum)) return false;
196 } catch (std::exception& e) {
197 B2ERROR("Conditions Database: failure downloading url" << LogVar("url", url) << LogVar("error", e.what()));
198 return false;
199 }
200 metadata.filename = tmpfile->getName();
201 m_temporaryFiles[metadata.checksum] = std::move(tmpfile);
202 return true;
203 }
204}
bool download(const std::string &url, std::ostream &stream, bool silentOnMissing=false)
Get a URL and save the content to the stream. This function raises exceptions when there are any problems.
Definition: Downloader.cc:260
std::string joinWithSlash(const std::string &base, const std::string &second)
Join two strings and make sure that there is exactly one '/' between them.
Definition: Downloader.cc:156
bool verifyChecksum(std::istream &input, const std::string &checksum)
check the digest of a stream
Definition: Downloader.h:81
PayloadProvider(const std::vector< std::string > &locations, const std::string &cachedir="", int timeout=60)
Constructor for a given list of locations and optionally the location where downloaded payloads shoul...
bool getTemporaryFile(const std::string &url, PayloadMetadata &meta, bool silentOnMissing)
Try to download url into a temporary file, if successful set the filename member of the metadata and ...
PayloadLocation m_cacheDir
Location of the cache directory where/how we want to store downloaded payloads.
bool getLocalFile(const PayloadLocation &loc, PayloadMetadata &meta) const
Look for a payload in the local directory location, set the filename member of the metadata instance ...
std::vector< PayloadLocation > m_locations
List of configured lookup locations: The first one will always be the cache directory and the last on...
std::unordered_map< std::string, std::unique_ptr< FileSystem::TemporaryFile > > m_temporaryFiles
Map of all active temporary files we downloaded and keep around until they can be closed.
int m_timeout
Timeout to wait for a write lock when trying to download payloads.
EDirectoryLayout
Enumeration of different directory layouts.
@ c_flat
Flat directory containing the payloads in the form dbstore_{NAME}_rev_{REVISION}.root
@ c_hashed
Hashed directory structure containing the payloads in the form AB/{NAME}_r{REVISION}....
std::string getFilename(EDirectoryLayout structure, const PayloadMetadata &payload) const
Return the filename of a payload to look for given a directory structure and some metadata.
Downloader & m_downloader
Instance to the database file downloading instance.
bool find(PayloadMetadata &meta)
Try to find a payload, return true on success, false if it cannot be found.
bool getRemoteFile(const PayloadLocation &loc, PayloadMetadata &meta)
Look for a payload on a remote server and download if possible, set the filename member of the metada...
Helper class for locking a file.
Definition: FileSystem.h:97
bool lock(int timeout=300, bool ignoreErrors=false)
Try to lock the file.
Definition: FileSystem.cc:189
Helper class to create a temporary file and ensure its deletion when the object goes out of scope.
Definition: FileSystem.h:128
static std::string calculateMD5(const std::string &filename)
Calculate the MD5 checksum of a given file.
Definition: FileSystem.cc:78
Simple ScopeGuard to execute a function at the end of the object lifetime.
Definition: ScopeGuard.h:36
Class to store variables with their name which were sent to the logging service.
Simple struct to group all information necessary for a single payload.
Simple struct to represent a lookup location.