Belle II Software  release-08-01-10
PayloadProvider.cc
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 
9 #include <framework/database/PayloadProvider.h>
10 #include <framework/logging/Logger.h>
11 #include <framework/utilities/FileSystem.h>
12 
13 #include <boost/algorithm/string.hpp>
14 
15 #include <filesystem>
16 #include <sys/stat.h>
17 
18 namespace fs = std::filesystem;
19 
20 namespace Belle2::Conditions {
21  PayloadProvider::PayloadProvider(const std::vector<std::string>& locations, const std::string& cacheDir, int timeout): m_timeout{timeout}
22  {
23  // check whether we have a cache directory ... otherwise use default
24  if (!cacheDir.empty()) {
25  m_cacheDir = {fs::absolute(cacheDir).string(), false};
26  } else {
27  m_cacheDir = {fs::absolute(fs::temp_directory_path() / "basf2-conditions").string(), false};
28  }
29  m_locations.reserve(locations.size() + 2); //cache location + all configured + central server
30  // always look in cache directory first
31  m_locations.emplace_back(m_cacheDir);
32  B2DEBUG(33, "Added payload cache location" << LogVar("path", m_cacheDir.base));
33  // and then in the other directories as specified
34  for (auto path : locations) {
35  boost::algorithm::trim(path);
36  if (path.empty()) {
37  B2FATAL("Found empty payload location in configuration. "
38  "Please make sure that the conditions database settings are correct");
39  }
40  bool remote = false;
41  if (auto pos = path.find("://"); pos != std::string::npos) {
42  //found a protocol: if file remove, otherwise keep as is and set as remote ...
43  auto protocol = path.substr(0, pos);
44  boost::algorithm::to_lower(protocol);
45  if (protocol == "file") {
46  path = path.substr(pos + 3);
47  } else if (protocol == "http" or protocol == "https") {
48  remote = true;
49  } else {
50  B2ERROR("Unknown protocol, only supported protocols for payload download are file, http, https" << LogVar("protocol", protocol));
51  continue;
52  }
53  }
54  // Also make sure files are absolute
55  if (!remote) path = fs::absolute(path).string();
56  // And then add it to the list
57  B2DEBUG(33, "Added payload search location" << LogVar(remote ? "url" : "path", path));
58  m_locations.emplace_back(PayloadLocation{path, remote});
59  }
60  // and as as last resort always go to the central server
61  m_locations.emplace_back(PayloadLocation{"", true});
62  }
63 
65  {
66  // Check all locations for the file ... but dispatch to the correct member function
67  return std::any_of(m_locations.begin(), m_locations.end(), [this, &metadata](const auto & loc) {
68  return loc.isRemote ? getRemoteFile(loc, metadata) : getLocalFile(loc, metadata);
69  });
70  }
71 
73  {
74  // look in both flat and hashed directory structures.
76  auto fullPath = fs::path(loc.base) / getFilename(structure, metadata);
77  // No such file? nothing to do
78  if (!fs::exists(fullPath)) continue;
79  // Otherwise check the md5
80  B2DEBUG(36, "Checking checksum for payload file" << LogVar("name", metadata.name) << LogVar("local dir", loc.base)
81  << LogVar("revision", metadata.revision) << LogVar("filename", fullPath) << LogVar("checksum", metadata.checksum));
82  const auto actual = FileSystem::calculateMD5(fullPath.string());
83  if (actual == metadata.checksum) {
84  metadata.filename = fullPath.string();
85  B2DEBUG(37, "Found matching payload file");
86  return true;
87  }
88  B2DEBUG(37, "Checksum doesn't match, continue with next");
89  }
90  return false;
91  }
92 
94  {
95  // we want to download payloads in a hashed directory structure to keep the number of payloads per directory at a manageable level
96  const auto local = fs::path(m_cacheDir.base) / getFilename(EDirectoryLayout::c_hashed, metadata);
97  // empty location: use the central server supplied baseUrl from payload metadata
98  const bool fallback = loc.base.empty();
99  const auto base = fallback ? metadata.baseUrl : loc.base;
100  // but we assume that on servers we have logical directory structure: value of payloadUrl is taken without modification
101  const auto url = m_downloader.joinWithSlash(base, metadata.payloadUrl);
102  // If anything fails we might want to go to temporary file and this happens at multiple places so lets use exception for that
103  try {
104  // now we need to make the directories to the file
105  try {
106  // Make sure that we create directories writable for all users
107  auto oldUmask = umask(0);
108  ScopeGuard umaskGuard([oldUmask] {umask(oldUmask);});
109  fs::create_directories(local.parent_path());
110  } catch (fs::filesystem_error& e) {
111  B2WARNING("Cannot create local payload directory" << LogVar("directory", local.parent_path())
112  << LogVar("error", e.code().message()));
113  throw; //rethrow existing exception
114  }
115  // ok, directory exists, now we need a write lock on the file to avoid race conditions
116  Belle2::FileSystem::Lock writelock(local.string());
117  B2DEBUG(37, "Attempting to lock payload file for writing ..." << LogVar("filename", local));
118  // if we cannot get one the folder/file might be write protected or
119  // download by another process takes too long. So let's download into
120  // temporary file.
121  if (!writelock.lock(m_timeout, true)) {
122  throw std::runtime_error("write lock failed");
123  }
124  // Ok we have the write lock, check if we can open the file for writing which should be a guaranteed success but who knows
125  // so bail if that fails.
126  std::fstream localStream(local.string().c_str(), std::ios::binary | std::ios::in | std::ios::out);
127  B2DEBUG(37, "Got write lock, check for file access ...");
128  if (!localStream.good()) {
129  B2ERROR("Cannot open file for writing" << LogVar("filename", local) << LogVar("error", strerror(errno)));
130  throw std::runtime_error("cannot open file for writing????");
131  }
132  // and make sure it's readable for all
133  fs::permissions(local, fs::perms::all &
134  ~(fs::perms::owner_exec | fs::perms::group_exec | fs::perms::others_exec));
135  // File is open. Someone might have downloaded the file
136  // while we waited, check md5sum again.
137  B2DEBUG(37, "Ok, check digest in case another process downloaded already...");
138  if (not m_downloader.verifyChecksum(localStream, metadata.checksum)) {
139  // we have lock and it's broken so download the file
140  B2DEBUG(37, "Still not good, download now ...");
141  try {
142  if (not m_downloader.download(url, localStream, not fallback)) {
143  // if this returns false we have a 404: no need to try again
144  B2DEBUG(37, "Payload not found ... trying next source");
145  return false;
146  }
147  // ok, download was fine, check checksum again
148  if (not m_downloader.verifyChecksum(localStream, metadata.checksum)) {
149  B2WARNING("Conditions Database: checksum mismatch after download. Trying once more in a temporary file"
150  << LogVar("name", metadata.name) << LogVar("revision", metadata.revision) << LogVar("filename", local.string()));
151  throw std::runtime_error("checksum mismatch");
152  }
153  } catch (std::exception& e) {
154  B2ERROR("Conditions Database: failure downloading url" << LogVar("url", url) << LogVar("error", e.what()));
155  throw; // rethrow existing exception
156  }
157  B2DEBUG(37, "Download of payload successful");
158  }
159  // found in cache or downloaded, fine
160  metadata.filename = local.string();
161  return true;
162  } catch (std::exception&) {
163  // errors are already shown, just try again
164  return getTemporaryFile(url, metadata, not fallback);
165  }
166  }
167 
169  const PayloadMetadata& payload) const
170  {
171  fs::path path("");
172  switch (structure) {
174  path /= payload.checksum.substr(0, 2);
175  path /= payload.name + "_r" + std::to_string(payload.revision) + ".root";
176  break;
178  path /= "dbstore_" + payload.name + "_rev_" + std::to_string(payload.revision) + ".root";
179  break;
180  };
181  return path.string();
182  }
183 
184  bool PayloadProvider::getTemporaryFile(const std::string& url, PayloadMetadata& metadata, bool silentOnMissing)
185  {
186  if (auto&& it = m_temporaryFiles.find(metadata.checksum); it != m_temporaryFiles.end()) {
187  metadata.filename = it->second->getName();
188  return true;
189  }
190  const auto openmode = std::ios_base::binary | std::ios_base::in | std::ios_base::out | std::ios_base::trunc;
191  std::unique_ptr<FileSystem::TemporaryFile> tmpfile(new FileSystem::TemporaryFile(openmode));
192  B2DEBUG(37, "Trying to download into temporary file " << tmpfile->getName());
193  try {
194  if (not m_downloader.download(url, *tmpfile, silentOnMissing)) return false;
195  if (not m_downloader.verifyChecksum(*tmpfile, metadata.checksum)) return false;
196  } catch (std::exception& e) {
197  B2ERROR("Conditions Database: failure downloading url" << LogVar("url", url) << LogVar("error", e.what()));
198  return false;
199  }
200  metadata.filename = tmpfile->getName();
201  m_temporaryFiles[metadata.checksum] = std::move(tmpfile);
202  return true;
203  }
204 }
bool download(const std::string &url, std::ostream &stream, bool silentOnMissing=false)
Get a URL and save the content to the stream. This function raises exceptions when there are any problems.
Definition: Downloader.cc:258
std::string joinWithSlash(const std::string &base, const std::string &second)
Join two strings and make sure that there is exactly one '/' between them.
Definition: Downloader.cc:155
bool verifyChecksum(std::istream &input, const std::string &checksum)
check the digest of a stream
Definition: Downloader.h:82
PayloadProvider(const std::vector< std::string > &locations, const std::string &cachedir="", int timeout=60)
Constructor for a given list of locations and optionally the location where downloaded payloads shoul...
bool getTemporaryFile(const std::string &url, PayloadMetadata &meta, bool silentOnMissing)
Try to download url into a temporary file, if successful set the filename member of the metadata and ...
PayloadLocation m_cacheDir
Location of the cache directory where/how we want to store downloaded payloads.
bool getLocalFile(const PayloadLocation &loc, PayloadMetadata &meta) const
Look for a payload in the local directory location, set the filename member of the metadata instance ...
std::vector< PayloadLocation > m_locations
List of configured lookup locations: The first one will always be the cache directory and the last on...
std::unordered_map< std::string, std::unique_ptr< FileSystem::TemporaryFile > > m_temporaryFiles
Map of all active temporary files we downloaded and keep around until they can be closed.
int m_timeout
Timeout to wait for a write lock when trying to download payloads.
EDirectoryLayout
Enumeration of different directory layouts.
@ c_flat
Flat directory containing the payloads in the form dbstore_{NAME}_rev_{REVISION}.root
@ c_hashed
Hashed directory structure containing the payloads in the form AB/{NAME}_r{REVISION}....
std::string getFilename(EDirectoryLayout structure, const PayloadMetadata &payload) const
Return the filename of a payload to look for given a directory structure and some metadata.
Downloader & m_downloader
Instance to the database file downloading instance.
bool find(PayloadMetadata &meta)
Try to find a payload, return true on success, false if it cannot be found.
bool getRemoteFile(const PayloadLocation &loc, PayloadMetadata &meta)
Look for a payload on a remote server and download if possible, set the filename member of the metada...
Helper class for locking a file.
Definition: FileSystem.h:97
bool lock(int timeout=300, bool ignoreErrors=false)
Try to lock the file.
Definition: FileSystem.cc:186
Helper file to create a temporary file and ensure deletion if object goes out of scope.
Definition: FileSystem.h:128
static std::string calculateMD5(const std::string &filename)
Calculate the MD5 checksum of a given file.
Definition: FileSystem.cc:78
Simple ScopeGuard to execute a function at the end of the object lifetime.
Definition: ScopeGuard.h:36
Class to store variables with their name which were sent to the logging service.
Simple struct to group all information necessary for a single payload.
Simple struct to represent a lookup location.