Belle II Software  release-05-01-25
PayloadProvider.cc
1 /**************************************************************************
2  * BASF2 (Belle Analysis Framework 2) *
3  * Copyright(C) 2019 - Belle II Collaboration *
4  * *
5  * Author: The Belle II Collaboration *
6  * Contributors: Martin Ritter *
7  * *
8  * This software is provided "as is" without any warranty. *
9  **************************************************************************/
10 
11 #include <framework/database/PayloadProvider.h>
12 #include <framework/logging/Logger.h>
13 #include <framework/utilities/FileSystem.h>
14 
15 #include <boost/filesystem.hpp>
16 #include <boost/algorithm/string.hpp>
17 
18 #include <sys/stat.h>
19 
20 namespace fs = boost::filesystem;
21 
22 namespace Belle2::Conditions {
23  PayloadProvider::PayloadProvider(const std::vector<std::string>& locations, const std::string& cacheDir, int timeout): m_timeout{timeout}
24  {
25  // check whether we have a chache directory ... otherwise use default
26  if (!cacheDir.empty()) {
27  m_cacheDir = {fs::absolute(cacheDir).string(), false};
28  } else {
29  m_cacheDir = {fs::absolute(fs::temp_directory_path() / "basf2-conditions").string(), false};
30  }
31  m_locations.reserve(locations.size() + 2); //cache location + all configured + central server
32  // always look in cache directory first
33  m_locations.emplace_back(m_cacheDir);
34  B2DEBUG(33, "Added payload cache location" << LogVar("path", m_cacheDir.base));
35  // and then in the other directories as specified
36  for (auto path : locations) {
37  boost::algorithm::trim(path);
38  if (path.empty()) {
39  B2FATAL("Found empty payload location in configuration. "
40  "Please make sure that the conditions database settings are correct");
41  }
42  bool remote = false;
43  if (auto pos = path.find("://"); pos != std::string::npos) {
44  //found a protocol: if file remove, otherwise keep as is and set as remote ...
45  auto protocol = path.substr(0, pos);
46  boost::algorithm::to_lower(protocol);
47  if (protocol == "file") {
48  path = path.substr(pos + 3);
49  } else if (protocol == "http" or protocol == "https") {
50  remote = true;
51  } else {
52  B2ERROR("Unknown protocol, only supported protocols for payload download are file, http, https" << LogVar("protocol", protocol));
53  continue;
54  }
55  }
56  // Also make sure files are absolute
57  if (!remote) path = fs::absolute(path).string();
58  // And then add it to the list
59  B2DEBUG(33, "Added payload search location" << LogVar(remote ? "url" : "path", path));
60  m_locations.emplace_back(PayloadLocation{path, remote});
61  }
62  // and as as last resort always go to the central server
63  m_locations.emplace_back(PayloadLocation{"", true});
64  }
65 
66  bool PayloadProvider::find(PayloadMetadata& metadata)
67  {
68  // Check all locations for the file ... but dispatch to the correct member function
69  return std::any_of(m_locations.begin(), m_locations.end(), [this, &metadata](const auto & loc) {
70  return loc.isRemote ? getRemoteFile(loc, metadata) : getLocalFile(loc, metadata);
71  });
72  }
73 
74  bool PayloadProvider::getLocalFile(const PayloadLocation& loc, PayloadMetadata& metadata) const
75  {
76  // look in both flat and hashed directory structures.
77  for (EDirectoryLayout structure : {EDirectoryLayout::c_hashed, EDirectoryLayout::c_flat}) {
78  auto fullPath = fs::path(loc.base) / getFilename(structure, metadata);
79  // No such file? nothing to do
80  if (!fs::exists(fullPath)) continue;
81  // Otherwise check the md5
82  B2DEBUG(36, "Checking checksum for payload file" << LogVar("name", metadata.name) << LogVar("local dir", loc.base)
83  << LogVar("revision", metadata.revision) << LogVar("filename", fullPath) << LogVar("checksum", metadata.checksum));
84  const auto actual = FileSystem::calculateMD5(fullPath.string());
85  if (actual == metadata.checksum) {
86  metadata.filename = fullPath.string();
87  B2DEBUG(37, "Found matching payload file");
88  return true;
89  }
90  B2DEBUG(37, "Checksum doesn't match, continue with next");
91  }
92  return false;
93  }
94 
95  bool PayloadProvider::getRemoteFile(const PayloadLocation& loc, PayloadMetadata& metadata)
96  {
97  // we want to download payloads in a hashed directory structure to keep amount of payloads per directory to a managable level
98  const auto local = fs::path(m_cacheDir.base) / getFilename(EDirectoryLayout::c_hashed, metadata);
99  // empty location: use the central server supplied baseUrl from payload metadata
100  const bool fallback = loc.base.empty();
101  const auto base = fallback ? metadata.baseUrl : loc.base;
102  // but we assume that on servers we have logical directory structure: value of payloadUrl is taken without modification
103  const auto url = m_downloader.joinWithSlash(base, metadata.payloadUrl);
104  // If anything fails we might want to go to temporary file and this happens at multiple places so lets use exception for that
105  try {
106  // now we need to make the directories to the file
107  try {
108  // Make sure that we create directories writable for all users
109  auto oldUmask = umask(0);
110  ScopeGuard umaskGuard([oldUmask] {umask(oldUmask);});
111  fs::create_directories(local.parent_path());
112  } catch (fs::filesystem_error& e) {
113  B2WARNING("Cannot create local payload directory" << LogVar("directory", local.parent_path())
114  << LogVar("error", e.code().message()));
115  throw; //rethrow existing exception
116  }
117  // ok, directory exists, now we need a write lock on the file to avoid race conditions
118  Belle2::FileSystem::Lock writelock(local.string());
119  B2DEBUG(37, "Attempting to lock payload file for writing ..." << LogVar("filename", local));
120  // if we cannot get one the folder/file might be write protected or
121  // download by another process takes to long. So let's download into
122  // temporary file.
123  if (!writelock.lock(m_timeout, true)) {
124  throw std::runtime_error("write lock failed");
125  }
126  // Ok we have the write lock, check if we can open the file for writing which should be a guaranteed success but who knows
127  // so bail if that fails.
128  std::fstream localStream(local.string().c_str(), std::ios::binary | std::ios::in | std::ios::out);
129  B2DEBUG(37, "Got write lock, check for file access ...");
130  if (!localStream.good()) {
131  B2ERROR("Cannot open file for writing" << LogVar("filename", local) << LogVar("error", strerror(errno)));
132  throw std::runtime_error("cannot open file for writing????");
133  }
134  // and make sure it's readable for all
135  boost::filesystem::permissions(local, boost::filesystem::all_all &
136  ~(boost::filesystem::owner_exe | boost::filesystem::group_exe | boost::filesystem::others_exe));
137  // File is open. Someone might have downloaded the file
138  // while we waited, check md5sum again.
139  B2DEBUG(37, "Ok, check digest in case another process downloaded already...");
140  if (not m_downloader.verifyChecksum(localStream, metadata.checksum)) {
141  // we have lock and it's broken so download the file
142  B2DEBUG(37, "Still not good, download now ...");
143  try {
144  if (not m_downloader.download(url, localStream, not fallback)) {
145  // if this returns false we have a 404: no need to try again
146  B2DEBUG(37, "Payload not found ... trying next source");
147  return false;
148  }
149  // ok, download was fine, check checksum again
150  if (not m_downloader.verifyChecksum(localStream, metadata.checksum)) {
151  B2WARNING("Conditions Database: checksum mismatch after download. Trying once more in a temporary file"
152  << LogVar("name", metadata.name) << LogVar("revision", metadata.revision) << LogVar("filename", local.string()));
153  throw std::runtime_error("checksum mismatch");
154  }
155  } catch (std::exception& e) {
156  B2ERROR("Conditions Database: failure downloading url" << LogVar("url", url) << LogVar("error", e.what()));
157  throw; // rethrow existing exception
158  }
159  B2DEBUG(37, "Download of payload successful");
160  }
161  // found in cache or downloaded, fine
162  metadata.filename = local.string();
163  return true;
164  } catch (std::exception&) {
165  // errors are already shown, just try again
166  return getTemporaryFile(url, metadata, not fallback);
167  }
168  }
169 
170  std::string PayloadProvider::getFilename(EDirectoryLayout structure,
171  const PayloadMetadata& payload) const
172  {
173  fs::path path("");
174  switch (structure) {
175  case EDirectoryLayout::c_hashed:
176  path /= payload.checksum.substr(0, 2);
177  path /= payload.name + "_r" + std::to_string(payload.revision) + ".root";
178  break;
179  case EDirectoryLayout::c_flat:
180  path /= "dbstore_" + payload.name + "_rev_" + std::to_string(payload.revision) + ".root";
181  break;
182  };
183  return path.string();
184  }
185 
186  bool PayloadProvider::getTemporaryFile(const std::string& url, PayloadMetadata& metadata, bool silentOnMissing)
187  {
188  // cppcheck-suppress stlIfFind ; cppcheck apparently cannot cope with if with initializer yet
189  if (auto && it = m_temporaryFiles.find(metadata.checksum); it != m_temporaryFiles.end()) {
190  metadata.filename = it->second->getName();
191  return true;
192  }
193  const auto openmode = std::ios_base::binary | std::ios_base::in | std::ios_base::out | std::ios_base::trunc;
194  std::unique_ptr<FileSystem::TemporaryFile> tmpfile(new FileSystem::TemporaryFile(openmode));
195  B2DEBUG(37, "Trying to download into temporary file " << tmpfile->getName());
196  try {
197  if (not m_downloader.download(url, *tmpfile, silentOnMissing)) return false;
198  if (not m_downloader.verifyChecksum(*tmpfile, metadata.checksum)) return false;
199  } catch (std::exception& e) {
200  B2ERROR("Conditions Database: failure downloading url" << LogVar("url", url) << LogVar("error", e.what()));
201  return false;
202  }
203  metadata.filename = tmpfile->getName();
204  m_temporaryFiles[metadata.checksum] = std::move(tmpfile);
205  return true;
206  }
207 }
Belle2::FileSystem::Lock::lock
bool lock(int timeout=300, bool ignoreErrors=false)
Try to lock the file.
Definition: FileSystem.cc:190
Belle2::Conditions::PayloadProvider::PayloadProvider
PayloadProvider(const std::vector< std::string > &locations, const std::string &cachedir="", int timeout=60)
Constructor for a given list of locations and optionally the location where downloaded payloads shoul...
Definition: PayloadProvider.cc:31
Belle2::ScopeGuard
Simple ScopeGuard to execute a function at the end of the object lifetime.
Definition: ScopeGuard.h:46
Belle2::Conditions::PayloadProvider::EDirectoryLayout
EDirectoryLayout
Enumeration of different directory layouts.
Definition: PayloadProvider.h:50
Belle2::Conditions::PayloadProvider::PayloadLocation
Simple struct to represent a lookup location.
Definition: PayloadProvider.h:42
Belle2::FileSystem::calculateMD5
static std::string calculateMD5(const std::string &filename)
Calculate the MD5 checksum of a given file.
Definition: FileSystem.cc:79
LogVar
Class to store variables with their name which were sent to the logging service.
Definition: LogVariableStream.h:24
Belle2::Conditions::PayloadMetadata
Simple struct to group all information necessary for a single payload.
Definition: PayloadMetadata.h:25
Belle2::FileSystem::TemporaryFile
Helper file to create a temporary file and ensure deletion if object goes out of scope.
Definition: FileSystem.h:138
Belle2::FileSystem::Lock
Helper class for locking a file.
Definition: FileSystem.h:107