Belle II Software  release-08-01-10
MetadataService.cc
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 
9 #include <framework/core/MetadataService.h>
10 #include <framework/core/ProcessStatistics.h>
11 #include <framework/core/Environment.h>
12 #include <framework/datastore/StoreObjPtr.h>
13 #include <framework/dataobjects/FileMetaData.h>
14 #include <framework/gearbox/Unit.h>
15 #include <framework/utilities/FileSystem.h>
16 #include <framework/utilities/Utils.h>
17 #include <fstream>
18 
19 using namespace Belle2;
20 
21 MetadataService::MetadataService(): m_basf2StartTime{Utils::getClock()}
22 {
23  m_json["basf2_apiversion"] = 1;
24 }
25 
27 {
28  static MetadataService instance;
29  return instance;
30 }
31 
32 void MetadataService::addRootOutputFile(const std::string& fileName, const FileMetaData* metaData)
33 {
34  if (m_fileName.empty()) return;
35  if (!FileSystem::isFile(fileName)) return;
36 
37  nlohmann::json file_json = {{"type", "RootOutput"}, {"filename", fileName}};
38 
39  if (metaData) {
40  file_json["metadata"] = nlohmann::json::parse(metaData->getJsonStr());
41  }
42 
43  try {
44  std::string check = Utils::getCommandOutput("b2file-check", {"--json", fileName});
45  file_json.merge_patch(nlohmann::json::parse(check));
46  } catch (...) {}
47 
48  file_json["checksums"]["md5"] = FileSystem::calculateMD5(fileName);
49  file_json["checksums"]["adler32"] = FileSystem::calculateAdler32(fileName);
50  // no sha256 yet
51 
52  m_json["output_files"].push_back(file_json);
53 
54  writeJson();
55 }
56 
57 void MetadataService::addRootNtupleFile(const std::string& fileName)
58 {
59  if (m_fileName.empty()) return;
60  if (!FileSystem::isFile(fileName)) return;
61 
62  nlohmann::json file_json = {{"type", "RootNtuple"}, {"filename", fileName}};
63 
64  // no metadata and no check
65 
66  file_json["checksums"]["md5"] = FileSystem::calculateMD5(fileName);
67  file_json["checksums"]["adler32"] = FileSystem::calculateAdler32(fileName);
68  // no sha256 yet
69 
70  m_json["output_files"].push_back(file_json);
71 
72  writeJson();
73 }
74 
75 void MetadataService::addHDF5File(const std::string& fileName)
76 {
77  if (m_fileName.empty()) return;
78  if (!FileSystem::isFile(fileName)) return;
79 
80  nlohmann::json file_json = {{"type", "HDF5"}, {"filename", fileName}};
81 
82  file_json["checksums"]["adler32"] = FileSystem::calculateAdler32(fileName);
83 
84  m_json["output_files"].push_back(file_json);
85 
86  writeJson();
87 }
88 
89 void MetadataService::addBasf2Status(const std::string& message)
90 {
91  if (m_fileName.empty()) return;
92  auto& status = m_json["basf2_status"];
93  status["elapsed_walltime_sec"] = (Utils::getClock() - m_basf2StartTime) / Unit::s;
94  status["resident_memory_mb"] = Utils::getRssMemoryKB() / 1024;
96  if (processStatistics.isValid()) {
97  const auto& stats = processStatistics->getGlobal();
98  status["runs_processed"] = int(stats.getCalls(ModuleStatistics::EStatisticCounters::c_BeginRun));
99  status["events_processed"] = int(stats.getCalls());
100  }
101  if ((status.count("total_events") == 0) || (status["total_events"] == 0)) {
102  status["total_events"] = Environment::Instance().getNumberOfEvents();
103  }
107  status["finished"] = false;
108  status["message"] = message;
109 
110  writeJson();
111 }
112 
114 {
115  if (m_fileName.empty()) return;
116 
118 
119  if (processStatistics.isValid()) {
120 
121  std::vector<ModuleStatistics> modulesSortedByIndex(processStatistics->getAll());
122  sort(modulesSortedByIndex.begin(), modulesSortedByIndex.end(), [](const ModuleStatistics & a, const ModuleStatistics & b) { return a.getIndex() < b.getIndex(); });
123 
124  for (const ModuleStatistics& stats : modulesSortedByIndex) {
125  m_json["modules_calls"][stats.getName()] = int(stats.getCalls(ModuleStatistics::EStatisticCounters::c_Event));
126  }
127  }
128 
129  writeJson();
130 }
131 
133 {
134  m_json["basf2_status"]["finished"] = true;
135  m_json["basf2_status"]["success"] = success;
136 
137  writeJson();
138 }
139 
141 {
142  if (m_fileName.empty()) return;
143  std::ofstream jsonFile(m_fileName.c_str());
144  jsonFile << m_json.dump(2) << std::endl;
145 }
@ c_Persistent
Object is available during entire execution time.
Definition: DataStore.h:60
unsigned int getNumberOfEvents() const
Return the number of events, from either input or EventInfoSetter, or -n command line override (if le...
Definition: Environment.cc:39
static Environment & Instance()
Static method to get a reference to the Environment instance.
Definition: Environment.cc:28
Metadata information about a file.
Definition: FileMetaData.h:29
std::string getJsonStr() const
Get a json representation.
static bool isFile(const std::string &filename)
Check if filename points to an existing file.
Definition: FileSystem.cc:45
static std::string calculateMD5(const std::string &filename)
Calculate the MD5 checksum of a given file.
Definition: FileSystem.cc:78
static std::string calculateAdler32(const std::string &filename)
Calculate the Adler-32 checksum of a given file.
Definition: FileSystem.cc:86
@ c_Error
Error: for things that went wrong and have to be fixed.
Definition: LogConfig.h:30
@ c_Fatal
Fatal: for situations were the program execution can not be continued.
Definition: LogConfig.h:31
@ c_Warning
Warning: for potential problems that the user should pay attention to.
Definition: LogConfig.h:29
int getMessageCounter(LogConfig::ELogLevel logLevel) const
Returns the number of logging calls per log level.
Definition: LogSystem.cc:161
static LogSystem & Instance()
Static method to get a reference to the LogSystem instance.
Definition: LogSystem.cc:31
This class provides a service for writing metadata about the basf2 execution and about output files t...
void writeJson()
Serialize the current json content to the json file.
double m_basf2StartTime
the start time of basf2
void addModuleCallsCount()
Add the metadata of number of calls of all modules.
void addBasf2Status(const std::string &message="")
Add metadata of basf2 status.
std::string m_fileName
The name of the json file.
MetadataService()
Constructor.
void addRootOutputFile(const std::string &fileName, const FileMetaData *metaData=nullptr)
Add the metadata of a root output file.
nlohmann::json m_json
The json object.
void addHDF5File(const std::string &fileName)
Add the metadata of a HDF5 file.
static MetadataService & Instance()
Static method to get a reference to the MetadataService instance.
void finishBasf2(bool success=true)
Add metadata for basf2 completion.
void addRootNtupleFile(const std::string &fileName)
Add the metadata of a root ntuple file.
Keep track of time and memory consumption during processing.
Type-safe access to single objects in the data store.
Definition: StoreObjPtr.h:96
bool isValid() const
Check whether the object was created.
Definition: StoreObjPtr.h:111
static const double s
[second]
Definition: Unit.h:95
std::string getCommandOutput(const std::string &command, const std::vector< std::string > &arguments={}, bool searchPath=true)
Execute a shell command and return its output.
Definition: Utils.cc:100
double getClock()
Return current value of the real-time clock.
Definition: Utils.cc:66
unsigned long getRssMemoryKB()
Returns the amount of memory the process actually occupies in the physical RAM of the machine.
Definition: Utils.cc:84
Abstract base class for different kinds of events.