Belle II Software light-2406-ragdoll
MetadataService.cc
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8
#include <framework/core/MetadataService.h>
#include <framework/core/ProcessStatistics.h>
#include <framework/core/Environment.h>
#include <framework/datastore/StoreObjPtr.h>
#include <framework/dataobjects/FileMetaData.h>
#include <framework/gearbox/Unit.h>
#include <framework/utilities/FileSystem.h>
#include <framework/utilities/Utils.h>
#include <algorithm>
#include <fstream>
18
19using namespace Belle2;
20
21MetadataService::MetadataService(): m_basf2StartTime{Utils::getClock()}
22{
23 m_json["basf2_apiversion"] = 1;
24}
25
27{
28 static MetadataService instance;
29 return instance;
30}
31
32void MetadataService::addRootOutputFile(const std::string& fileName, const FileMetaData* metaData)
33{
34 if (m_fileName.empty()) return;
35 if (!FileSystem::isFile(fileName)) return;
36
37 nlohmann::json file_json = {{"type", "RootOutput"}, {"filename", fileName}};
38
39 if (metaData) {
40 file_json["metadata"] = nlohmann::json::parse(metaData->getJsonStr());
41 }
42
43 try {
44 std::string check = Utils::getCommandOutput("b2file-check", {"--json", fileName});
45 file_json.merge_patch(nlohmann::json::parse(check));
46 } catch (...) {}
47
48 file_json["checksums"]["md5"] = FileSystem::calculateMD5(fileName);
49 file_json["checksums"]["adler32"] = FileSystem::calculateAdler32(fileName);
50 // no sha256 yet
51
52 m_json["output_files"].push_back(file_json);
53
54 writeJson();
55}
56
57void MetadataService::addRootNtupleFile(const std::string& fileName)
58{
59 if (m_fileName.empty()) return;
60 if (!FileSystem::isFile(fileName)) return;
61
62 nlohmann::json file_json = {{"type", "RootNtuple"}, {"filename", fileName}};
63
64 // no metadata and no check
65
66 file_json["checksums"]["md5"] = FileSystem::calculateMD5(fileName);
67 file_json["checksums"]["adler32"] = FileSystem::calculateAdler32(fileName);
68 // no sha256 yet
69
70 m_json["output_files"].push_back(file_json);
71
72 writeJson();
73}
74
75void MetadataService::addHDF5File(const std::string& fileName)
76{
77 if (m_fileName.empty()) return;
78 if (!FileSystem::isFile(fileName)) return;
79
80 nlohmann::json file_json = {{"type", "HDF5"}, {"filename", fileName}};
81
82 file_json["checksums"]["adler32"] = FileSystem::calculateAdler32(fileName);
83
84 m_json["output_files"].push_back(file_json);
85
86 writeJson();
87}
88
89void MetadataService::addBasf2Status(const std::string& message)
90{
91 if (m_fileName.empty()) return;
92 auto& status = m_json["basf2_status"];
93 status["elapsed_walltime_sec"] = (Utils::getClock() - m_basf2StartTime) / Unit::s;
94 status["resident_memory_mb"] = Utils::getRssMemoryKB() / 1024;
96 if (processStatistics.isValid()) {
97 const auto& stats = processStatistics->getGlobal();
98 status["runs_processed"] = int(stats.getCalls(ModuleStatistics::EStatisticCounters::c_BeginRun));
99 status["events_processed"] = int(stats.getCalls());
100 }
101 if ((status.count("total_events") == 0) || (status["total_events"] == 0)) {
102 status["total_events"] = Environment::Instance().getNumberOfEvents();
103 }
107 status["finished"] = false;
108 status["message"] = message;
109
110 writeJson();
111}
112
114{
115 if (m_fileName.empty()) return;
116
118
119 if (processStatistics.isValid()) {
120
121 std::vector<ModuleStatistics> modulesSortedByIndex(processStatistics->getAll());
122 sort(modulesSortedByIndex.begin(), modulesSortedByIndex.end(), [](const ModuleStatistics & a, const ModuleStatistics & b) { return a.getIndex() < b.getIndex(); });
123
124 for (const ModuleStatistics& stats : modulesSortedByIndex) {
125 m_json["modules_calls"][stats.getName()] = int(stats.getCalls(ModuleStatistics::EStatisticCounters::c_Event));
126 }
127 }
128
129 writeJson();
130}
131
133{
134 m_json["basf2_status"]["finished"] = true;
135 m_json["basf2_status"]["success"] = success;
136
137 writeJson();
138}
139
141{
142 if (m_fileName.empty()) return;
143 std::ofstream jsonFile(m_fileName.c_str());
144 jsonFile << m_json.dump(2) << std::endl;
145}
@ c_Persistent
Object is available during entire execution time.
Definition: DataStore.h:60
unsigned int getNumberOfEvents() const
Return the number of events, from either input or EventInfoSetter, or -n command line override (if le...
Definition: Environment.cc:39
static Environment & Instance()
Static method to get a reference to the Environment instance.
Definition: Environment.cc:28
Metadata information about a file.
Definition: FileMetaData.h:29
std::string getJsonStr() const
Get a json representation.
static bool isFile(const std::string &filename)
Check if filename points to an existing file.
Definition: FileSystem.cc:45
static std::string calculateMD5(const std::string &filename)
Calculate the MD5 checksum of a given file.
Definition: FileSystem.cc:78
static std::string calculateAdler32(const std::string &filename)
Calculate the Adler-32 checksum of a given file.
Definition: FileSystem.cc:86
@ c_Error
Error: for things that went wrong and have to be fixed.
Definition: LogConfig.h:30
@ c_Fatal
Fatal: for situations where the program execution cannot be continued.
Definition: LogConfig.h:31
@ c_Warning
Warning: for potential problems that the user should pay attention to.
Definition: LogConfig.h:29
int getMessageCounter(LogConfig::ELogLevel logLevel) const
Returns the number of logging calls per log level.
Definition: LogSystem.cc:161
static LogSystem & Instance()
Static method to get a reference to the LogSystem instance.
Definition: LogSystem.cc:31
This class provides a service for writing metadata about the basf2 execution and about output files t...
void writeJson()
Serialize the current json content to the json file.
double m_basf2StartTime
the start time of basf2
void addModuleCallsCount()
Add the metadata of number of calls of all modules.
void addBasf2Status(const std::string &message="")
Add metadata of basf2 status.
std::string m_fileName
The name of the json file.
MetadataService()
Constructor.
void addRootOutputFile(const std::string &fileName, const FileMetaData *metaData=nullptr)
Add the metadata of a root output file.
nlohmann::json m_json
The json object.
void addHDF5File(const std::string &fileName)
Add the metadata of a HDF5 file.
static MetadataService & Instance()
Static method to get a reference to the MetadataService instance.
void finishBasf2(bool success=true)
Add metadata for basf2 completion.
void addRootNtupleFile(const std::string &fileName)
Add the metadata of a root ntuple file.
Keep track of time and memory consumption during processing.
@ c_BeginRun
Counting time/calls in beginRun()
@ c_Event
Counting time/calls in event()
Type-safe access to single objects in the data store.
Definition: StoreObjPtr.h:96
bool isValid() const
Check whether the object was created.
Definition: StoreObjPtr.h:111
static const double s
[second]
Definition: Unit.h:95
std::string getCommandOutput(const std::string &command, const std::vector< std::string > &arguments={}, bool searchPath=true)
Execute a shell command and return its output.
Definition: Utils.cc:100
double getClock()
Return current value of the real-time clock.
Definition: Utils.cc:66
unsigned long getRssMemoryKB()
Returns the amount of memory the process actually occupies in the physical RAM of the machine.
Definition: Utils.cc:84
Abstract base class for different kinds of events.
Definition: ClusterUtils.h:24